Diffstat (limited to 'target/linux/generic/pending-5.15/020-03-mm-multigenerational-lru-groundwork.patch')
-rw-r--r--  target/linux/generic/pending-5.15/020-03-mm-multigenerational-lru-groundwork.patch  996
1 file changed, 0 insertions(+), 996 deletions(-)
diff --git a/target/linux/generic/pending-5.15/020-03-mm-multigenerational-lru-groundwork.patch b/target/linux/generic/pending-5.15/020-03-mm-multigenerational-lru-groundwork.patch
deleted file mode 100644
index fbc15d8c91..0000000000
--- a/target/linux/generic/pending-5.15/020-03-mm-multigenerational-lru-groundwork.patch
+++ /dev/null
@@ -1,996 +0,0 @@
-From 05f366c941ae2bb8ba21c79fafcb747a5a6b967b Mon Sep 17 00:00:00 2001
-From: Yu Zhao <yuzhao@google.com>
-Date: Mon, 25 Jan 2021 21:12:33 -0700
-Subject: [PATCH 04/10] mm: multigenerational lru: groundwork
-
-For each lruvec, evictable pages are divided into multiple
-generations. The youngest generation number is stored in
-lrugen->max_seq for both anon and file types as they are aged on an
-equal footing. The oldest generation numbers are stored in
-lrugen->min_seq[] separately for anon and file types as clean file
-pages can be evicted regardless of swap constraints. These three
-variables are monotonically increasing. Generation numbers are
-truncated into order_base_2(MAX_NR_GENS+1) bits in order to fit into
-page->flags. The sliding window technique is used to prevent truncated
-generation numbers from overlapping. Each truncated generation number
-is an index into
-lrugen->lists[MAX_NR_GENS][ANON_AND_FILE][MAX_NR_ZONES].
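-
-As a concrete illustration (assuming CONFIG_NR_LRU_GENS is 4, so the
-counter needs order_base_2(4 + 1) = 3 bits and stores gen + 1, with 0
-meaning the page is not on a multigenerational lru list), the window
-arithmetic condenses into a minimal userspace sketch; the helper name
-and the standalone form are illustrative, not part of this patch:
-
-    #include <assert.h>
-
-    #define MAX_NR_GENS 4UL
-
-    /* map a monotonically increasing seq to a slot in the window */
-    static unsigned long gen_from_seq(unsigned long seq)
-    {
-        return seq % MAX_NR_GENS;
-    }
-
-    int main(void)
-    {
-        unsigned long min_seq = 7, max_seq = 9;
-
-        /* the window holds at most MAX_NR_GENS generations... */
-        assert(max_seq - min_seq + 1 <= MAX_NR_GENS);
-
-        /* ...so distinct seqs within it map to distinct slots */
-        for (unsigned long a = min_seq; a <= max_seq; a++)
-            for (unsigned long b = a + 1; b <= max_seq; b++)
-                assert(gen_from_seq(a) != gen_from_seq(b));
-        return 0;
-    }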
-
-The framework comprises two conceptually independent components: the
-aging, which produces young generations, and the eviction, which
-consumes old generations. Both can be invoked independently from user
-space for working set estimation and proactive reclaim.
-
-The protection of hot pages and the selection of cold pages are based
-on page access types and patterns. There are two access types: one via
-page tables and the other via file descriptors. The protection of the
-former type is by design stronger because:
- 1) The uncertainty in determining the access patterns of the former
- type is higher due to the coalesced nature of the accessed bit.
- 2) The cost of evicting the former type is higher due to the TLB
- flushes required and the likelihood of involving I/O.
- 3) The penalty of under-protecting the former type is higher because
- applications usually do not prepare themselves for major faults like
- they do for blocked I/O. For example, client applications commonly
- dedicate blocked I/O to separate threads to avoid UI jank that
- negatively affects user experience.
-
-There are also two access patterns: one with temporal locality and the
-other without. The latter pattern, e.g., random and sequential, needs
-to be explicitly excluded to avoid weakening the protection of the
-former pattern. Generally, the former type follows the former pattern
-unless MADV_SEQUENTIAL is specified, and the latter type follows the
-latter pattern unless outlying refaults have been observed.
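-
-In code terms, MADV_SEQUENTIAL sets VM_SEQ_READ on the VMA, so the
-mm/memory.c hunk below reduces this classification of a fault to a
-single flag test:
-
-    bool nonseq_fault = !(vma->vm_flags & VM_SEQ_READ);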
-
-Upon faulting, a page is added to the youngest generation, which
-provides the strongest protection as the eviction will not consider
-this page before the aging has scanned it at least twice. The first
-scan clears the accessed bit set during the initial fault, and the
-second scan makes sure this page has not been used since the first
-scan. A page from any other generation is brought back to the
-youngest generation whenever the aging finds the accessed bit set on
-any of the PTEs mapping this page.
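-
-This protection maps onto the active/inactive stats: the youngest and
-the second youngest generations are counted as active, which is what
-lru_gen_is_active() in the mm_inline.h hunk below computes. A
-standalone condensation for illustration, reusing MAX_NR_GENS and the
-modulo mapping from the sketch above:
-
-    #include <stdbool.h>
-
-    /* the youngest and the second youngest generations are active */
-    static bool gen_is_active(unsigned long max_seq, int gen)
-    {
-        return gen == (int)(max_seq % MAX_NR_GENS) ||
-               gen == (int)((max_seq - 1) % MAX_NR_GENS);
-    }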
-
-Unmapped pages are initially added to the oldest generation and then
-conditionally protected by tiers. This is done later [PATCH 07/10].
-
-Signed-off-by: Yu Zhao <yuzhao@google.com>
-Tested-by: Konstantin Kharlamov <Hi-Angel@yandex.ru>
-Change-Id: I71de7cd15b8dfa6f9fdd838023474693c4fee0a7
----
- fs/fuse/dev.c | 3 +-
- include/linux/cgroup.h | 15 +-
- include/linux/mm.h | 36 ++++
- include/linux/mm_inline.h | 182 ++++++++++++++++++++
- include/linux/mmzone.h | 70 ++++++++
- include/linux/page-flags-layout.h | 19 ++-
- include/linux/page-flags.h | 4 +-
- include/linux/sched.h | 3 +
- kernel/bounds.c | 3 +
- kernel/cgroup/cgroup-internal.h | 1 -
- mm/huge_memory.c | 3 +-
- mm/memcontrol.c | 1 +
- mm/memory.c | 7 +
- mm/mm_init.c | 6 +-
- mm/page_alloc.c | 1 +
- mm/swap.c | 9 +-
- mm/swapfile.c | 2 +
- mm/vmscan.c | 268 ++++++++++++++++++++++++++++++
- 18 files changed, 618 insertions(+), 15 deletions(-)
-
---- a/fs/fuse/dev.c
-+++ b/fs/fuse/dev.c
-@@ -785,7 +785,8 @@ static int fuse_check_page(struct page *
- 1 << PG_active |
- 1 << PG_workingset |
- 1 << PG_reclaim |
-- 1 << PG_waiters))) {
-+ 1 << PG_waiters |
-+ LRU_GEN_MASK | LRU_REFS_MASK))) {
- dump_page(page, "fuse: trying to steal weird page");
- return 1;
- }
---- a/include/linux/cgroup.h
-+++ b/include/linux/cgroup.h
-@@ -432,6 +432,18 @@ static inline void cgroup_put(struct cgr
- css_put(&cgrp->self);
- }
-
-+extern struct mutex cgroup_mutex;
-+
-+static inline void cgroup_lock(void)
-+{
-+ mutex_lock(&cgroup_mutex);
-+}
-+
-+static inline void cgroup_unlock(void)
-+{
-+ mutex_unlock(&cgroup_mutex);
-+}
-+
- /**
- * task_css_set_check - obtain a task's css_set with extra access conditions
- * @task: the task to obtain css_set for
-@@ -446,7 +458,6 @@ static inline void cgroup_put(struct cgr
- * as locks used during the cgroup_subsys::attach() methods.
- */
- #ifdef CONFIG_PROVE_RCU
--extern struct mutex cgroup_mutex;
- extern spinlock_t css_set_lock;
- #define task_css_set_check(task, __c) \
- rcu_dereference_check((task)->cgroups, \
-@@ -707,6 +718,8 @@ struct cgroup;
- static inline u64 cgroup_id(const struct cgroup *cgrp) { return 1; }
- static inline void css_get(struct cgroup_subsys_state *css) {}
- static inline void css_put(struct cgroup_subsys_state *css) {}
-+static inline void cgroup_lock(void) {}
-+static inline void cgroup_unlock(void) {}
- static inline int cgroup_attach_task_all(struct task_struct *from,
- struct task_struct *t) { return 0; }
- static inline int cgroupstats_build(struct cgroupstats *stats,
---- a/include/linux/mm.h
-+++ b/include/linux/mm.h
-@@ -1093,6 +1093,8 @@ vm_fault_t finish_mkwrite_fault(struct v
- #define ZONES_PGOFF (NODES_PGOFF - ZONES_WIDTH)
- #define LAST_CPUPID_PGOFF (ZONES_PGOFF - LAST_CPUPID_WIDTH)
- #define KASAN_TAG_PGOFF (LAST_CPUPID_PGOFF - KASAN_TAG_WIDTH)
-+#define LRU_GEN_PGOFF (KASAN_TAG_PGOFF - LRU_GEN_WIDTH)
-+#define LRU_REFS_PGOFF (LRU_GEN_PGOFF - LRU_REFS_WIDTH)
-
- /*
- * Define the bit shifts to access each section. For non-existent
-@@ -1807,6 +1809,40 @@ static inline void unmap_mapping_range(s
- loff_t const holebegin, loff_t const holelen, int even_cows) { }
- #endif
-
-+#ifdef CONFIG_LRU_GEN
-+static inline void task_enter_nonseq_fault(void)
-+{
-+ WARN_ON(current->in_nonseq_fault);
-+
-+ current->in_nonseq_fault = 1;
-+}
-+
-+static inline void task_exit_nonseq_fault(void)
-+{
-+ WARN_ON(!current->in_nonseq_fault);
-+
-+ current->in_nonseq_fault = 0;
-+}
-+
-+static inline bool task_in_nonseq_fault(void)
-+{
-+ return current->in_nonseq_fault;
-+}
-+#else
-+static inline void task_enter_nonseq_fault(void)
-+{
-+}
-+
-+static inline void task_exit_nonseq_fault(void)
-+{
-+}
-+
-+static inline bool task_in_nonseq_fault(void)
-+{
-+ return false;
-+}
-+#endif /* CONFIG_LRU_GEN */
-+
- static inline void unmap_shared_mapping_range(struct address_space *mapping,
- loff_t const holebegin, loff_t const holelen)
- {
---- a/include/linux/mm_inline.h
-+++ b/include/linux/mm_inline.h
-@@ -79,11 +79,187 @@ static __always_inline enum lru_list pag
- return lru;
- }
-
-+#ifdef CONFIG_LRU_GEN
-+
-+static inline bool lru_gen_enabled(void)
-+{
-+#ifdef CONFIG_LRU_GEN_ENABLED
-+ DECLARE_STATIC_KEY_TRUE(lru_gen_static_key);
-+
-+ return static_branch_likely(&lru_gen_static_key);
-+#else
-+ DECLARE_STATIC_KEY_FALSE(lru_gen_static_key);
-+
-+ return static_branch_unlikely(&lru_gen_static_key);
-+#endif
-+}
-+
-+/* Return an index within the sliding window that tracks MAX_NR_GENS generations. */
-+static inline int lru_gen_from_seq(unsigned long seq)
-+{
-+ return seq % MAX_NR_GENS;
-+}
-+
-+/* The youngest and the second youngest generations are counted as active. */
-+static inline bool lru_gen_is_active(struct lruvec *lruvec, int gen)
-+{
-+ unsigned long max_seq = lruvec->evictable.max_seq;
-+
-+ VM_BUG_ON(gen >= MAX_NR_GENS);
-+
-+ return gen == lru_gen_from_seq(max_seq) || gen == lru_gen_from_seq(max_seq - 1);
-+}
-+
-+/* Update the sizes of the multigenerational lru lists. */
-+static inline void lru_gen_update_size(struct page *page, struct lruvec *lruvec,
-+ int old_gen, int new_gen)
-+{
-+ int type = page_is_file_lru(page);
-+ int zone = page_zonenum(page);
-+ int delta = thp_nr_pages(page);
-+ enum lru_list lru = type * LRU_FILE;
-+ struct lrugen *lrugen = &lruvec->evictable;
-+
-+ lockdep_assert_held(&lruvec->lru_lock);
-+ VM_BUG_ON(old_gen != -1 && old_gen >= MAX_NR_GENS);
-+ VM_BUG_ON(new_gen != -1 && new_gen >= MAX_NR_GENS);
-+ VM_BUG_ON(old_gen == -1 && new_gen == -1);
-+
-+ if (old_gen >= 0)
-+ WRITE_ONCE(lrugen->sizes[old_gen][type][zone],
-+ lrugen->sizes[old_gen][type][zone] - delta);
-+ if (new_gen >= 0)
-+ WRITE_ONCE(lrugen->sizes[new_gen][type][zone],
-+ lrugen->sizes[new_gen][type][zone] + delta);
-+
-+ if (old_gen < 0) {
-+ if (lru_gen_is_active(lruvec, new_gen))
-+ lru += LRU_ACTIVE;
-+ update_lru_size(lruvec, lru, zone, delta);
-+ return;
-+ }
-+
-+ if (new_gen < 0) {
-+ if (lru_gen_is_active(lruvec, old_gen))
-+ lru += LRU_ACTIVE;
-+ update_lru_size(lruvec, lru, zone, -delta);
-+ return;
-+ }
-+
-+ if (!lru_gen_is_active(lruvec, old_gen) && lru_gen_is_active(lruvec, new_gen)) {
-+ update_lru_size(lruvec, lru, zone, -delta);
-+ update_lru_size(lruvec, lru + LRU_ACTIVE, zone, delta);
-+ }
-+
-+ VM_BUG_ON(lru_gen_is_active(lruvec, old_gen) && !lru_gen_is_active(lruvec, new_gen));
-+}
-+
-+/* Add a page to one of the multigenerational lru lists. Return true on success. */
-+static inline bool lru_gen_add_page(struct page *page, struct lruvec *lruvec, bool reclaiming)
-+{
-+ int gen;
-+ unsigned long old_flags, new_flags;
-+ int type = page_is_file_lru(page);
-+ int zone = page_zonenum(page);
-+ struct lrugen *lrugen = &lruvec->evictable;
-+
-+ if (PageUnevictable(page) || !lrugen->enabled[type])
-+ return false;
-+ /*
-+ * If a page shouldn't be considered for eviction, i.e., a page mapped
-+ * upon fault during which the accessed bit is set, add it to the
-+ * youngest generation.
-+ *
-+ * If a page can't be evicted immediately, i.e., an anon page not in
-+ * swap cache or a dirty page pending writeback, add it to the second
-+ * oldest generation.
-+ *
-+ * If a page could be evicted immediately, e.g., a clean page, add it to
-+ * the oldest generation.
-+ */
-+ if (PageActive(page))
-+ gen = lru_gen_from_seq(lrugen->max_seq);
-+ else if ((!type && !PageSwapCache(page)) ||
-+ (PageReclaim(page) && (PageDirty(page) || PageWriteback(page))))
-+ gen = lru_gen_from_seq(lrugen->min_seq[type] + 1);
-+ else
-+ gen = lru_gen_from_seq(lrugen->min_seq[type]);
-+
-+ do {
-+ new_flags = old_flags = READ_ONCE(page->flags);
-+ VM_BUG_ON_PAGE(new_flags & LRU_GEN_MASK, page);
-+
-+ new_flags &= ~(LRU_GEN_MASK | BIT(PG_active));
-+ new_flags |= (gen + 1UL) << LRU_GEN_PGOFF;
-+ } while (cmpxchg(&page->flags, old_flags, new_flags) != old_flags);
-+
-+ lru_gen_update_size(page, lruvec, -1, gen);
-+ /* for rotate_reclaimable_page() */
-+ if (reclaiming)
-+ list_add_tail(&page->lru, &lrugen->lists[gen][type][zone]);
-+ else
-+ list_add(&page->lru, &lrugen->lists[gen][type][zone]);
-+
-+ return true;
-+}
-+
-+/* Delete a page from one of the multigenerational lru lists. Return true on success. */
-+static inline bool lru_gen_del_page(struct page *page, struct lruvec *lruvec, bool reclaiming)
-+{
-+ int gen;
-+ unsigned long old_flags, new_flags;
-+
-+ do {
-+ new_flags = old_flags = READ_ONCE(page->flags);
-+ if (!(new_flags & LRU_GEN_MASK))
-+ return false;
-+
-+ VM_BUG_ON_PAGE(PageActive(page), page);
-+ VM_BUG_ON_PAGE(PageUnevictable(page), page);
-+
-+ gen = ((new_flags & LRU_GEN_MASK) >> LRU_GEN_PGOFF) - 1;
-+
-+ new_flags &= ~LRU_GEN_MASK;
-+ /* for shrink_page_list() */
-+ if (reclaiming)
-+ new_flags &= ~(BIT(PG_referenced) | BIT(PG_reclaim));
-+ else if (lru_gen_is_active(lruvec, gen))
-+ new_flags |= BIT(PG_active);
-+ } while (cmpxchg(&page->flags, old_flags, new_flags) != old_flags);
-+
-+ lru_gen_update_size(page, lruvec, gen, -1);
-+ list_del(&page->lru);
-+
-+ return true;
-+}
-+
-+#else
-+
-+static inline bool lru_gen_enabled(void)
-+{
-+ return false;
-+}
-+
-+static inline bool lru_gen_add_page(struct page *page, struct lruvec *lruvec, bool reclaiming)
-+{
-+ return false;
-+}
-+
-+static inline bool lru_gen_del_page(struct page *page, struct lruvec *lruvec, bool reclaiming)
-+{
-+ return false;
-+}
-+
-+#endif /* CONFIG_LRU_GEN */
-+
- static __always_inline void add_page_to_lru_list(struct page *page,
- struct lruvec *lruvec)
- {
- enum lru_list lru = page_lru(page);
-
-+ if (lru_gen_add_page(page, lruvec, false))
-+ return;
-+
- update_lru_size(lruvec, lru, page_zonenum(page), thp_nr_pages(page));
- list_add(&page->lru, &lruvec->lists[lru]);
- }
-@@ -93,6 +269,9 @@ static __always_inline void add_page_to_
- {
- enum lru_list lru = page_lru(page);
-
-+ if (lru_gen_add_page(page, lruvec, true))
-+ return;
-+
- update_lru_size(lruvec, lru, page_zonenum(page), thp_nr_pages(page));
- list_add_tail(&page->lru, &lruvec->lists[lru]);
- }
-@@ -100,6 +279,9 @@ static __always_inline void add_page_to_
- static __always_inline void del_page_from_lru_list(struct page *page,
- struct lruvec *lruvec)
- {
-+ if (lru_gen_del_page(page, lruvec, false))
-+ return;
-+
- list_del(&page->lru);
- update_lru_size(lruvec, page_lru(page), page_zonenum(page),
- -thp_nr_pages(page));
---- a/include/linux/mmzone.h
-+++ b/include/linux/mmzone.h
-@@ -294,6 +294,72 @@ enum lruvec_flags {
- */
- };
-
-+struct lruvec;
-+
-+#define LRU_GEN_MASK ((BIT(LRU_GEN_WIDTH) - 1) << LRU_GEN_PGOFF)
-+#define LRU_REFS_MASK ((BIT(LRU_REFS_WIDTH) - 1) << LRU_REFS_PGOFF)
-+
-+#ifdef CONFIG_LRU_GEN
-+
-+/*
-+ * For each lruvec, evictable pages are divided into multiple generations. The
-+ * youngest and the oldest generation numbers, AKA max_seq and min_seq, are
-+ * monotonically increasing. The sliding window technique is used to track at
-+ * least MIN_NR_GENS and at most MAX_NR_GENS generations. An offset within the
-+ * window, AKA gen, indexes an array of per-type and per-zone lists for the
-+ * corresponding generation. The counter in page->flags stores gen+1 while a
-+ * page is on one of the multigenerational lru lists. Otherwise, it stores 0.
-+ *
-+ * After a page is faulted in, the aging must check the accessed bit at least
-+ * twice before the eviction would consider it. The first check clears the
-+ * accessed bit set during the initial fault. The second check makes sure this
-+ * page hasn't been used since then.
-+ */
-+#define MIN_NR_GENS 2
-+#define MAX_NR_GENS ((unsigned int)CONFIG_NR_LRU_GENS)
-+
-+struct lrugen {
-+ /* the aging increments the max generation number */
-+ unsigned long max_seq;
-+ /* the eviction increments the min generation numbers */
-+ unsigned long min_seq[ANON_AND_FILE];
-+ /* the birth time of each generation in jiffies */
-+ unsigned long timestamps[MAX_NR_GENS];
-+ /* the multigenerational lru lists */
-+ struct list_head lists[MAX_NR_GENS][ANON_AND_FILE][MAX_NR_ZONES];
-+ /* the sizes of the multigenerational lru lists in pages */
-+ unsigned long sizes[MAX_NR_GENS][ANON_AND_FILE][MAX_NR_ZONES];
-+ /* whether the multigenerational lru is enabled */
-+ bool enabled[ANON_AND_FILE];
-+};
-+
-+#define MAX_BATCH_SIZE 8192
-+
-+void lru_gen_init_state(struct mem_cgroup *memcg, struct lruvec *lruvec);
-+void lru_gen_change_state(bool enable, bool main, bool swap);
-+
-+#ifdef CONFIG_MEMCG
-+void lru_gen_init_memcg(struct mem_cgroup *memcg);
-+#endif
-+
-+#else /* !CONFIG_LRU_GEN */
-+
-+static inline void lru_gen_init_state(struct mem_cgroup *memcg, struct lruvec *lruvec)
-+{
-+}
-+
-+static inline void lru_gen_change_state(bool enable, bool main, bool swap)
-+{
-+}
-+
-+#ifdef CONFIG_MEMCG
-+static inline void lru_gen_init_memcg(struct mem_cgroup *memcg)
-+{
-+}
-+#endif
-+
-+#endif /* CONFIG_LRU_GEN */
-+
- struct lruvec {
- struct list_head lists[NR_LRU_LISTS];
- /* per lruvec lru_lock for memcg */
-@@ -311,6 +377,10 @@ struct lruvec {
- unsigned long refaults[ANON_AND_FILE];
- /* Various lruvec state flags (enum lruvec_flags) */
- unsigned long flags;
-+#ifdef CONFIG_LRU_GEN
-+ /* unevictable pages are on LRU_UNEVICTABLE */
-+ struct lrugen evictable;
-+#endif
- #ifdef CONFIG_MEMCG
- struct pglist_data *pgdat;
- #endif
---- a/include/linux/page-flags-layout.h
-+++ b/include/linux/page-flags-layout.h
-@@ -26,6 +26,14 @@
-
- #define ZONES_WIDTH ZONES_SHIFT
-
-+#ifdef CONFIG_LRU_GEN
-+/* LRU_GEN_WIDTH is generated from order_base_2(CONFIG_NR_LRU_GENS + 1). */
-+#define LRU_REFS_WIDTH (CONFIG_TIERS_PER_GEN - 2)
-+#else
-+#define LRU_GEN_WIDTH 0
-+#define LRU_REFS_WIDTH 0
-+#endif /* CONFIG_LRU_GEN */
-+
- #ifdef CONFIG_SPARSEMEM
- #include <asm/sparsemem.h>
- #define SECTIONS_SHIFT (MAX_PHYSMEM_BITS - SECTION_SIZE_BITS)
-@@ -55,7 +63,8 @@
- #define SECTIONS_WIDTH 0
- #endif
-
--#if ZONES_WIDTH + SECTIONS_WIDTH + NODES_SHIFT <= BITS_PER_LONG - NR_PAGEFLAGS
-+#if ZONES_WIDTH + LRU_GEN_WIDTH + LRU_REFS_WIDTH + SECTIONS_WIDTH + NODES_SHIFT \
-+ <= BITS_PER_LONG - NR_PAGEFLAGS
- #define NODES_WIDTH NODES_SHIFT
- #elif defined(CONFIG_SPARSEMEM_VMEMMAP)
- #error "Vmemmap: No space for nodes field in page flags"
-@@ -89,8 +98,8 @@
- #define LAST_CPUPID_SHIFT 0
- #endif
-
--#if ZONES_WIDTH + SECTIONS_WIDTH + NODES_WIDTH + KASAN_TAG_WIDTH + LAST_CPUPID_SHIFT \
-- <= BITS_PER_LONG - NR_PAGEFLAGS
-+#if ZONES_WIDTH + LRU_GEN_WIDTH + LRU_REFS_WIDTH + SECTIONS_WIDTH + NODES_WIDTH + \
-+ KASAN_TAG_WIDTH + LAST_CPUPID_SHIFT <= BITS_PER_LONG - NR_PAGEFLAGS
- #define LAST_CPUPID_WIDTH LAST_CPUPID_SHIFT
- #else
- #define LAST_CPUPID_WIDTH 0
-@@ -100,8 +109,8 @@
- #define LAST_CPUPID_NOT_IN_PAGE_FLAGS
- #endif
-
--#if ZONES_WIDTH + SECTIONS_WIDTH + NODES_WIDTH + KASAN_TAG_WIDTH + LAST_CPUPID_WIDTH \
-- > BITS_PER_LONG - NR_PAGEFLAGS
-+#if ZONES_WIDTH + LRU_GEN_WIDTH + LRU_REFS_WIDTH + SECTIONS_WIDTH + NODES_WIDTH + \
-+ KASAN_TAG_WIDTH + LAST_CPUPID_WIDTH > BITS_PER_LONG - NR_PAGEFLAGS
- #error "Not enough bits in page flags"
- #endif
-
---- a/include/linux/page-flags.h
-+++ b/include/linux/page-flags.h
-@@ -845,7 +845,7 @@ static inline void ClearPageSlabPfmemall
- 1UL << PG_private | 1UL << PG_private_2 | \
- 1UL << PG_writeback | 1UL << PG_reserved | \
- 1UL << PG_slab | 1UL << PG_active | \
-- 1UL << PG_unevictable | __PG_MLOCKED)
-+ 1UL << PG_unevictable | __PG_MLOCKED | LRU_GEN_MASK)
-
- /*
- * Flags checked when a page is prepped for return by the page allocator.
-@@ -856,7 +856,7 @@ static inline void ClearPageSlabPfmemall
- * alloc-free cycle to prevent from reusing the page.
- */
- #define PAGE_FLAGS_CHECK_AT_PREP \
-- (PAGEFLAGS_MASK & ~__PG_HWPOISON)
-+ ((PAGEFLAGS_MASK & ~__PG_HWPOISON) | LRU_GEN_MASK | LRU_REFS_MASK)
-
- #define PAGE_FLAGS_PRIVATE \
- (1UL << PG_private | 1UL << PG_private_2)
---- a/include/linux/sched.h
-+++ b/include/linux/sched.h
-@@ -911,6 +911,9 @@ struct task_struct {
- #ifdef CONFIG_MEMCG
- unsigned in_user_fault:1;
- #endif
-+#ifdef CONFIG_LRU_GEN
-+ unsigned in_nonseq_fault:1;
-+#endif
- #ifdef CONFIG_COMPAT_BRK
- unsigned brk_randomized:1;
- #endif
---- a/kernel/bounds.c
-+++ b/kernel/bounds.c
-@@ -22,6 +22,9 @@ int main(void)
- DEFINE(NR_CPUS_BITS, ilog2(CONFIG_NR_CPUS));
- #endif
- DEFINE(SPINLOCK_SIZE, sizeof(spinlock_t));
-+#ifdef CONFIG_LRU_GEN
-+ DEFINE(LRU_GEN_WIDTH, order_base_2(CONFIG_NR_LRU_GENS + 1));
-+#endif
- /* End of constants */
-
- return 0;
---- a/kernel/cgroup/cgroup-internal.h
-+++ b/kernel/cgroup/cgroup-internal.h
-@@ -165,7 +165,6 @@ struct cgroup_mgctx {
- #define DEFINE_CGROUP_MGCTX(name) \
- struct cgroup_mgctx name = CGROUP_MGCTX_INIT(name)
-
--extern struct mutex cgroup_mutex;
- extern spinlock_t css_set_lock;
- extern struct cgroup_subsys *cgroup_subsys[];
- extern struct list_head cgroup_roots;
---- a/mm/huge_memory.c
-+++ b/mm/huge_memory.c
-@@ -2364,7 +2364,8 @@ static void __split_huge_page_tail(struc
- #ifdef CONFIG_64BIT
- (1L << PG_arch_2) |
- #endif
-- (1L << PG_dirty)));
-+ (1L << PG_dirty) |
-+ LRU_GEN_MASK | LRU_REFS_MASK));
-
- /* ->mapping in first tail page is compound_mapcount */
- VM_BUG_ON_PAGE(tail > 2 && page_tail->mapping != TAIL_MAPPING,
---- a/mm/memcontrol.c
-+++ b/mm/memcontrol.c
-@@ -5226,6 +5226,7 @@ static struct mem_cgroup *mem_cgroup_all
- memcg->deferred_split_queue.split_queue_len = 0;
- #endif
- idr_replace(&mem_cgroup_idr, memcg, memcg->id.id);
-+ lru_gen_init_memcg(memcg);
- return memcg;
- fail:
- mem_cgroup_id_remove(memcg);
---- a/mm/memory.c
-+++ b/mm/memory.c
-@@ -4788,6 +4788,7 @@ vm_fault_t handle_mm_fault(struct vm_are
- unsigned int flags, struct pt_regs *regs)
- {
- vm_fault_t ret;
-+ bool nonseq_fault = !(vma->vm_flags & VM_SEQ_READ);
-
- __set_current_state(TASK_RUNNING);
-
-@@ -4809,11 +4810,17 @@ vm_fault_t handle_mm_fault(struct vm_are
- if (flags & FAULT_FLAG_USER)
- mem_cgroup_enter_user_fault();
-
-+ if (nonseq_fault)
-+ task_enter_nonseq_fault();
-+
- if (unlikely(is_vm_hugetlb_page(vma)))
- ret = hugetlb_fault(vma->vm_mm, vma, address, flags);
- else
- ret = __handle_mm_fault(vma, address, flags);
-
-+ if (nonseq_fault)
-+ task_exit_nonseq_fault();
-+
- if (flags & FAULT_FLAG_USER) {
- mem_cgroup_exit_user_fault();
- /*
---- a/mm/mm_init.c
-+++ b/mm/mm_init.c
-@@ -65,14 +65,16 @@ void __init mminit_verify_pageflags_layo
-
- shift = 8 * sizeof(unsigned long);
- width = shift - SECTIONS_WIDTH - NODES_WIDTH - ZONES_WIDTH
-- - LAST_CPUPID_SHIFT - KASAN_TAG_WIDTH;
-+ - LAST_CPUPID_SHIFT - KASAN_TAG_WIDTH - LRU_GEN_WIDTH - LRU_REFS_WIDTH;
- mminit_dprintk(MMINIT_TRACE, "pageflags_layout_widths",
-- "Section %d Node %d Zone %d Lastcpupid %d Kasantag %d Flags %d\n",
-+ "Section %d Node %d Zone %d Lastcpupid %d Kasantag %d Gen %d Tier %d Flags %d\n",
- SECTIONS_WIDTH,
- NODES_WIDTH,
- ZONES_WIDTH,
- LAST_CPUPID_WIDTH,
- KASAN_TAG_WIDTH,
-+ LRU_GEN_WIDTH,
-+ LRU_REFS_WIDTH,
- NR_PAGEFLAGS);
- mminit_dprintk(MMINIT_TRACE, "pageflags_layout_shifts",
- "Section %d Node %d Zone %d Lastcpupid %d Kasantag %d\n",
---- a/mm/page_alloc.c
-+++ b/mm/page_alloc.c
-@@ -7456,6 +7456,7 @@ static void __meminit pgdat_init_interna
-
- pgdat_page_ext_init(pgdat);
- lruvec_init(&pgdat->__lruvec);
-+ lru_gen_init_state(NULL, &pgdat->__lruvec);
- }
-
- static void __meminit zone_init_internals(struct zone *zone, enum zone_type idx, int nid,
---- a/mm/swap.c
-+++ b/mm/swap.c
-@@ -446,6 +446,11 @@ void lru_cache_add(struct page *page)
- VM_BUG_ON_PAGE(PageActive(page) && PageUnevictable(page), page);
- VM_BUG_ON_PAGE(PageLRU(page), page);
-
-+ /* see the comment in lru_gen_add_page() */
-+ if (lru_gen_enabled() && !PageUnevictable(page) &&
-+ task_in_nonseq_fault() && !(current->flags & PF_MEMALLOC))
-+ SetPageActive(page);
-+
- get_page(page);
- local_lock(&lru_pvecs.lock);
- pvec = this_cpu_ptr(&lru_pvecs.lru_add);
-@@ -547,7 +552,7 @@ static void lru_deactivate_file_fn(struc
-
- static void lru_deactivate_fn(struct page *page, struct lruvec *lruvec)
- {
-- if (PageActive(page) && !PageUnevictable(page)) {
-+ if (!PageUnevictable(page) && (PageActive(page) || lru_gen_enabled())) {
- int nr_pages = thp_nr_pages(page);
-
- del_page_from_lru_list(page, lruvec);
-@@ -661,7 +666,7 @@ void deactivate_file_page(struct page *p
- */
- void deactivate_page(struct page *page)
- {
-- if (PageLRU(page) && PageActive(page) && !PageUnevictable(page)) {
-+ if (PageLRU(page) && !PageUnevictable(page) && (PageActive(page) || lru_gen_enabled())) {
- struct pagevec *pvec;
-
- local_lock(&lru_pvecs.lock);
---- a/mm/swapfile.c
-+++ b/mm/swapfile.c
-@@ -2688,6 +2688,7 @@ SYSCALL_DEFINE1(swapoff, const char __us
- err = 0;
- atomic_inc(&proc_poll_event);
- wake_up_interruptible(&proc_poll_wait);
-+ lru_gen_change_state(false, false, true);
-
- out_dput:
- filp_close(victim, NULL);
-@@ -3349,6 +3350,7 @@ SYSCALL_DEFINE2(swapon, const char __use
- mutex_unlock(&swapon_mutex);
- atomic_inc(&proc_poll_event);
- wake_up_interruptible(&proc_poll_wait);
-+ lru_gen_change_state(true, false, true);
-
- error = 0;
- goto out;
---- a/mm/vmscan.c
-+++ b/mm/vmscan.c
-@@ -50,6 +50,7 @@
- #include <linux/printk.h>
- #include <linux/dax.h>
- #include <linux/psi.h>
-+#include <linux/memory.h>
-
- #include <asm/tlbflush.h>
- #include <asm/div64.h>
-@@ -2880,6 +2881,273 @@ static bool can_age_anon_pages(struct pg
- return can_demote(pgdat->node_id, sc);
- }
-
-+#ifdef CONFIG_LRU_GEN
-+
-+/******************************************************************************
-+ * shorthand helpers
-+ ******************************************************************************/
-+
-+#define for_each_gen_type_zone(gen, type, zone) \
-+ for ((gen) = 0; (gen) < MAX_NR_GENS; (gen)++) \
-+ for ((type) = 0; (type) < ANON_AND_FILE; (type)++) \
-+ for ((zone) = 0; (zone) < MAX_NR_ZONES; (zone)++)
-+
-+static int page_lru_gen(struct page *page)
-+{
-+ unsigned long flags = READ_ONCE(page->flags);
-+
-+ return ((flags & LRU_GEN_MASK) >> LRU_GEN_PGOFF) - 1;
-+}
-+
-+static struct lruvec *get_lruvec(int nid, struct mem_cgroup *memcg)
-+{
-+ struct pglist_data *pgdat = NODE_DATA(nid);
-+
-+#ifdef CONFIG_MEMCG
-+ if (memcg) {
-+ struct lruvec *lruvec = &memcg->nodeinfo[nid]->lruvec;
-+
-+ if (lruvec->pgdat != pgdat)
-+ lruvec->pgdat = pgdat;
-+
-+ return lruvec;
-+ }
-+#endif
-+ return pgdat ? &pgdat->__lruvec : NULL;
-+}
-+
-+static int get_nr_gens(struct lruvec *lruvec, int type)
-+{
-+ return lruvec->evictable.max_seq - lruvec->evictable.min_seq[type] + 1;
-+}
-+
-+static bool __maybe_unused seq_is_valid(struct lruvec *lruvec)
-+{
-+ return get_nr_gens(lruvec, 1) >= MIN_NR_GENS &&
-+ get_nr_gens(lruvec, 1) <= get_nr_gens(lruvec, 0) &&
-+ get_nr_gens(lruvec, 0) <= MAX_NR_GENS;
-+}
-+
-+/******************************************************************************
-+ * state change
-+ ******************************************************************************/
-+
-+#ifdef CONFIG_LRU_GEN_ENABLED
-+DEFINE_STATIC_KEY_TRUE(lru_gen_static_key);
-+#else
-+DEFINE_STATIC_KEY_FALSE(lru_gen_static_key);
-+#endif
-+
-+static int lru_gen_nr_swapfiles;
-+
-+static bool __maybe_unused state_is_valid(struct lruvec *lruvec)
-+{
-+ int gen, type, zone;
-+ enum lru_list lru;
-+ struct lrugen *lrugen = &lruvec->evictable;
-+
-+ for_each_evictable_lru(lru) {
-+ type = is_file_lru(lru);
-+
-+ if (lrugen->enabled[type] && !list_empty(&lruvec->lists[lru]))
-+ return false;
-+ }
-+
-+ for_each_gen_type_zone(gen, type, zone) {
-+ if (!lrugen->enabled[type] && !list_empty(&lrugen->lists[gen][type][zone]))
-+ return false;
-+
-+ /* unlikely but not a bug when reset_batch_size() is pending */
-+ VM_WARN_ON(!lrugen->enabled[type] && lrugen->sizes[gen][type][zone]);
-+ }
-+
-+ return true;
-+}
-+
-+static bool fill_lists(struct lruvec *lruvec)
-+{
-+ enum lru_list lru;
-+ int remaining = MAX_BATCH_SIZE;
-+
-+ for_each_evictable_lru(lru) {
-+ int type = is_file_lru(lru);
-+ bool active = is_active_lru(lru);
-+ struct list_head *head = &lruvec->lists[lru];
-+
-+ if (!lruvec->evictable.enabled[type])
-+ continue;
-+
-+ while (!list_empty(head)) {
-+ bool success;
-+ struct page *page = lru_to_page(head);
-+
-+ VM_BUG_ON_PAGE(PageTail(page), page);
-+ VM_BUG_ON_PAGE(PageUnevictable(page), page);
-+ VM_BUG_ON_PAGE(PageActive(page) != active, page);
-+ VM_BUG_ON_PAGE(page_is_file_lru(page) != type, page);
-+ VM_BUG_ON_PAGE(page_lru_gen(page) != -1, page);
-+
-+ prefetchw_prev_lru_page(page, head, flags);
-+
-+ del_page_from_lru_list(page, lruvec);
-+ success = lru_gen_add_page(page, lruvec, false);
-+ VM_BUG_ON(!success);
-+
-+ if (!--remaining)
-+ return false;
-+ }
-+ }
-+
-+ return true;
-+}
-+
-+static bool drain_lists(struct lruvec *lruvec)
-+{
-+ int gen, type, zone;
-+ int remaining = MAX_BATCH_SIZE;
-+
-+ for_each_gen_type_zone(gen, type, zone) {
-+ struct list_head *head = &lruvec->evictable.lists[gen][type][zone];
-+
-+ if (lruvec->evictable.enabled[type])
-+ continue;
-+
-+ while (!list_empty(head)) {
-+ bool success;
-+ struct page *page = lru_to_page(head);
-+
-+ VM_BUG_ON_PAGE(PageTail(page), page);
-+ VM_BUG_ON_PAGE(PageUnevictable(page), page);
-+ VM_BUG_ON_PAGE(PageActive(page), page);
-+ VM_BUG_ON_PAGE(page_is_file_lru(page) != type, page);
-+ VM_BUG_ON_PAGE(page_zonenum(page) != zone, page);
-+
-+ prefetchw_prev_lru_page(page, head, flags);
-+
-+ success = lru_gen_del_page(page, lruvec, false);
-+ VM_BUG_ON(!success);
-+ add_page_to_lru_list(page, lruvec);
-+
-+ if (!--remaining)
-+ return false;
-+ }
-+ }
-+
-+ return true;
-+}
-+
-+/*
-+ * For file page tracking, we enable/disable it according to the main switch.
-+ * For anon page tracking, we only enable it when the main switch is on and
-+ * there is at least one swapfile; we disable it when there are no swapfiles
-+ * regardless of the value of the main switch. Otherwise, we will eventually
-+ * reach the max size of the sliding window and have to call inc_min_seq().
-+ */
-+void lru_gen_change_state(bool enable, bool main, bool swap)
-+{
-+ static DEFINE_MUTEX(state_mutex);
-+
-+ struct mem_cgroup *memcg;
-+
-+ mem_hotplug_begin();
-+ cgroup_lock();
-+ mutex_lock(&state_mutex);
-+
-+ if (swap) {
-+ if (enable)
-+ swap = !lru_gen_nr_swapfiles++;
-+ else
-+ swap = !--lru_gen_nr_swapfiles;
-+ }
-+
-+ if (main && enable != lru_gen_enabled()) {
-+ if (enable)
-+ static_branch_enable(&lru_gen_static_key);
-+ else
-+ static_branch_disable(&lru_gen_static_key);
-+ } else if (!swap || !lru_gen_enabled())
-+ goto unlock;
-+
-+ memcg = mem_cgroup_iter(NULL, NULL, NULL);
-+ do {
-+ int nid;
-+
-+ for_each_node(nid) {
-+ struct lruvec *lruvec = get_lruvec(nid, memcg);
-+
-+ if (!lruvec)
-+ continue;
-+
-+ spin_lock_irq(&lruvec->lru_lock);
-+
-+ VM_BUG_ON(!seq_is_valid(lruvec));
-+ VM_BUG_ON(!state_is_valid(lruvec));
-+
-+ lruvec->evictable.enabled[0] = lru_gen_enabled() && lru_gen_nr_swapfiles;
-+ lruvec->evictable.enabled[1] = lru_gen_enabled();
-+
-+ while (!(enable ? fill_lists(lruvec) : drain_lists(lruvec))) {
-+ spin_unlock_irq(&lruvec->lru_lock);
-+ cond_resched();
-+ spin_lock_irq(&lruvec->lru_lock);
-+ }
-+
-+ spin_unlock_irq(&lruvec->lru_lock);
-+ }
-+
-+ cond_resched();
-+ } while ((memcg = mem_cgroup_iter(NULL, memcg, NULL)));
-+unlock:
-+ mutex_unlock(&state_mutex);
-+ cgroup_unlock();
-+ mem_hotplug_done();
-+}
-+
-+/******************************************************************************
-+ * initialization
-+ ******************************************************************************/
-+
-+void lru_gen_init_state(struct mem_cgroup *memcg, struct lruvec *lruvec)
-+{
-+ int i;
-+ int gen, type, zone;
-+ struct lrugen *lrugen = &lruvec->evictable;
-+
-+ lrugen->max_seq = MIN_NR_GENS + 1;
-+ lrugen->enabled[0] = lru_gen_enabled() && lru_gen_nr_swapfiles;
-+ lrugen->enabled[1] = lru_gen_enabled();
-+
-+ for (i = 0; i <= MIN_NR_GENS + 1; i++)
-+ lrugen->timestamps[i] = jiffies;
-+
-+ for_each_gen_type_zone(gen, type, zone)
-+ INIT_LIST_HEAD(&lrugen->lists[gen][type][zone]);
-+}
-+
-+#ifdef CONFIG_MEMCG
-+void lru_gen_init_memcg(struct mem_cgroup *memcg)
-+{
-+ int nid;
-+
-+ for_each_node(nid) {
-+ struct lruvec *lruvec = get_lruvec(nid, memcg);
-+
-+ lru_gen_init_state(memcg, lruvec);
-+ }
-+}
-+#endif
-+
-+static int __init init_lru_gen(void)
-+{
-+ BUILD_BUG_ON(MIN_NR_GENS + 1 >= MAX_NR_GENS);
-+ BUILD_BUG_ON(BIT(LRU_GEN_WIDTH) <= MAX_NR_GENS);
-+
-+ return 0;
-+};
-+late_initcall(init_lru_gen);
-+
-+#endif /* CONFIG_LRU_GEN */
-+
- static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
- {
- unsigned long nr[NR_LRU_LISTS];