aboutsummaryrefslogtreecommitdiffstats
path: root/target/linux/generic/backport-5.15/020-v6.1-07-mm-multigenerational-lru-eviction.patch
diff options
context:
space:
mode:
Diffstat (limited to 'target/linux/generic/backport-5.15/020-v6.1-07-mm-multigenerational-lru-eviction.patch')
-rw-r--r--target/linux/generic/backport-5.15/020-v6.1-07-mm-multigenerational-lru-eviction.patch1002
1 files changed, 0 insertions, 1002 deletions
diff --git a/target/linux/generic/backport-5.15/020-v6.1-07-mm-multigenerational-lru-eviction.patch b/target/linux/generic/backport-5.15/020-v6.1-07-mm-multigenerational-lru-eviction.patch
deleted file mode 100644
index a75fedecaa..0000000000
--- a/target/linux/generic/backport-5.15/020-v6.1-07-mm-multigenerational-lru-eviction.patch
+++ /dev/null
@@ -1,1002 +0,0 @@
-From f4b881ce07ccb2a519f664afaa2a68225b612ca3 Mon Sep 17 00:00:00 2001
-From: Yu Zhao <yuzhao@google.com>
-Date: Tue, 29 Jun 2021 20:46:47 -0600
-Subject: [PATCH 07/10] mm: multigenerational lru: eviction
-
-The eviction consumes old generations. Given an lruvec, the eviction
-scans pages on lrugen->lists indexed by anon and file min_seq[]
-(modulo MAX_NR_GENS). It first tries to select a type based on the
-values of min_seq[]. If they are equal, it selects the type that has
-a lower refaulted %. The eviction sorts a page according to its
-updated generation number if the aging has found this page accessed.
-It also moves a page to the next generation if this page is from an
-upper tier that has a higher refaulted % than the base tier. The
-eviction increments min_seq[] of a selected type when it finds
-lrugen->lists indexed by min_seq[] of this selected type are empty.
-
-Each generation is divided into multiple tiers. Tiers represent
-different ranges of numbers of accesses from file descriptors only.
-Pages accessed N times via file descriptors belong to tier
-order_base_2(N). Each generation contains at most MAX_NR_TIERS tiers,
-and they require additional MAX_NR_TIERS-2 bits in page->flags. In
-contrast to moving between generations which requires list operations,
-moving between tiers only involves operations on page->flags and
-therefore has a negligible cost. A feedback loop modeled after the PID
-controller monitors refaulted % across all tiers and decides when to
-protect pages from which tiers.
-
-Unmapped pages are initially added to the oldest generation and then
-conditionally protected by tiers. Each tier keeps track of how many
-pages from it have refaulted. Tier 0 is the base tier and pages from
-it are evicted unconditionally because there are no better candidates.
-Pages from an upper tier are either evicted or moved to the next
-generation, depending on whether this upper tier has a higher
-refaulted % than the base tier. This model has the following
-advantages:
- 1) It removes the cost in the buffered access path and reduces the
- overall cost of protection because pages are conditionally protected
- in the reclaim path.
- 2) It takes mapped pages into account and avoids overprotecting
- pages accessed multiple times via file descriptors.
- 3 Additional tiers improve the protection of pages accessed more
- than twice.
-
-Signed-off-by: Yu Zhao <yuzhao@google.com>
-Tested-by: Konstantin Kharlamov <Hi-Angel@yandex.ru>
-Change-Id: I64c06d8f2cdb83ac7d56c7e1d07f043483956cac
----
- include/linux/mm_inline.h | 10 +
- include/linux/mmzone.h | 33 +++
- mm/swap.c | 42 +++
- mm/vmscan.c | 555 +++++++++++++++++++++++++++++++++++++-
- mm/workingset.c | 120 ++++++++-
- 5 files changed, 757 insertions(+), 3 deletions(-)
-
---- a/include/linux/mm_inline.h
-+++ b/include/linux/mm_inline.h
-@@ -106,6 +106,14 @@ static inline int lru_hist_from_seq(unsi
- return seq % NR_HIST_GENS;
- }
-
-+/* Convert the number of accesses to a tier. See the comment on MAX_NR_TIERS. */
-+static inline int lru_tier_from_refs(int refs)
-+{
-+ VM_BUG_ON(refs > BIT(LRU_REFS_WIDTH));
-+
-+ return order_base_2(refs + 1);
-+}
-+
- /* The youngest and the second youngest generations are counted as active. */
- static inline bool lru_gen_is_active(struct lruvec *lruvec, int gen)
- {
-@@ -226,6 +234,8 @@ static inline bool lru_gen_del_page(stru
- gen = ((new_flags & LRU_GEN_MASK) >> LRU_GEN_PGOFF) - 1;
-
- new_flags &= ~LRU_GEN_MASK;
-+ if ((new_flags & LRU_REFS_FLAGS) != LRU_REFS_FLAGS)
-+ new_flags &= ~(LRU_REFS_MASK | LRU_REFS_FLAGS);
- /* for shrink_page_list() */
- if (reclaiming)
- new_flags &= ~(BIT(PG_referenced) | BIT(PG_reclaim));
---- a/include/linux/mmzone.h
-+++ b/include/linux/mmzone.h
-@@ -319,6 +319,30 @@ struct page_vma_mapped_walk;
- #define MIN_NR_GENS 2
- #define MAX_NR_GENS ((unsigned int)CONFIG_NR_LRU_GENS)
-
-+/*
-+ * Each generation is divided into multiple tiers. Tiers represent different
-+ * ranges of numbers of accesses from file descriptors, i.e.,
-+ * mark_page_accessed(). In contrast to moving between generations which
-+ * requires the lru lock, moving between tiers only involves an atomic
-+ * operation on page->flags and therefore has a negligible cost.
-+ *
-+ * The purposes of tiers are to:
-+ * 1) estimate whether pages accessed multiple times via file descriptors are
-+ * more active than pages accessed only via page tables by separating the two
-+ * access types into upper tiers and the base tier, and comparing refaulted %
-+ * across all tiers.
-+ * 2) improve buffered io performance by deferring the protection of pages
-+ * accessed multiple times until the eviction. That is the protection happens
-+ * in the reclaim path, not the access path.
-+ *
-+ * Pages accessed N times via file descriptors belong to tier order_base_2(N).
-+ * The base tier may be marked by PageReferenced(). All upper tiers are marked
-+ * by PageReferenced() && PageWorkingset(). Additional bits from page->flags are
-+ * used to support more than one upper tier.
-+ */
-+#define MAX_NR_TIERS ((unsigned int)CONFIG_TIERS_PER_GEN)
-+#define LRU_REFS_FLAGS (BIT(PG_referenced) | BIT(PG_workingset))
-+
- /* Whether to keep stats for historical generations. */
- #ifdef CONFIG_LRU_GEN_STATS
- #define NR_HIST_GENS ((unsigned int)CONFIG_NR_LRU_GENS)
-@@ -337,6 +361,15 @@ struct lrugen {
- struct list_head lists[MAX_NR_GENS][ANON_AND_FILE][MAX_NR_ZONES];
- /* the sizes of the multigenerational lru lists in pages */
- unsigned long sizes[MAX_NR_GENS][ANON_AND_FILE][MAX_NR_ZONES];
-+ /* the exponential moving average of refaulted */
-+ unsigned long avg_refaulted[ANON_AND_FILE][MAX_NR_TIERS];
-+ /* the exponential moving average of protected+evicted */
-+ unsigned long avg_total[ANON_AND_FILE][MAX_NR_TIERS];
-+ /* the base tier isn't protected, hence the minus one */
-+ unsigned long protected[NR_HIST_GENS][ANON_AND_FILE][MAX_NR_TIERS - 1];
-+ /* incremented without holding the lru lock */
-+ atomic_long_t evicted[NR_HIST_GENS][ANON_AND_FILE][MAX_NR_TIERS];
-+ atomic_long_t refaulted[NR_HIST_GENS][ANON_AND_FILE][MAX_NR_TIERS];
- /* whether the multigenerational lru is enabled */
- bool enabled[ANON_AND_FILE];
- };
---- a/mm/swap.c
-+++ b/mm/swap.c
-@@ -389,6 +389,43 @@ static void __lru_cache_activate_page(st
- local_unlock(&lru_pvecs.lock);
- }
-
-+#ifdef CONFIG_LRU_GEN
-+static void page_inc_refs(struct page *page)
-+{
-+ unsigned long refs;
-+ unsigned long old_flags, new_flags;
-+
-+ if (PageUnevictable(page))
-+ return;
-+
-+ /* see the comment on MAX_NR_TIERS */
-+ do {
-+ new_flags = old_flags = READ_ONCE(page->flags);
-+
-+ if (!(new_flags & BIT(PG_referenced))) {
-+ new_flags |= BIT(PG_referenced);
-+ continue;
-+ }
-+
-+ if (!(new_flags & BIT(PG_workingset))) {
-+ new_flags |= BIT(PG_workingset);
-+ continue;
-+ }
-+
-+ refs = new_flags & LRU_REFS_MASK;
-+ refs = min(refs + BIT(LRU_REFS_PGOFF), LRU_REFS_MASK);
-+
-+ new_flags &= ~LRU_REFS_MASK;
-+ new_flags |= refs;
-+ } while (new_flags != old_flags &&
-+ cmpxchg(&page->flags, old_flags, new_flags) != old_flags);
-+}
-+#else
-+static void page_inc_refs(struct page *page)
-+{
-+}
-+#endif /* CONFIG_LRU_GEN */
-+
- /*
- * Mark a page as having seen activity.
- *
-@@ -403,6 +440,11 @@ void mark_page_accessed(struct page *pag
- {
- page = compound_head(page);
-
-+ if (lru_gen_enabled()) {
-+ page_inc_refs(page);
-+ return;
-+ }
-+
- if (!PageReferenced(page)) {
- SetPageReferenced(page);
- } else if (PageUnevictable(page)) {
---- a/mm/vmscan.c
-+++ b/mm/vmscan.c
-@@ -1145,9 +1145,11 @@ static int __remove_mapping(struct addre
-
- if (PageSwapCache(page)) {
- swp_entry_t swap = { .val = page_private(page) };
-- mem_cgroup_swapout(page, swap);
-+
-+ /* get a shadow entry before page_memcg() is cleared */
- if (reclaimed && !mapping_exiting(mapping))
- shadow = workingset_eviction(page, target_memcg);
-+ mem_cgroup_swapout(page, swap);
- __delete_from_swap_cache(page, swap, shadow);
- xa_unlock_irq(&mapping->i_pages);
- put_swap_page(page, swap);
-@@ -1410,6 +1412,11 @@ retry:
- if (!sc->may_unmap && page_mapped(page))
- goto keep_locked;
-
-+ /* lru_gen_look_around() has updated this page? */
-+ if (lru_gen_enabled() && !ignore_references &&
-+ page_mapped(page) && PageReferenced(page))
-+ goto keep_locked;
-+
- may_enter_fs = (sc->gfp_mask & __GFP_FS) ||
- (PageSwapCache(page) && (sc->gfp_mask & __GFP_IO));
-
-@@ -2505,6 +2512,9 @@ static void prepare_scan_count(pg_data_t
- unsigned long file;
- struct lruvec *target_lruvec;
-
-+ if (lru_gen_enabled())
-+ return;
-+
- target_lruvec = mem_cgroup_lruvec(sc->target_mem_cgroup, pgdat);
-
- /*
-@@ -2845,6 +2855,17 @@ static int page_lru_gen(struct page *pag
- return ((flags & LRU_GEN_MASK) >> LRU_GEN_PGOFF) - 1;
- }
-
-+static int page_lru_tier(struct page *page)
-+{
-+ int refs;
-+ unsigned long flags = READ_ONCE(page->flags);
-+
-+ refs = (flags & LRU_REFS_FLAGS) == LRU_REFS_FLAGS ?
-+ ((flags & LRU_REFS_MASK) >> LRU_REFS_PGOFF) + 1 : 0;
-+
-+ return lru_tier_from_refs(refs);
-+}
-+
- static int get_swappiness(struct mem_cgroup *memcg)
- {
- return mem_cgroup_get_nr_swap_pages(memcg) >= MIN_BATCH_SIZE ?
-@@ -3181,6 +3202,91 @@ done:
- }
-
- /******************************************************************************
-+ * refault feedback loop
-+ ******************************************************************************/
-+
-+/*
-+ * A feedback loop modeled after the PID controller. Currently supports the
-+ * proportional (P) and the integral (I) terms; the derivative (D) term can be
-+ * added if necessary. The setpoint (SP) is the desired position; the process
-+ * variable (PV) is the measured position. The error is the difference between
-+ * the SP and the PV. A positive error results in a positive control output
-+ * correction, which, in our case, is to allow eviction.
-+ *
-+ * The P term is refaulted % of the current generation being evicted. The I
-+ * term is the exponential moving average of refaulted % of previously evicted
-+ * generations, using the smoothing factor 1/2.
-+ *
-+ * Our goal is to maintain proportional refaulted % across all tiers.
-+ */
-+struct ctrl_pos {
-+ unsigned long refaulted;
-+ unsigned long total;
-+ int gain;
-+};
-+
-+static void read_ctrl_pos(struct lruvec *lruvec, int type, int tier, int gain,
-+ struct ctrl_pos *pos)
-+{
-+ struct lrugen *lrugen = &lruvec->evictable;
-+ int hist = lru_hist_from_seq(lrugen->min_seq[type]);
-+
-+ pos->refaulted = lrugen->avg_refaulted[type][tier] +
-+ atomic_long_read(&lrugen->refaulted[hist][type][tier]);
-+ pos->total = lrugen->avg_total[type][tier] +
-+ atomic_long_read(&lrugen->evicted[hist][type][tier]);
-+ if (tier)
-+ pos->total += lrugen->protected[hist][type][tier - 1];
-+ pos->gain = gain;
-+}
-+
-+static void reset_ctrl_pos(struct lruvec *lruvec, int gen, int type)
-+{
-+ int tier;
-+ int hist = lru_hist_from_seq(gen);
-+ struct lrugen *lrugen = &lruvec->evictable;
-+ bool carryover = gen == lru_gen_from_seq(lrugen->min_seq[type]);
-+ bool clear = carryover ? NR_HIST_GENS == 1 : NR_HIST_GENS > 1;
-+
-+ if (!carryover && !clear)
-+ return;
-+
-+ for (tier = 0; tier < MAX_NR_TIERS; tier++) {
-+ if (carryover) {
-+ unsigned long sum;
-+
-+ sum = lrugen->avg_refaulted[type][tier] +
-+ atomic_long_read(&lrugen->refaulted[hist][type][tier]);
-+ WRITE_ONCE(lrugen->avg_refaulted[type][tier], sum / 2);
-+
-+ sum = lrugen->avg_total[type][tier] +
-+ atomic_long_read(&lrugen->evicted[hist][type][tier]);
-+ if (tier)
-+ sum += lrugen->protected[hist][type][tier - 1];
-+ WRITE_ONCE(lrugen->avg_total[type][tier], sum / 2);
-+ }
-+
-+ if (clear) {
-+ atomic_long_set(&lrugen->refaulted[hist][type][tier], 0);
-+ atomic_long_set(&lrugen->evicted[hist][type][tier], 0);
-+ if (tier)
-+ WRITE_ONCE(lrugen->protected[hist][type][tier - 1], 0);
-+ }
-+ }
-+}
-+
-+static bool positive_ctrl_err(struct ctrl_pos *sp, struct ctrl_pos *pv)
-+{
-+ /*
-+ * Allow eviction if the PV has a limited number of refaulted pages or a
-+ * lower refaulted % than the SP.
-+ */
-+ return pv->refaulted < MIN_BATCH_SIZE ||
-+ pv->refaulted * max(sp->total, 1UL) * sp->gain <=
-+ sp->refaulted * max(pv->total, 1UL) * pv->gain;
-+}
-+
-+/******************************************************************************
- * the aging
- ******************************************************************************/
-
-@@ -3200,6 +3306,7 @@ static int page_update_gen(struct page *
-
- new_flags &= ~LRU_GEN_MASK;
- new_flags |= (gen + 1UL) << LRU_GEN_PGOFF;
-+ new_flags &= ~(LRU_REFS_MASK | LRU_REFS_FLAGS);
- } while (new_flags != old_flags &&
- cmpxchg(&page->flags, old_flags, new_flags) != old_flags);
-
-@@ -3231,6 +3338,7 @@ static void page_inc_gen(struct page *pa
-
- new_flags &= ~LRU_GEN_MASK;
- new_flags |= (new_gen + 1UL) << LRU_GEN_PGOFF;
-+ new_flags &= ~(LRU_REFS_MASK | LRU_REFS_FLAGS);
- /* for end_page_writeback() */
- if (reclaiming)
- new_flags |= BIT(PG_reclaim);
-@@ -3722,6 +3830,7 @@ static bool inc_min_seq(struct lruvec *l
- }
- }
-
-+ reset_ctrl_pos(lruvec, gen, type);
- WRITE_ONCE(lrugen->min_seq[type], lrugen->min_seq[type] + 1);
-
- return true;
-@@ -3759,6 +3868,8 @@ next:
- if (min_seq[type] == lrugen->min_seq[type])
- continue;
-
-+ gen = lru_gen_from_seq(lrugen->min_seq[type]);
-+ reset_ctrl_pos(lruvec, gen, type);
- WRITE_ONCE(lrugen->min_seq[type], min_seq[type]);
- success = true;
- }
-@@ -3820,6 +3931,9 @@ static void inc_max_seq(struct lruvec *l
- }
- }
-
-+ for (type = 0; type < ANON_AND_FILE; type++)
-+ reset_ctrl_pos(lruvec, gen, type);
-+
- WRITE_ONCE(lrugen->timestamps[gen], jiffies);
- /* make sure all preceding modifications appear first */
- smp_store_release(&lrugen->max_seq, lrugen->max_seq + 1);
-@@ -4101,6 +4215,433 @@ void lru_gen_look_around(struct page_vma
- }
-
- /******************************************************************************
-+ * the eviction
-+ ******************************************************************************/
-+
-+static bool sort_page(struct page *page, struct lruvec *lruvec, int tier_idx)
-+{
-+ bool success;
-+ int gen = page_lru_gen(page);
-+ int type = page_is_file_lru(page);
-+ int zone = page_zonenum(page);
-+ int tier = page_lru_tier(page);
-+ int delta = thp_nr_pages(page);
-+ struct lrugen *lrugen = &lruvec->evictable;
-+
-+ VM_BUG_ON_PAGE(gen >= MAX_NR_GENS, page);
-+
-+ /* an mlocked page? */
-+ if (!page_evictable(page)) {
-+ success = lru_gen_del_page(page, lruvec, true);
-+ VM_BUG_ON_PAGE(!success, page);
-+ SetPageUnevictable(page);
-+ add_page_to_lru_list(page, lruvec);
-+ __count_vm_events(UNEVICTABLE_PGCULLED, delta);
-+ return true;
-+ }
-+
-+ /* a lazy-free page that has been written into? */
-+ if (type && PageDirty(page) && PageAnon(page)) {
-+ success = lru_gen_del_page(page, lruvec, true);
-+ VM_BUG_ON_PAGE(!success, page);
-+ SetPageSwapBacked(page);
-+ add_page_to_lru_list_tail(page, lruvec);
-+ return true;
-+ }
-+
-+ /* page_update_gen() has updated this page? */
-+ if (gen != lru_gen_from_seq(lrugen->min_seq[type])) {
-+ list_move(&page->lru, &lrugen->lists[gen][type][zone]);
-+ return true;
-+ }
-+
-+ /* protect this page if its tier has a higher refaulted % */
-+ if (tier > tier_idx) {
-+ int hist = lru_hist_from_seq(gen);
-+
-+ page_inc_gen(page, lruvec, false);
-+ WRITE_ONCE(lrugen->protected[hist][type][tier - 1],
-+ lrugen->protected[hist][type][tier - 1] + delta);
-+ __mod_lruvec_state(lruvec, WORKINGSET_ACTIVATE_BASE + type, delta);
-+ return true;
-+ }
-+
-+ /* mark this page for reclaim if it's pending writeback */
-+ if (PageWriteback(page) || (type && PageDirty(page))) {
-+ page_inc_gen(page, lruvec, true);
-+ return true;
-+ }
-+
-+ return false;
-+}
-+
-+static bool isolate_page(struct page *page, struct lruvec *lruvec, struct scan_control *sc)
-+{
-+ bool success;
-+
-+ if (!sc->may_unmap && page_mapped(page))
-+ return false;
-+
-+ if (!(sc->may_writepage && (sc->gfp_mask & __GFP_IO)) &&
-+ (PageDirty(page) || (PageAnon(page) && !PageSwapCache(page))))
-+ return false;
-+
-+ if (!get_page_unless_zero(page))
-+ return false;
-+
-+ if (!TestClearPageLRU(page)) {
-+ put_page(page);
-+ return false;
-+ }
-+
-+ success = lru_gen_del_page(page, lruvec, true);
-+ VM_BUG_ON_PAGE(!success, page);
-+
-+ return true;
-+}
-+
-+static int scan_pages(struct lruvec *lruvec, struct scan_control *sc,
-+ int type, int tier, struct list_head *list)
-+{
-+ int gen, zone;
-+ enum vm_event_item item;
-+ int sorted = 0;
-+ int scanned = 0;
-+ int isolated = 0;
-+ int remaining = MAX_BATCH_SIZE;
-+ struct lrugen *lrugen = &lruvec->evictable;
-+ struct mem_cgroup *memcg = lruvec_memcg(lruvec);
-+
-+ VM_BUG_ON(!list_empty(list));
-+
-+ if (get_nr_gens(lruvec, type) == MIN_NR_GENS)
-+ return 0;
-+
-+ gen = lru_gen_from_seq(lrugen->min_seq[type]);
-+
-+ for (zone = sc->reclaim_idx; zone >= 0; zone--) {
-+ LIST_HEAD(moved);
-+ int skipped = 0;
-+ struct list_head *head = &lrugen->lists[gen][type][zone];
-+
-+ while (!list_empty(head)) {
-+ struct page *page = lru_to_page(head);
-+ int delta = thp_nr_pages(page);
-+
-+ VM_BUG_ON_PAGE(PageTail(page), page);
-+ VM_BUG_ON_PAGE(PageUnevictable(page), page);
-+ VM_BUG_ON_PAGE(PageActive(page), page);
-+ VM_BUG_ON_PAGE(page_is_file_lru(page) != type, page);
-+ VM_BUG_ON_PAGE(page_zonenum(page) != zone, page);
-+
-+ prefetchw_prev_lru_page(page, head, flags);
-+
-+ scanned += delta;
-+
-+ if (sort_page(page, lruvec, tier))
-+ sorted += delta;
-+ else if (isolate_page(page, lruvec, sc)) {
-+ list_add(&page->lru, list);
-+ isolated += delta;
-+ } else {
-+ list_move(&page->lru, &moved);
-+ skipped += delta;
-+ }
-+
-+ if (!--remaining || max(isolated, skipped) >= MIN_BATCH_SIZE)
-+ break;
-+ }
-+
-+ if (skipped) {
-+ list_splice(&moved, head);
-+ __count_zid_vm_events(PGSCAN_SKIP, zone, skipped);
-+ }
-+
-+ if (!remaining || isolated >= MIN_BATCH_SIZE)
-+ break;
-+ }
-+
-+ item = current_is_kswapd() ? PGSCAN_KSWAPD : PGSCAN_DIRECT;
-+ if (!cgroup_reclaim(sc)) {
-+ __count_vm_events(item, isolated);
-+ __count_vm_events(PGREFILL, sorted);
-+ }
-+ __count_memcg_events(memcg, item, isolated);
-+ __count_memcg_events(memcg, PGREFILL, sorted);
-+ __count_vm_events(PGSCAN_ANON + type, isolated);
-+
-+ /*
-+ * We may have trouble finding eligible pages due to reclaim_idx,
-+ * may_unmap and may_writepage. Check `remaining` to make sure we won't
-+ * be stuck if we aren't making enough progress.
-+ */
-+ return isolated || !remaining ? scanned : 0;
-+}
-+
-+static int get_tier_idx(struct lruvec *lruvec, int type)
-+{
-+ int tier;
-+ struct ctrl_pos sp, pv;
-+
-+ /*
-+ * Ideally we don't want to evict upper tiers that have higher refaulted
-+ * %. However, we need to leave a margin for the fluctuation in
-+ * refaulted %. So we use a larger gain factor to make sure upper tiers
-+ * are indeed more active. We choose 2 because the lowest upper tier
-+ * would have twice of refaulted % of the base tier, according to their
-+ * numbers of accesses.
-+ */
-+ read_ctrl_pos(lruvec, type, 0, 1, &sp);
-+ for (tier = 1; tier < MAX_NR_TIERS; tier++) {
-+ read_ctrl_pos(lruvec, type, tier, 2, &pv);
-+ if (!positive_ctrl_err(&sp, &pv))
-+ break;
-+ }
-+
-+ return tier - 1;
-+}
-+
-+static int get_type_to_scan(struct lruvec *lruvec, int swappiness, int *tier_idx)
-+{
-+ int type, tier;
-+ struct ctrl_pos sp, pv;
-+ int gain[ANON_AND_FILE] = { swappiness, 200 - swappiness };
-+
-+ /*
-+ * Compare refaulted % between the base tiers of anon and file to
-+ * determine which type to evict. Also need to compare refaulted % of
-+ * the upper tiers of the selected type with that of the base tier of
-+ * the other type to determine which tier of the selected type to evict.
-+ */
-+ read_ctrl_pos(lruvec, 0, 0, gain[0], &sp);
-+ read_ctrl_pos(lruvec, 1, 0, gain[1], &pv);
-+ type = positive_ctrl_err(&sp, &pv);
-+
-+ read_ctrl_pos(lruvec, !type, 0, gain[!type], &sp);
-+ for (tier = 1; tier < MAX_NR_TIERS; tier++) {
-+ read_ctrl_pos(lruvec, type, tier, gain[type], &pv);
-+ if (!positive_ctrl_err(&sp, &pv))
-+ break;
-+ }
-+
-+ *tier_idx = tier - 1;
-+
-+ return type;
-+}
-+
-+static int isolate_pages(struct lruvec *lruvec, struct scan_control *sc, int swappiness,
-+ int *type_scanned, struct list_head *list)
-+{
-+ int i;
-+ int type;
-+ int scanned;
-+ int tier = -1;
-+ DEFINE_MIN_SEQ(lruvec);
-+
-+ VM_BUG_ON(!seq_is_valid(lruvec));
-+
-+ /*
-+ * Try to select a type based on generations and swappiness, and if that
-+ * fails, fall back to get_type_to_scan(). When anon and file are both
-+ * available from the same generation, swappiness 200 is interpreted as
-+ * anon first and swappiness 1 is interpreted as file first.
-+ */
-+ if (!swappiness)
-+ type = 1;
-+ else if (min_seq[0] < min_seq[1])
-+ type = 0;
-+ else if (swappiness == 1)
-+ type = 1;
-+ else if (swappiness == 200)
-+ type = 0;
-+ else
-+ type = get_type_to_scan(lruvec, swappiness, &tier);
-+
-+ for (i = !swappiness; i < ANON_AND_FILE; i++) {
-+ if (tier < 0)
-+ tier = get_tier_idx(lruvec, type);
-+
-+ scanned = scan_pages(lruvec, sc, type, tier, list);
-+ if (scanned)
-+ break;
-+
-+ type = !type;
-+ tier = -1;
-+ }
-+
-+ *type_scanned = type;
-+
-+ return scanned;
-+}
-+
-+/* Main function used by the foreground, the background and the user-triggered eviction. */
-+static int evict_pages(struct lruvec *lruvec, struct scan_control *sc, int swappiness)
-+{
-+ int type;
-+ int scanned;
-+ int reclaimed;
-+ LIST_HEAD(list);
-+ struct page *page;
-+ enum vm_event_item item;
-+ struct reclaim_stat stat;
-+ struct mm_walk_args *args;
-+ struct mem_cgroup *memcg = lruvec_memcg(lruvec);
-+ struct pglist_data *pgdat = lruvec_pgdat(lruvec);
-+
-+ spin_lock_irq(&lruvec->lru_lock);
-+
-+ scanned = isolate_pages(lruvec, sc, swappiness, &type, &list);
-+
-+ if (try_to_inc_min_seq(lruvec, swappiness))
-+ scanned++;
-+
-+ if (get_nr_gens(lruvec, 1) == MIN_NR_GENS)
-+ scanned = 0;
-+
-+ spin_unlock_irq(&lruvec->lru_lock);
-+
-+ if (list_empty(&list))
-+ return scanned;
-+
-+ reclaimed = shrink_page_list(&list, pgdat, sc, &stat, false);
-+ /*
-+ * We need to prevent rejected pages from being added back to the same
-+ * lists they were isolated from. Otherwise we may risk looping on them
-+ * forever.
-+ */
-+ list_for_each_entry(page, &list, lru) {
-+ if (!PageReclaim(page) || !(PageDirty(page) || PageWriteback(page)))
-+ SetPageActive(page);
-+
-+ ClearPageReferenced(page);
-+ ClearPageWorkingset(page);
-+ }
-+
-+ spin_lock_irq(&lruvec->lru_lock);
-+
-+ move_pages_to_lru(lruvec, &list);
-+
-+ args = current->reclaim_state ? current->reclaim_state->mm_walk_args : NULL;
-+ if (args && args->batch_size)
-+ reset_batch_size(lruvec, args);
-+
-+ item = current_is_kswapd() ? PGSTEAL_KSWAPD : PGSTEAL_DIRECT;
-+ if (!cgroup_reclaim(sc))
-+ __count_vm_events(item, reclaimed);
-+ __count_memcg_events(memcg, item, reclaimed);
-+ __count_vm_events(PGSTEAL_ANON + type, reclaimed);
-+
-+ spin_unlock_irq(&lruvec->lru_lock);
-+
-+ mem_cgroup_uncharge_list(&list);
-+ free_unref_page_list(&list);
-+
-+ sc->nr_reclaimed += reclaimed;
-+
-+ return scanned;
-+}
-+
-+static long get_nr_to_scan(struct lruvec *lruvec, struct scan_control *sc, int swappiness)
-+{
-+ bool low;
-+ long nr_to_scan;
-+ struct mem_cgroup *memcg = lruvec_memcg(lruvec);
-+ int priority = sc->priority;
-+ DEFINE_MAX_SEQ(lruvec);
-+ DEFINE_MIN_SEQ(lruvec);
-+
-+ if (mem_cgroup_below_min(memcg) ||
-+ (mem_cgroup_below_low(memcg) && !sc->memcg_low_reclaim))
-+ return 0;
-+
-+ if (sc->nr_reclaimed >= sc->nr_to_reclaim) {
-+ priority = DEF_PRIORITY;
-+ sc->force_deactivate = 0;
-+ }
-+
-+ nr_to_scan = get_nr_evictable(lruvec, sc, swappiness, max_seq, min_seq, &low);
-+ if (!nr_to_scan)
-+ return 0;
-+
-+ nr_to_scan >>= priority;
-+
-+ if (!mem_cgroup_online(memcg))
-+ nr_to_scan++;
-+
-+ if (!nr_to_scan)
-+ return 0;
-+
-+ if (current_is_kswapd()) {
-+ /* leave the work to lru_gen_age_node() */
-+ if (max_seq - min_seq[1] < MIN_NR_GENS)
-+ return 0;
-+
-+ if (!low)
-+ sc->force_deactivate = 0;
-+
-+ return nr_to_scan;
-+ }
-+
-+ if (max_seq - min_seq[1] >= MIN_NR_GENS)
-+ return nr_to_scan;
-+
-+ /* move onto slab and other memcgs if we haven't tried them all */
-+ if (!sc->force_deactivate) {
-+ sc->skipped_deactivate = 1;
-+ return 0;
-+ }
-+
-+ return try_to_inc_max_seq(lruvec, sc, swappiness, max_seq, true) ? nr_to_scan : 0;
-+}
-+
-+static void lru_gen_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
-+{
-+ struct blk_plug plug;
-+ long scanned = 0;
-+ struct mem_cgroup *memcg = lruvec_memcg(lruvec);
-+ struct pglist_data *pgdat = lruvec_pgdat(lruvec);
-+
-+ lru_add_drain();
-+
-+ if (current_is_kswapd())
-+ current->reclaim_state->mm_walk_args = &pgdat->mm_walk_args;
-+
-+ blk_start_plug(&plug);
-+
-+ while (true) {
-+ int delta;
-+ int swappiness;
-+ long nr_to_scan;
-+
-+ if (sc->may_swap)
-+ swappiness = get_swappiness(memcg);
-+ else if (!cgroup_reclaim(sc) && get_swappiness(memcg))
-+ swappiness = 1;
-+ else
-+ swappiness = 0;
-+
-+ nr_to_scan = get_nr_to_scan(lruvec, sc, swappiness);
-+ if (!nr_to_scan)
-+ break;
-+
-+ delta = evict_pages(lruvec, sc, swappiness);
-+ if (!delta)
-+ break;
-+
-+ scanned += delta;
-+ if (scanned >= nr_to_scan)
-+ break;
-+
-+ cond_resched();
-+ }
-+
-+ blk_finish_plug(&plug);
-+
-+ if (current_is_kswapd())
-+ current->reclaim_state->mm_walk_args = NULL;
-+}
-+
-+/******************************************************************************
- * state change
- ******************************************************************************/
-
-@@ -4355,6 +4896,10 @@ static void lru_gen_age_node(struct pgli
- {
- }
-
-+static void lru_gen_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
-+{
-+}
-+
- #endif /* CONFIG_LRU_GEN */
-
- static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
-@@ -4368,6 +4913,11 @@ static void shrink_lruvec(struct lruvec
- bool proportional_reclaim;
- struct blk_plug plug;
-
-+ if (lru_gen_enabled()) {
-+ lru_gen_shrink_lruvec(lruvec, sc);
-+ return;
-+ }
-+
- get_scan_count(lruvec, sc, nr);
-
- /* Record the original scan target for proportional adjustments later */
-@@ -4839,6 +5389,9 @@ static void snapshot_refaults(struct mem
- struct lruvec *target_lruvec;
- unsigned long refaults;
-
-+ if (lru_gen_enabled())
-+ return;
-+
- target_lruvec = mem_cgroup_lruvec(target_memcg, pgdat);
- refaults = lruvec_page_state(target_lruvec, WORKINGSET_ACTIVATE_ANON);
- target_lruvec->refaults[0] = refaults;
---- a/mm/workingset.c
-+++ b/mm/workingset.c
-@@ -187,7 +187,6 @@ static unsigned int bucket_order __read_
- static void *pack_shadow(int memcgid, pg_data_t *pgdat, unsigned long eviction,
- bool workingset)
- {
-- eviction >>= bucket_order;
- eviction &= EVICTION_MASK;
- eviction = (eviction << MEM_CGROUP_ID_SHIFT) | memcgid;
- eviction = (eviction << NODES_SHIFT) | pgdat->node_id;
-@@ -212,10 +211,117 @@ static void unpack_shadow(void *shadow,
-
- *memcgidp = memcgid;
- *pgdat = NODE_DATA(nid);
-- *evictionp = entry << bucket_order;
-+ *evictionp = entry;
- *workingsetp = workingset;
- }
-
-+#ifdef CONFIG_LRU_GEN
-+
-+static int page_lru_refs(struct page *page)
-+{
-+ unsigned long flags = READ_ONCE(page->flags);
-+
-+ BUILD_BUG_ON(LRU_GEN_WIDTH + LRU_REFS_WIDTH > BITS_PER_LONG - EVICTION_SHIFT);
-+
-+ /* see the comment on MAX_NR_TIERS */
-+ return flags & BIT(PG_workingset) ? (flags & LRU_REFS_MASK) >> LRU_REFS_PGOFF : 0;
-+}
-+
-+/* Return a token to be stored in the shadow entry of a page being evicted. */
-+static void *lru_gen_eviction(struct page *page)
-+{
-+ int hist, tier;
-+ unsigned long token;
-+ unsigned long min_seq;
-+ struct lruvec *lruvec;
-+ struct lrugen *lrugen;
-+ int type = page_is_file_lru(page);
-+ int refs = page_lru_refs(page);
-+ int delta = thp_nr_pages(page);
-+ bool workingset = PageWorkingset(page);
-+ struct mem_cgroup *memcg = page_memcg(page);
-+ struct pglist_data *pgdat = page_pgdat(page);
-+
-+ lruvec = mem_cgroup_lruvec(memcg, pgdat);
-+ lrugen = &lruvec->evictable;
-+ min_seq = READ_ONCE(lrugen->min_seq[type]);
-+ token = (min_seq << LRU_REFS_WIDTH) | refs;
-+
-+ hist = lru_hist_from_seq(min_seq);
-+ tier = lru_tier_from_refs(refs + workingset);
-+ atomic_long_add(delta, &lrugen->evicted[hist][type][tier]);
-+
-+ return pack_shadow(mem_cgroup_id(memcg), pgdat, token, workingset);
-+}
-+
-+/* Count a refaulted page based on the token stored in its shadow entry. */
-+static void lru_gen_refault(struct page *page, void *shadow)
-+{
-+ int hist, tier, refs;
-+ int memcg_id;
-+ bool workingset;
-+ unsigned long token;
-+ unsigned long min_seq;
-+ struct lruvec *lruvec;
-+ struct lrugen *lrugen;
-+ struct mem_cgroup *memcg;
-+ struct pglist_data *pgdat;
-+ int type = page_is_file_lru(page);
-+ int delta = thp_nr_pages(page);
-+
-+ unpack_shadow(shadow, &memcg_id, &pgdat, &token, &workingset);
-+ if (page_pgdat(page) != pgdat)
-+ return;
-+
-+ rcu_read_lock();
-+ memcg = page_memcg_rcu(page);
-+ if (mem_cgroup_id(memcg) != memcg_id)
-+ goto unlock;
-+
-+ refs = token & (BIT(LRU_REFS_WIDTH) - 1);
-+ if (refs && !workingset)
-+ goto unlock;
-+
-+ token >>= LRU_REFS_WIDTH;
-+ lruvec = mem_cgroup_lruvec(memcg, pgdat);
-+ lrugen = &lruvec->evictable;
-+ min_seq = READ_ONCE(lrugen->min_seq[type]);
-+ if (token != (min_seq & (EVICTION_MASK >> LRU_REFS_WIDTH)))
-+ goto unlock;
-+
-+ hist = lru_hist_from_seq(min_seq);
-+ tier = lru_tier_from_refs(refs + workingset);
-+ atomic_long_add(delta, &lrugen->refaulted[hist][type][tier]);
-+ mod_lruvec_state(lruvec, WORKINGSET_REFAULT_BASE + type, delta);
-+
-+ /*
-+ * Tiers don't offer any protection to pages accessed via page tables.
-+ * That's what generations do. Tiers can't fully protect pages after
-+ * their numbers of accesses has exceeded the max value. Conservatively
-+ * count these two conditions as stalls even though they might not
-+ * indicate any real memory pressure.
-+ */
-+ if (task_in_nonseq_fault() || refs + workingset == BIT(LRU_REFS_WIDTH)) {
-+ SetPageWorkingset(page);
-+ mod_lruvec_state(lruvec, WORKINGSET_RESTORE_BASE + type, delta);
-+ }
-+unlock:
-+ rcu_read_unlock();
-+}
-+
-+#else
-+
-+static void *lru_gen_eviction(struct page *page)
-+{
-+ return NULL;
-+}
-+
-+static void lru_gen_refault(struct page *page, void *shadow)
-+{
-+}
-+
-+#endif /* CONFIG_LRU_GEN */
-+
- /**
- * workingset_age_nonresident - age non-resident entries as LRU ages
- * @lruvec: the lruvec that was aged
-@@ -264,10 +370,14 @@ void *workingset_eviction(struct page *p
- VM_BUG_ON_PAGE(page_count(page), page);
- VM_BUG_ON_PAGE(!PageLocked(page), page);
-
-+ if (lru_gen_enabled())
-+ return lru_gen_eviction(page);
-+
- lruvec = mem_cgroup_lruvec(target_memcg, pgdat);
- /* XXX: target_memcg can be NULL, go through lruvec */
- memcgid = mem_cgroup_id(lruvec_memcg(lruvec));
- eviction = atomic_long_read(&lruvec->nonresident_age);
-+ eviction >>= bucket_order;
- workingset_age_nonresident(lruvec, thp_nr_pages(page));
- return pack_shadow(memcgid, pgdat, eviction, PageWorkingset(page));
- }
-@@ -296,7 +406,13 @@ void workingset_refault(struct page *pag
- bool workingset;
- int memcgid;
-
-+ if (lru_gen_enabled()) {
-+ lru_gen_refault(page, shadow);
-+ return;
-+ }
-+
- unpack_shadow(shadow, &memcgid, &pgdat, &eviction, &workingset);
-+ eviction <<= bucket_order;
-
- rcu_read_lock();
- /*