diff --git a/target/linux/generic/backport-5.15/020-v6.3-24-mm-multi-gen-LRU-remove-aging-fairness-safeguard.patch b/target/linux/generic/backport-5.15/020-v6.3-24-mm-multi-gen-LRU-remove-aging-fairness-safeguard.patch
new file mode 100644
index 0000000000..82ba77dec2
--- /dev/null
+++ b/target/linux/generic/backport-5.15/020-v6.3-24-mm-multi-gen-LRU-remove-aging-fairness-safeguard.patch
@@ -0,0 +1,292 @@
+From e20b7386fccc18c791796eb1dc1a91eee3ccf801 Mon Sep 17 00:00:00 2001
+From: Yu Zhao <yuzhao@google.com>
+Date: Wed, 21 Dec 2022 21:19:02 -0700
+Subject: [PATCH 24/29] mm: multi-gen LRU: remove aging fairness safeguard
+
+Recall that the aging produces the youngest generation: first it scans
+for accessed pages and updates their gen counters; then it increments
+lrugen->max_seq.
+
+The current aging fairness safeguard for kswapd uses two passes to
+ensure fairness across multiple eligible memcgs. On the first pass,
+which is shared with the eviction, it checks whether all eligible
+memcgs are low on cold pages. If so, it requires a second pass, during
+which it ages all those memcgs at the same time.
+
+With the memcg LRU, the aging runs only when necessary while still
+ensuring eventual fairness. Therefore, the current aging fairness
+safeguard for kswapd is no longer needed.
+
+Note that memcg LRU only applies to global reclaim. For memcg reclaim,
+the aging can be unfair to different memcgs, i.e., their
+lrugen->max_seq can be incremented at different paces.
+
+Link: https://lkml.kernel.org/r/20221222041905.2431096-5-yuzhao@google.com
+Signed-off-by: Yu Zhao <yuzhao@google.com>
+Cc: Johannes Weiner <hannes@cmpxchg.org>
+Cc: Jonathan Corbet <corbet@lwn.net>
+Cc: Michael Larabel <Michael@MichaelLarabel.com>
+Cc: Michal Hocko <mhocko@kernel.org>
+Cc: Mike Rapoport <rppt@kernel.org>
+Cc: Roman Gushchin <roman.gushchin@linux.dev>
+Cc: Suren Baghdasaryan <surenb@google.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+---
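+[Illustrative note, not part of the upstream commit: the toy program
+below models only the "out of cold pages" check that now drives the
+aging decision in should_run_aging()/get_nr_to_scan() after this
+patch. It is a freestanding userspace C sketch, not kernel code;
+MIN_NR_GENS is 2 in the kernel, and the other heuristics in
+should_run_aging() are omitted.]
+
+	#include <stdio.h>
+	#include <stdbool.h>
+
+	#define MIN_NR_GENS 2	/* fewer generations would stall eviction */
+
+	/*
+	 * A lruvec is out of cold pages when no more than MIN_NR_GENS
+	 * generations separate min_seq from max_seq; in that case the
+	 * aging, not the eviction, must run next.
+	 */
+	static bool should_run_aging(unsigned long max_seq, unsigned long min_seq)
+	{
+		return min_seq + MIN_NR_GENS > max_seq;
+	}
+
+	int main(void)
+	{
+		unsigned long max_seq = 3, min_seq = 2;
+
+		if (should_run_aging(max_seq, min_seq))
+			max_seq++;	/* the aging produces the youngest generation */
+		else
+			min_seq++;	/* the eviction consumes the oldest generation */
+
+		printf("max_seq=%lu min_seq=%lu\n", max_seq, min_seq);
+		return 0;
+	}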
+ mm/vmscan.c | 126 ++++++++++++++++++++++++----------------------------
+ 1 file changed, 59 insertions(+), 67 deletions(-)
+
+diff --git a/mm/vmscan.c b/mm/vmscan.c
+index 40e7a947c5c7..7159436872ba 100644
+--- a/mm/vmscan.c
++++ b/mm/vmscan.c
+@@ -131,7 +131,6 @@ struct scan_control {
+
+ #ifdef CONFIG_LRU_GEN
+ /* help kswapd make better choices among multiple memcgs */
+- unsigned int memcgs_need_aging:1;
+ unsigned long last_reclaimed;
+ #endif
+
+@@ -4184,7 +4183,7 @@ static bool try_to_inc_max_seq(struct lruvec *lruvec, unsigned long max_seq,
+ return true;
+ }
+
+-static bool should_run_aging(struct lruvec *lruvec, unsigned long max_seq, unsigned long *min_seq,
++static bool should_run_aging(struct lruvec *lruvec, unsigned long max_seq,
+ struct scan_control *sc, bool can_swap, unsigned long *nr_to_scan)
+ {
+ int gen, type, zone;
+@@ -4193,6 +4192,13 @@ static bool should_run_aging(struct lruvec *lruvec, unsigned long max_seq, unsig
+ unsigned long total = 0;
+ struct lru_gen_page *lrugen = &lruvec->lrugen;
+ struct mem_cgroup *memcg = lruvec_memcg(lruvec);
++ DEFINE_MIN_SEQ(lruvec);
++
++ /* whether this lruvec is completely out of cold pages */
++ if (min_seq[!can_swap] + MIN_NR_GENS > max_seq) {
++ *nr_to_scan = 0;
++ return true;
++ }
+
+ for (type = !can_swap; type < ANON_AND_FILE; type++) {
+ unsigned long seq;
+@@ -4221,8 +4227,6 @@ static bool should_run_aging(struct lruvec *lruvec, unsigned long max_seq, unsig
+ * stalls when the number of generations reaches MIN_NR_GENS. Hence, the
+ * ideal number of generations is MIN_NR_GENS+1.
+ */
+- if (min_seq[!can_swap] + MIN_NR_GENS > max_seq)
+- return true;
+ if (min_seq[!can_swap] + MIN_NR_GENS < max_seq)
+ return false;
+
+@@ -4241,40 +4245,54 @@ static bool should_run_aging(struct lruvec *lruvec, unsigned long max_seq, unsig
+ return false;
+ }
+
+-static bool age_lruvec(struct lruvec *lruvec, struct scan_control *sc, unsigned long min_ttl)
++static bool lruvec_is_sizable(struct lruvec *lruvec, struct scan_control *sc)
+ {
+- bool need_aging;
+- unsigned long nr_to_scan;
+- int swappiness = get_swappiness(lruvec, sc);
++ int gen, type, zone;
++ unsigned long total = 0;
++ bool can_swap = get_swappiness(lruvec, sc);
++ struct lru_gen_page *lrugen = &lruvec->lrugen;
+ struct mem_cgroup *memcg = lruvec_memcg(lruvec);
+ DEFINE_MAX_SEQ(lruvec);
+ DEFINE_MIN_SEQ(lruvec);
+
+- VM_WARN_ON_ONCE(sc->memcg_low_reclaim);
++ for (type = !can_swap; type < ANON_AND_FILE; type++) {
++ unsigned long seq;
+
+- mem_cgroup_calculate_protection(NULL, memcg);
++ for (seq = min_seq[type]; seq <= max_seq; seq++) {
++ gen = lru_gen_from_seq(seq);
+
+- if (mem_cgroup_below_min(memcg))
+- return false;
++ for (zone = 0; zone < MAX_NR_ZONES; zone++)
++ total += max(READ_ONCE(lrugen->nr_pages[gen][type][zone]), 0L);
++ }
++ }
+
+- need_aging = should_run_aging(lruvec, max_seq, min_seq, sc, swappiness, &nr_to_scan);
++ /* whether the size is big enough to be helpful */
++ return mem_cgroup_online(memcg) ? (total >> sc->priority) : total;
++}
+
+- if (min_ttl) {
+- int gen = lru_gen_from_seq(min_seq[LRU_GEN_FILE]);
+- unsigned long birth = READ_ONCE(lruvec->lrugen.timestamps[gen]);
++static bool lruvec_is_reclaimable(struct lruvec *lruvec, struct scan_control *sc,
++ unsigned long min_ttl)
++{
++ int gen;
++ unsigned long birth;
++ struct mem_cgroup *memcg = lruvec_memcg(lruvec);
++ DEFINE_MIN_SEQ(lruvec);
+
+- if (time_is_after_jiffies(birth + min_ttl))
+- return false;
++ VM_WARN_ON_ONCE(sc->memcg_low_reclaim);
+
+- /* the size is likely too small to be helpful */
+- if (!nr_to_scan && sc->priority != DEF_PRIORITY)
+- return false;
+- }
++ /* see the comment on lru_gen_page */
++ gen = lru_gen_from_seq(min_seq[LRU_GEN_FILE]);
++ birth = READ_ONCE(lruvec->lrugen.timestamps[gen]);
+
+- if (need_aging)
+- try_to_inc_max_seq(lruvec, max_seq, sc, swappiness, false);
++ if (time_is_after_jiffies(birth + min_ttl))
++ return false;
+
+- return true;
++ if (!lruvec_is_sizable(lruvec, sc))
++ return false;
++
++ mem_cgroup_calculate_protection(NULL, memcg);
++
++ return !mem_cgroup_below_min(memcg);
+ }
+
+ /* to protect the working set of the last N jiffies */
+@@ -4283,46 +4301,32 @@ static unsigned long lru_gen_min_ttl __read_mostly;
+ static void lru_gen_age_node(struct pglist_data *pgdat, struct scan_control *sc)
+ {
+ struct mem_cgroup *memcg;
+- bool success = false;
+ unsigned long min_ttl = READ_ONCE(lru_gen_min_ttl);
+
+ VM_WARN_ON_ONCE(!current_is_kswapd());
+
+ sc->last_reclaimed = sc->nr_reclaimed;
+
+- /*
+- * To reduce the chance of going into the aging path, which can be
+- * costly, optimistically skip it if the flag below was cleared in the
+- * eviction path. This improves the overall performance when multiple
+- * memcgs are available.
+- */
+- if (!sc->memcgs_need_aging) {
+- sc->memcgs_need_aging = true;
++ /* check the order to exclude compaction-induced reclaim */
++ if (!min_ttl || sc->order || sc->priority == DEF_PRIORITY)
+ return;
+- }
+-
+- set_mm_walk(pgdat);
+
+ memcg = mem_cgroup_iter(NULL, NULL, NULL);
+ do {
+ struct lruvec *lruvec = mem_cgroup_lruvec(memcg, pgdat);
+
+- if (age_lruvec(lruvec, sc, min_ttl))
+- success = true;
++ if (lruvec_is_reclaimable(lruvec, sc, min_ttl)) {
++ mem_cgroup_iter_break(NULL, memcg);
++ return;
++ }
+
+ cond_resched();
+ } while ((memcg = mem_cgroup_iter(NULL, memcg, NULL)));
+
+- clear_mm_walk();
+-
+- /* check the order to exclude compaction-induced reclaim */
+- if (success || !min_ttl || sc->order)
+- return;
+-
+ /*
+ * The main goal is to OOM kill if every generation from all memcgs is
+ * younger than min_ttl. However, another possibility is all memcgs are
+- * either below min or empty.
++ * either too small or below min.
+ */
+ if (mutex_trylock(&oom_lock)) {
+ struct oom_control oc = {
+@@ -4830,33 +4834,27 @@ static int evict_pages(struct lruvec *lruvec, struct scan_control *sc, int swapp
+ * reclaim.
+ */
+ static unsigned long get_nr_to_scan(struct lruvec *lruvec, struct scan_control *sc,
+- bool can_swap, bool *need_aging)
++ bool can_swap)
+ {
+ unsigned long nr_to_scan;
+ struct mem_cgroup *memcg = lruvec_memcg(lruvec);
+ DEFINE_MAX_SEQ(lruvec);
+- DEFINE_MIN_SEQ(lruvec);
+
+ if (mem_cgroup_below_min(memcg) ||
+ (mem_cgroup_below_low(memcg) && !sc->memcg_low_reclaim))
+ return 0;
+
+- *need_aging = should_run_aging(lruvec, max_seq, min_seq, sc, can_swap, &nr_to_scan);
+- if (!*need_aging)
++ if (!should_run_aging(lruvec, max_seq, sc, can_swap, &nr_to_scan))
+ return nr_to_scan;
+
+ /* skip the aging path at the default priority */
+ if (sc->priority == DEF_PRIORITY)
+- goto done;
++ return nr_to_scan;
+
+- /* leave the work to lru_gen_age_node() */
+- if (current_is_kswapd())
+- return 0;
++ try_to_inc_max_seq(lruvec, max_seq, sc, can_swap, false);
+
+- if (try_to_inc_max_seq(lruvec, max_seq, sc, can_swap, false))
+- return nr_to_scan;
+-done:
+- return min_seq[!can_swap] + MIN_NR_GENS <= max_seq ? nr_to_scan : 0;
++ /* skip this lruvec as it's low on cold pages */
++ return 0;
+ }
+
+ static unsigned long get_nr_to_reclaim(struct scan_control *sc)
+@@ -4875,9 +4873,7 @@ static unsigned long get_nr_to_reclaim(struct scan_control *sc)
+ static void lru_gen_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
+ {
+ struct blk_plug plug;
+- bool need_aging = false;
+ unsigned long scanned = 0;
+- unsigned long reclaimed = sc->nr_reclaimed;
+ unsigned long nr_to_reclaim = get_nr_to_reclaim(sc);
+
+ lru_add_drain();
+@@ -4898,13 +4894,13 @@ static void lru_gen_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc
+ else
+ swappiness = 0;
+
+- nr_to_scan = get_nr_to_scan(lruvec, sc, swappiness, &need_aging);
++ nr_to_scan = get_nr_to_scan(lruvec, sc, swappiness);
+ if (!nr_to_scan)
+- goto done;
++ break;
+
+ delta = evict_pages(lruvec, sc, swappiness);
+ if (!delta)
+- goto done;
++ break;
+
+ scanned += delta;
+ if (scanned >= nr_to_scan)
+@@ -4916,10 +4912,6 @@ static void lru_gen_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc
+ cond_resched();
+ }
+
+- /* see the comment in lru_gen_age_node() */
+- if (sc->nr_reclaimed - reclaimed >= MIN_LRU_BATCH && !need_aging)
+- sc->memcgs_need_aging = false;
+-done:
+ clear_mm_walk();
+
+ blk_finish_plug(&plug);
+--
+2.40.0
+