diff options
Diffstat (limited to 'target/linux/generic/backport-5.15/020-v6.3-24-mm-multi-gen-LRU-remove-aging-fairness-safeguard.patch')
-rw-r--r-- | target/linux/generic/backport-5.15/020-v6.3-24-mm-multi-gen-LRU-remove-aging-fairness-safeguard.patch | 292 |
1 files changed, 292 insertions, 0 deletions
diff --git a/target/linux/generic/backport-5.15/020-v6.3-24-mm-multi-gen-LRU-remove-aging-fairness-safeguard.patch b/target/linux/generic/backport-5.15/020-v6.3-24-mm-multi-gen-LRU-remove-aging-fairness-safeguard.patch new file mode 100644 index 0000000000..82ba77dec2 --- /dev/null +++ b/target/linux/generic/backport-5.15/020-v6.3-24-mm-multi-gen-LRU-remove-aging-fairness-safeguard.patch @@ -0,0 +1,292 @@ +From e20b7386fccc18c791796eb1dc1a91eee3ccf801 Mon Sep 17 00:00:00 2001 +From: Yu Zhao <yuzhao@google.com> +Date: Wed, 21 Dec 2022 21:19:02 -0700 +Subject: [PATCH 24/29] mm: multi-gen LRU: remove aging fairness safeguard + +Recall that the aging produces the youngest generation: first it scans +for accessed pages and updates their gen counters; then it increments +lrugen->max_seq. + +The current aging fairness safeguard for kswapd uses two passes to +ensure the fairness to multiple eligible memcgs. On the first pass, +which is shared with the eviction, it checks whether all eligible +memcgs are low on cold pages. If so, it requires a second pass, on +which it ages all those memcgs at the same time. + +With memcg LRU, the aging, while ensuring eventual fairness, will run +when necessary. Therefore the current aging fairness safeguard for +kswapd will not be needed. + +Note that memcg LRU only applies to global reclaim. For memcg reclaim, +the aging can be unfair to different memcgs, i.e., their +lrugen->max_seq can be incremented at different paces. + +Link: https://lkml.kernel.org/r/20221222041905.2431096-5-yuzhao@google.com +Signed-off-by: Yu Zhao <yuzhao@google.com> +Cc: Johannes Weiner <hannes@cmpxchg.org> +Cc: Jonathan Corbet <corbet@lwn.net> +Cc: Michael Larabel <Michael@MichaelLarabel.com> +Cc: Michal Hocko <mhocko@kernel.org> +Cc: Mike Rapoport <rppt@kernel.org> +Cc: Roman Gushchin <roman.gushchin@linux.dev> +Cc: Suren Baghdasaryan <surenb@google.com> +Signed-off-by: Andrew Morton <akpm@linux-foundation.org> +--- + mm/vmscan.c | 126 ++++++++++++++++++++++++---------------------------- + 1 file changed, 59 insertions(+), 67 deletions(-) + +diff --git a/mm/vmscan.c b/mm/vmscan.c +index 40e7a947c5c7..7159436872ba 100644 +--- a/mm/vmscan.c ++++ b/mm/vmscan.c +@@ -131,7 +131,6 @@ struct scan_control { + + #ifdef CONFIG_LRU_GEN + /* help kswapd make better choices among multiple memcgs */ +- unsigned int memcgs_need_aging:1; + unsigned long last_reclaimed; + #endif + +@@ -4184,7 +4183,7 @@ static bool try_to_inc_max_seq(struct lruvec *lruvec, unsigned long max_seq, + return true; + } + +-static bool should_run_aging(struct lruvec *lruvec, unsigned long max_seq, unsigned long *min_seq, ++static bool should_run_aging(struct lruvec *lruvec, unsigned long max_seq, + struct scan_control *sc, bool can_swap, unsigned long *nr_to_scan) + { + int gen, type, zone; +@@ -4193,6 +4192,13 @@ static bool should_run_aging(struct lruvec *lruvec, unsigned long max_seq, unsig + unsigned long total = 0; + struct lru_gen_page *lrugen = &lruvec->lrugen; + struct mem_cgroup *memcg = lruvec_memcg(lruvec); ++ DEFINE_MIN_SEQ(lruvec); ++ ++ /* whether this lruvec is completely out of cold pages */ ++ if (min_seq[!can_swap] + MIN_NR_GENS > max_seq) { ++ *nr_to_scan = 0; ++ return true; ++ } + + for (type = !can_swap; type < ANON_AND_FILE; type++) { + unsigned long seq; +@@ -4221,8 +4227,6 @@ static bool should_run_aging(struct lruvec *lruvec, unsigned long max_seq, unsig + * stalls when the number of generations reaches MIN_NR_GENS. Hence, the + * ideal number of generations is MIN_NR_GENS+1. + */ +- if (min_seq[!can_swap] + MIN_NR_GENS > max_seq) +- return true; + if (min_seq[!can_swap] + MIN_NR_GENS < max_seq) + return false; + +@@ -4241,40 +4245,54 @@ static bool should_run_aging(struct lruvec *lruvec, unsigned long max_seq, unsig + return false; + } + +-static bool age_lruvec(struct lruvec *lruvec, struct scan_control *sc, unsigned long min_ttl) ++static bool lruvec_is_sizable(struct lruvec *lruvec, struct scan_control *sc) + { +- bool need_aging; +- unsigned long nr_to_scan; +- int swappiness = get_swappiness(lruvec, sc); ++ int gen, type, zone; ++ unsigned long total = 0; ++ bool can_swap = get_swappiness(lruvec, sc); ++ struct lru_gen_page *lrugen = &lruvec->lrugen; + struct mem_cgroup *memcg = lruvec_memcg(lruvec); + DEFINE_MAX_SEQ(lruvec); + DEFINE_MIN_SEQ(lruvec); + +- VM_WARN_ON_ONCE(sc->memcg_low_reclaim); ++ for (type = !can_swap; type < ANON_AND_FILE; type++) { ++ unsigned long seq; + +- mem_cgroup_calculate_protection(NULL, memcg); ++ for (seq = min_seq[type]; seq <= max_seq; seq++) { ++ gen = lru_gen_from_seq(seq); + +- if (mem_cgroup_below_min(memcg)) +- return false; ++ for (zone = 0; zone < MAX_NR_ZONES; zone++) ++ total += max(READ_ONCE(lrugen->nr_pages[gen][type][zone]), 0L); ++ } ++ } + +- need_aging = should_run_aging(lruvec, max_seq, min_seq, sc, swappiness, &nr_to_scan); ++ /* whether the size is big enough to be helpful */ ++ return mem_cgroup_online(memcg) ? (total >> sc->priority) : total; ++} + +- if (min_ttl) { +- int gen = lru_gen_from_seq(min_seq[LRU_GEN_FILE]); +- unsigned long birth = READ_ONCE(lruvec->lrugen.timestamps[gen]); ++static bool lruvec_is_reclaimable(struct lruvec *lruvec, struct scan_control *sc, ++ unsigned long min_ttl) ++{ ++ int gen; ++ unsigned long birth; ++ struct mem_cgroup *memcg = lruvec_memcg(lruvec); ++ DEFINE_MIN_SEQ(lruvec); + +- if (time_is_after_jiffies(birth + min_ttl)) +- return false; ++ VM_WARN_ON_ONCE(sc->memcg_low_reclaim); + +- /* the size is likely too small to be helpful */ +- if (!nr_to_scan && sc->priority != DEF_PRIORITY) +- return false; +- } ++ /* see the comment on lru_gen_page */ ++ gen = lru_gen_from_seq(min_seq[LRU_GEN_FILE]); ++ birth = READ_ONCE(lruvec->lrugen.timestamps[gen]); + +- if (need_aging) +- try_to_inc_max_seq(lruvec, max_seq, sc, swappiness, false); ++ if (time_is_after_jiffies(birth + min_ttl)) ++ return false; + +- return true; ++ if (!lruvec_is_sizable(lruvec, sc)) ++ return false; ++ ++ mem_cgroup_calculate_protection(NULL, memcg); ++ ++ return !mem_cgroup_below_min(memcg); + } + + /* to protect the working set of the last N jiffies */ +@@ -4283,46 +4301,32 @@ static unsigned long lru_gen_min_ttl __read_mostly; + static void lru_gen_age_node(struct pglist_data *pgdat, struct scan_control *sc) + { + struct mem_cgroup *memcg; +- bool success = false; + unsigned long min_ttl = READ_ONCE(lru_gen_min_ttl); + + VM_WARN_ON_ONCE(!current_is_kswapd()); + + sc->last_reclaimed = sc->nr_reclaimed; + +- /* +- * To reduce the chance of going into the aging path, which can be +- * costly, optimistically skip it if the flag below was cleared in the +- * eviction path. This improves the overall performance when multiple +- * memcgs are available. +- */ +- if (!sc->memcgs_need_aging) { +- sc->memcgs_need_aging = true; ++ /* check the order to exclude compaction-induced reclaim */ ++ if (!min_ttl || sc->order || sc->priority == DEF_PRIORITY) + return; +- } +- +- set_mm_walk(pgdat); + + memcg = mem_cgroup_iter(NULL, NULL, NULL); + do { + struct lruvec *lruvec = mem_cgroup_lruvec(memcg, pgdat); + +- if (age_lruvec(lruvec, sc, min_ttl)) +- success = true; ++ if (lruvec_is_reclaimable(lruvec, sc, min_ttl)) { ++ mem_cgroup_iter_break(NULL, memcg); ++ return; ++ } + + cond_resched(); + } while ((memcg = mem_cgroup_iter(NULL, memcg, NULL))); + +- clear_mm_walk(); +- +- /* check the order to exclude compaction-induced reclaim */ +- if (success || !min_ttl || sc->order) +- return; +- + /* + * The main goal is to OOM kill if every generation from all memcgs is + * younger than min_ttl. However, another possibility is all memcgs are +- * either below min or empty. ++ * either too small or below min. + */ + if (mutex_trylock(&oom_lock)) { + struct oom_control oc = { +@@ -4830,33 +4834,27 @@ static int evict_pages(struct lruvec *lruvec, struct scan_control *sc, int swapp + * reclaim. + */ + static unsigned long get_nr_to_scan(struct lruvec *lruvec, struct scan_control *sc, +- bool can_swap, bool *need_aging) ++ bool can_swap) + { + unsigned long nr_to_scan; + struct mem_cgroup *memcg = lruvec_memcg(lruvec); + DEFINE_MAX_SEQ(lruvec); +- DEFINE_MIN_SEQ(lruvec); + + if (mem_cgroup_below_min(memcg) || + (mem_cgroup_below_low(memcg) && !sc->memcg_low_reclaim)) + return 0; + +- *need_aging = should_run_aging(lruvec, max_seq, min_seq, sc, can_swap, &nr_to_scan); +- if (!*need_aging) ++ if (!should_run_aging(lruvec, max_seq, sc, can_swap, &nr_to_scan)) + return nr_to_scan; + + /* skip the aging path at the default priority */ + if (sc->priority == DEF_PRIORITY) +- goto done; ++ return nr_to_scan; + +- /* leave the work to lru_gen_age_node() */ +- if (current_is_kswapd()) +- return 0; ++ try_to_inc_max_seq(lruvec, max_seq, sc, can_swap, false); + +- if (try_to_inc_max_seq(lruvec, max_seq, sc, can_swap, false)) +- return nr_to_scan; +-done: +- return min_seq[!can_swap] + MIN_NR_GENS <= max_seq ? nr_to_scan : 0; ++ /* skip this lruvec as it's low on cold pages */ ++ return 0; + } + + static unsigned long get_nr_to_reclaim(struct scan_control *sc) +@@ -4875,9 +4873,7 @@ static unsigned long get_nr_to_reclaim(struct scan_control *sc) + static void lru_gen_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc) + { + struct blk_plug plug; +- bool need_aging = false; + unsigned long scanned = 0; +- unsigned long reclaimed = sc->nr_reclaimed; + unsigned long nr_to_reclaim = get_nr_to_reclaim(sc); + + lru_add_drain(); +@@ -4898,13 +4894,13 @@ static void lru_gen_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc + else + swappiness = 0; + +- nr_to_scan = get_nr_to_scan(lruvec, sc, swappiness, &need_aging); ++ nr_to_scan = get_nr_to_scan(lruvec, sc, swappiness); + if (!nr_to_scan) +- goto done; ++ break; + + delta = evict_pages(lruvec, sc, swappiness); + if (!delta) +- goto done; ++ break; + + scanned += delta; + if (scanned >= nr_to_scan) +@@ -4916,10 +4912,6 @@ static void lru_gen_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc + cond_resched(); + } + +- /* see the comment in lru_gen_age_node() */ +- if (sc->nr_reclaimed - reclaimed >= MIN_LRU_BATCH && !need_aging) +- sc->memcgs_need_aging = false; +-done: + clear_mm_walk(); + + blk_finish_plug(&plug); +-- +2.40.0 + |