author    Kazuki H <kazukih0205@gmail.com>  2023-03-21 06:51:03 +0900
committer Christian Marangi <ansuelsmth@gmail.com>  2023-03-27 14:16:10 +0200
commit    0d0928f58795e336646ad31ea96d2919b5328f39 (patch)
tree      eb321324f622f740f72233d019ef01873a4f97cf /target/linux/generic/backport-5.15/020-v6.1-10-mm-multi-gen-LRU-kill-switch.patch
parent    dc79b51533cfe9a7806353f6c6fd6b22cd80d536 (diff)
kernel: Update MGLRU patchset
The current patches are outdated; update them from mainline. Backports taken
from https://github.com/yuzhaogoogle/linux/commits/mglru-5.15

Tested-by: Kazuki H <kazukih0205@gmail.com> #mt7622/Linksys E8450 UBI
Signed-off-by: Kazuki H <kazukih0205@gmail.com>
Diffstat (limited to 'target/linux/generic/backport-5.15/020-v6.1-10-mm-multi-gen-LRU-kill-switch.patch')
-rw-r--r-- target/linux/generic/backport-5.15/020-v6.1-10-mm-multi-gen-LRU-kill-switch.patch | 513
1 file changed, 513 insertions(+), 0 deletions(-)
diff --git a/target/linux/generic/backport-5.15/020-v6.1-10-mm-multi-gen-LRU-kill-switch.patch b/target/linux/generic/backport-5.15/020-v6.1-10-mm-multi-gen-LRU-kill-switch.patch
new file mode 100644
index 0000000000..0adb15f5e2
--- /dev/null
+++ b/target/linux/generic/backport-5.15/020-v6.1-10-mm-multi-gen-LRU-kill-switch.patch
@@ -0,0 +1,513 @@
+From 640db3a029dca909af47157ca18f52b29d34a1b9 Mon Sep 17 00:00:00 2001
+From: Yu Zhao <yuzhao@google.com>
+Date: Sun, 18 Sep 2022 02:00:07 -0600
+Subject: [PATCH 10/29] mm: multi-gen LRU: kill switch
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Add /sys/kernel/mm/lru_gen/enabled as a kill switch. Components that
+can be disabled include:
+ 0x0001: the multi-gen LRU core
+ 0x0002: walking page tables, when arch_has_hw_pte_young() returns
+ true
+ 0x0004: clearing the accessed bit in non-leaf PMD entries, when
+ CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG=y
+ [yYnN]: apply to all the components above
+E.g.,
+ echo y >/sys/kernel/mm/lru_gen/enabled
+ cat /sys/kernel/mm/lru_gen/enabled
+ 0x0007
+ echo 5 >/sys/kernel/mm/lru_gen/enabled
+ cat /sys/kernel/mm/lru_gen/enabled
+ 0x0005
+
+NB: the page table walks happen on the scale of seconds under heavy memory
+pressure, in which case the mmap_lock contention is a lesser concern,
+compared with the LRU lock contention and the I/O congestion. So far the
+only well-known case of the mmap_lock contention happens on Android, due
+to Scudo [1], which allocates several thousand VMAs for merely a few
+hundred MBs. The SPF and the Maple Tree have also provided their own
+assessments [2][3]. However, if walking page tables does worsen the
+mmap_lock contention, the kill switch can be used to disable it. In this
+case the multi-gen LRU will suffer a minor performance degradation, as
+shown previously.
+
+Clearing the accessed bit in non-leaf PMD entries can also be disabled,
+since this behavior was not tested on x86 varieties other than Intel and
+AMD.
+
+[1] https://source.android.com/devices/tech/debug/scudo
+[2] https://lore.kernel.org/r/20220128131006.67712-1-michel@lespinasse.org/
+[3] https://lore.kernel.org/r/20220426150616.3937571-1-Liam.Howlett@oracle.com/
+
+Link: https://lkml.kernel.org/r/20220918080010.2920238-11-yuzhao@google.com
+Signed-off-by: Yu Zhao <yuzhao@google.com>
+Acked-by: Brian Geffon <bgeffon@google.com>
+Acked-by: Jan Alexander Steffens (heftig) <heftig@archlinux.org>
+Acked-by: Oleksandr Natalenko <oleksandr@natalenko.name>
+Acked-by: Steven Barrett <steven@liquorix.net>
+Acked-by: Suleiman Souhlal <suleiman@google.com>
+Tested-by: Daniel Byrne <djbyrne@mtu.edu>
+Tested-by: Donald Carr <d@chaos-reins.com>
+Tested-by: Holger Hoffstätte <holger@applied-asynchrony.com>
+Tested-by: Konstantin Kharlamov <Hi-Angel@yandex.ru>
+Tested-by: Shuang Zhai <szhai2@cs.rochester.edu>
+Tested-by: Sofia Trinh <sofia.trinh@edi.works>
+Tested-by: Vaibhav Jain <vaibhav@linux.ibm.com>
+Cc: Andi Kleen <ak@linux.intel.com>
+Cc: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
+Cc: Barry Song <baohua@kernel.org>
+Cc: Catalin Marinas <catalin.marinas@arm.com>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: Hillf Danton <hdanton@sina.com>
+Cc: Jens Axboe <axboe@kernel.dk>
+Cc: Johannes Weiner <hannes@cmpxchg.org>
+Cc: Jonathan Corbet <corbet@lwn.net>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Matthew Wilcox <willy@infradead.org>
+Cc: Mel Gorman <mgorman@suse.de>
+Cc: Miaohe Lin <linmiaohe@huawei.com>
+Cc: Michael Larabel <Michael@MichaelLarabel.com>
+Cc: Michal Hocko <mhocko@kernel.org>
+Cc: Mike Rapoport <rppt@kernel.org>
+Cc: Mike Rapoport <rppt@linux.ibm.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Qi Zheng <zhengqi.arch@bytedance.com>
+Cc: Tejun Heo <tj@kernel.org>
+Cc: Vlastimil Babka <vbabka@suse.cz>
+Cc: Will Deacon <will@kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+---
+ include/linux/cgroup.h | 15 ++-
+ include/linux/mm_inline.h | 15 ++-
+ include/linux/mmzone.h | 9 ++
+ kernel/cgroup/cgroup-internal.h | 1 -
+ mm/Kconfig | 6 +
+ mm/vmscan.c | 228 +++++++++++++++++++++++++++++++-
+ 6 files changed, 265 insertions(+), 9 deletions(-)
+
+diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
+index 45cdb12243e3..f9a5d6a81101 100644
+--- a/include/linux/cgroup.h
++++ b/include/linux/cgroup.h
+@@ -433,6 +433,18 @@ static inline void cgroup_put(struct cgroup *cgrp)
+ css_put(&cgrp->self);
+ }
+
++extern struct mutex cgroup_mutex;
++
++static inline void cgroup_lock(void)
++{
++ mutex_lock(&cgroup_mutex);
++}
++
++static inline void cgroup_unlock(void)
++{
++ mutex_unlock(&cgroup_mutex);
++}
++
+ /**
+ * task_css_set_check - obtain a task's css_set with extra access conditions
+ * @task: the task to obtain css_set for
+@@ -447,7 +459,6 @@ static inline void cgroup_put(struct cgroup *cgrp)
+ * as locks used during the cgroup_subsys::attach() methods.
+ */
+ #ifdef CONFIG_PROVE_RCU
+-extern struct mutex cgroup_mutex;
+ extern spinlock_t css_set_lock;
+ #define task_css_set_check(task, __c) \
+ rcu_dereference_check((task)->cgroups, \
+@@ -708,6 +719,8 @@ struct cgroup;
+ static inline u64 cgroup_id(const struct cgroup *cgrp) { return 1; }
+ static inline void css_get(struct cgroup_subsys_state *css) {}
+ static inline void css_put(struct cgroup_subsys_state *css) {}
++static inline void cgroup_lock(void) {}
++static inline void cgroup_unlock(void) {}
+ static inline int cgroup_attach_task_all(struct task_struct *from,
+ struct task_struct *t) { return 0; }
+ static inline int cgroupstats_build(struct cgroupstats *stats,
+diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h
+index 58aabb1ba020..e095c1c24311 100644
+--- a/include/linux/mm_inline.h
++++ b/include/linux/mm_inline.h
+@@ -91,10 +91,21 @@ static __always_inline enum lru_list page_lru(struct page *page)
+
+ #ifdef CONFIG_LRU_GEN
+
++#ifdef CONFIG_LRU_GEN_ENABLED
+ static inline bool lru_gen_enabled(void)
+ {
+- return true;
++ DECLARE_STATIC_KEY_TRUE(lru_gen_caps[NR_LRU_GEN_CAPS]);
++
++ return static_branch_likely(&lru_gen_caps[LRU_GEN_CORE]);
++}
++#else
++static inline bool lru_gen_enabled(void)
++{
++ DECLARE_STATIC_KEY_FALSE(lru_gen_caps[NR_LRU_GEN_CAPS]);
++
++ return static_branch_unlikely(&lru_gen_caps[LRU_GEN_CORE]);
+ }
++#endif
+
+ static inline bool lru_gen_in_fault(void)
+ {
+@@ -207,7 +218,7 @@ static inline bool lru_gen_add_page(struct lruvec *lruvec, struct page *page, bo
+
+ VM_WARN_ON_ONCE_PAGE(gen != -1, page);
+
+- if (PageUnevictable(page))
++ if (PageUnevictable(page) || !lrugen->enabled)
+ return false;
+ /*
+ * There are three common cases for this page:
+diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
+index 659bab633bdf..edaf035503ed 100644
+--- a/include/linux/mmzone.h
++++ b/include/linux/mmzone.h
+@@ -364,6 +364,13 @@ enum {
+ LRU_GEN_FILE,
+ };
+
++enum {
++ LRU_GEN_CORE,
++ LRU_GEN_MM_WALK,
++ LRU_GEN_NONLEAF_YOUNG,
++ NR_LRU_GEN_CAPS
++};
++
+ #define MIN_LRU_BATCH BITS_PER_LONG
+ #define MAX_LRU_BATCH (MIN_LRU_BATCH * 64)
+
+@@ -405,6 +412,8 @@ struct lru_gen_struct {
+ /* can be modified without holding the LRU lock */
+ atomic_long_t evicted[NR_HIST_GENS][ANON_AND_FILE][MAX_NR_TIERS];
+ atomic_long_t refaulted[NR_HIST_GENS][ANON_AND_FILE][MAX_NR_TIERS];
++ /* whether the multi-gen LRU is enabled */
++ bool enabled;
+ };
+
+ enum {
+diff --git a/kernel/cgroup/cgroup-internal.h b/kernel/cgroup/cgroup-internal.h
+index d8fcc139ac05..28c32a01da7d 100644
+--- a/kernel/cgroup/cgroup-internal.h
++++ b/kernel/cgroup/cgroup-internal.h
+@@ -165,7 +165,6 @@ struct cgroup_mgctx {
+ #define DEFINE_CGROUP_MGCTX(name) \
+ struct cgroup_mgctx name = CGROUP_MGCTX_INIT(name)
+
+-extern struct mutex cgroup_mutex;
+ extern spinlock_t css_set_lock;
+ extern struct cgroup_subsys *cgroup_subsys[];
+ extern struct list_head cgroup_roots;
+diff --git a/mm/Kconfig b/mm/Kconfig
+index 62433f3cd7ae..4a7d0af3c39b 100644
+--- a/mm/Kconfig
++++ b/mm/Kconfig
+@@ -906,6 +906,12 @@ config LRU_GEN
+ help
+ A high performance LRU implementation to overcommit memory.
+
++config LRU_GEN_ENABLED
++ bool "Enable by default"
++ depends on LRU_GEN
++ help
++ This option enables the multi-gen LRU by default.
++
+ config LRU_GEN_STATS
+ bool "Full stats for debugging"
+ depends on LRU_GEN
+diff --git a/mm/vmscan.c b/mm/vmscan.c
+index b6f6fc2585e1..be37d996bc92 100644
+--- a/mm/vmscan.c
++++ b/mm/vmscan.c
+@@ -52,6 +52,7 @@
+ #include <linux/psi.h>
+ #include <linux/pagewalk.h>
+ #include <linux/shmem_fs.h>
++#include <linux/ctype.h>
+
+ #include <asm/tlbflush.h>
+ #include <asm/div64.h>
+@@ -2841,6 +2842,14 @@ static bool can_age_anon_pages(struct pglist_data *pgdat,
+
+ #ifdef CONFIG_LRU_GEN
+
++#ifdef CONFIG_LRU_GEN_ENABLED
++DEFINE_STATIC_KEY_ARRAY_TRUE(lru_gen_caps, NR_LRU_GEN_CAPS);
++#define get_cap(cap) static_branch_likely(&lru_gen_caps[cap])
++#else
++DEFINE_STATIC_KEY_ARRAY_FALSE(lru_gen_caps, NR_LRU_GEN_CAPS);
++#define get_cap(cap) static_branch_unlikely(&lru_gen_caps[cap])
++#endif
++
+ /******************************************************************************
+ * shorthand helpers
+ ******************************************************************************/
+@@ -3717,7 +3726,8 @@ static void walk_pmd_range_locked(pud_t *pud, unsigned long next, struct vm_area
+ goto next;
+
+ if (!pmd_trans_huge(pmd[i])) {
+- if (IS_ENABLED(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG))
++ if (IS_ENABLED(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG) &&
++ get_cap(LRU_GEN_NONLEAF_YOUNG))
+ pmdp_test_and_clear_young(vma, addr, pmd + i);
+ goto next;
+ }
+@@ -3815,10 +3825,12 @@ static void walk_pmd_range(pud_t *pud, unsigned long start, unsigned long end,
+ walk->mm_stats[MM_NONLEAF_TOTAL]++;
+
+ #ifdef CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG
+- if (!pmd_young(val))
+- continue;
++ if (get_cap(LRU_GEN_NONLEAF_YOUNG)) {
++ if (!pmd_young(val))
++ continue;
+
+- walk_pmd_range_locked(pud, addr, vma, args, bitmap, &pos);
++ walk_pmd_range_locked(pud, addr, vma, args, bitmap, &pos);
++ }
+ #endif
+ if (!walk->force_scan && !test_bloom_filter(walk->lruvec, walk->max_seq, pmd + i))
+ continue;
+@@ -4080,7 +4092,7 @@ static bool try_to_inc_max_seq(struct lruvec *lruvec, unsigned long max_seq,
+ * handful of PTEs. Spreading the work out over a period of time usually
+ * is less efficient, but it avoids bursty page faults.
+ */
+- if (!arch_has_hw_pte_young()) {
++ if (!(arch_has_hw_pte_young() && get_cap(LRU_GEN_MM_WALK))) {
+ success = iterate_mm_list_nowalk(lruvec, max_seq);
+ goto done;
+ }
+@@ -4845,6 +4857,208 @@ static void lru_gen_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc
+ blk_finish_plug(&plug);
+ }
+
++/******************************************************************************
++ * state change
++ ******************************************************************************/
++
++static bool __maybe_unused state_is_valid(struct lruvec *lruvec)
++{
++ struct lru_gen_struct *lrugen = &lruvec->lrugen;
++
++ if (lrugen->enabled) {
++ enum lru_list lru;
++
++ for_each_evictable_lru(lru) {
++ if (!list_empty(&lruvec->lists[lru]))
++ return false;
++ }
++ } else {
++ int gen, type, zone;
++
++ for_each_gen_type_zone(gen, type, zone) {
++ if (!list_empty(&lrugen->lists[gen][type][zone]))
++ return false;
++ }
++ }
++
++ return true;
++}
++
++static bool fill_evictable(struct lruvec *lruvec)
++{
++ enum lru_list lru;
++ int remaining = MAX_LRU_BATCH;
++
++ for_each_evictable_lru(lru) {
++ int type = is_file_lru(lru);
++ bool active = is_active_lru(lru);
++ struct list_head *head = &lruvec->lists[lru];
++
++ while (!list_empty(head)) {
++ bool success;
++ struct page *page = lru_to_page(head);
++
++ VM_WARN_ON_ONCE_PAGE(PageUnevictable(page), page);
++ VM_WARN_ON_ONCE_PAGE(PageActive(page) != active, page);
++ VM_WARN_ON_ONCE_PAGE(page_is_file_lru(page) != type, page);
++ VM_WARN_ON_ONCE_PAGE(page_lru_gen(page) != -1, page);
++
++ del_page_from_lru_list(page, lruvec);
++ success = lru_gen_add_page(lruvec, page, false);
++ VM_WARN_ON_ONCE(!success);
++
++ if (!--remaining)
++ return false;
++ }
++ }
++
++ return true;
++}
++
++static bool drain_evictable(struct lruvec *lruvec)
++{
++ int gen, type, zone;
++ int remaining = MAX_LRU_BATCH;
++
++ for_each_gen_type_zone(gen, type, zone) {
++ struct list_head *head = &lruvec->lrugen.lists[gen][type][zone];
++
++ while (!list_empty(head)) {
++ bool success;
++ struct page *page = lru_to_page(head);
++
++ VM_WARN_ON_ONCE_PAGE(PageUnevictable(page), page);
++ VM_WARN_ON_ONCE_PAGE(PageActive(page), page);
++ VM_WARN_ON_ONCE_PAGE(page_is_file_lru(page) != type, page);
++ VM_WARN_ON_ONCE_PAGE(page_zonenum(page) != zone, page);
++
++ success = lru_gen_del_page(lruvec, page, false);
++ VM_WARN_ON_ONCE(!success);
++ add_page_to_lru_list(page, lruvec);
++
++ if (!--remaining)
++ return false;
++ }
++ }
++
++ return true;
++}
++
++static void lru_gen_change_state(bool enabled)
++{
++ static DEFINE_MUTEX(state_mutex);
++
++ struct mem_cgroup *memcg;
++
++ cgroup_lock();
++ cpus_read_lock();
++ get_online_mems();
++ mutex_lock(&state_mutex);
++
++ if (enabled == lru_gen_enabled())
++ goto unlock;
++
++ if (enabled)
++ static_branch_enable_cpuslocked(&lru_gen_caps[LRU_GEN_CORE]);
++ else
++ static_branch_disable_cpuslocked(&lru_gen_caps[LRU_GEN_CORE]);
++
++ memcg = mem_cgroup_iter(NULL, NULL, NULL);
++ do {
++ int nid;
++
++ for_each_node(nid) {
++ struct lruvec *lruvec = get_lruvec(memcg, nid);
++
++ if (!lruvec)
++ continue;
++
++ spin_lock_irq(&lruvec->lru_lock);
++
++ VM_WARN_ON_ONCE(!seq_is_valid(lruvec));
++ VM_WARN_ON_ONCE(!state_is_valid(lruvec));
++
++ lruvec->lrugen.enabled = enabled;
++
++ while (!(enabled ? fill_evictable(lruvec) : drain_evictable(lruvec))) {
++ spin_unlock_irq(&lruvec->lru_lock);
++ cond_resched();
++ spin_lock_irq(&lruvec->lru_lock);
++ }
++
++ spin_unlock_irq(&lruvec->lru_lock);
++ }
++
++ cond_resched();
++ } while ((memcg = mem_cgroup_iter(NULL, memcg, NULL)));
++unlock:
++ mutex_unlock(&state_mutex);
++ put_online_mems();
++ cpus_read_unlock();
++ cgroup_unlock();
++}
++
++/******************************************************************************
++ * sysfs interface
++ ******************************************************************************/
++
++static ssize_t show_enabled(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
++{
++ unsigned int caps = 0;
++
++ if (get_cap(LRU_GEN_CORE))
++ caps |= BIT(LRU_GEN_CORE);
++
++ if (arch_has_hw_pte_young() && get_cap(LRU_GEN_MM_WALK))
++ caps |= BIT(LRU_GEN_MM_WALK);
++
++ if (IS_ENABLED(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG) && get_cap(LRU_GEN_NONLEAF_YOUNG))
++ caps |= BIT(LRU_GEN_NONLEAF_YOUNG);
++
++ return snprintf(buf, PAGE_SIZE, "0x%04x\n", caps);
++}
++
++static ssize_t store_enabled(struct kobject *kobj, struct kobj_attribute *attr,
++ const char *buf, size_t len)
++{
++ int i;
++ unsigned int caps;
++
++ if (tolower(*buf) == 'n')
++ caps = 0;
++ else if (tolower(*buf) == 'y')
++ caps = -1;
++ else if (kstrtouint(buf, 0, &caps))
++ return -EINVAL;
++
++ for (i = 0; i < NR_LRU_GEN_CAPS; i++) {
++ bool enabled = caps & BIT(i);
++
++ if (i == LRU_GEN_CORE)
++ lru_gen_change_state(enabled);
++ else if (enabled)
++ static_branch_enable(&lru_gen_caps[i]);
++ else
++ static_branch_disable(&lru_gen_caps[i]);
++ }
++
++ return len;
++}
++
++static struct kobj_attribute lru_gen_enabled_attr = __ATTR(
++ enabled, 0644, show_enabled, store_enabled
++);
++
++static struct attribute *lru_gen_attrs[] = {
++ &lru_gen_enabled_attr.attr,
++ NULL
++};
++
++static struct attribute_group lru_gen_attr_group = {
++ .name = "lru_gen",
++ .attrs = lru_gen_attrs,
++};
++
+ /******************************************************************************
+ * initialization
+ ******************************************************************************/
+@@ -4855,6 +5069,7 @@ void lru_gen_init_lruvec(struct lruvec *lruvec)
+ struct lru_gen_struct *lrugen = &lruvec->lrugen;
+
+ lrugen->max_seq = MIN_NR_GENS + 1;
++ lrugen->enabled = lru_gen_enabled();
+
+ for_each_gen_type_zone(gen, type, zone)
+ INIT_LIST_HEAD(&lrugen->lists[gen][type][zone]);
+@@ -4894,6 +5109,9 @@ static int __init init_lru_gen(void)
+ BUILD_BUG_ON(MIN_NR_GENS + 1 >= MAX_NR_GENS);
+ BUILD_BUG_ON(BIT(LRU_GEN_WIDTH) <= MAX_NR_GENS);
+
++ if (sysfs_create_group(mm_kobj, &lru_gen_attr_group))
++ pr_err("lru_gen: failed to create sysfs group\n");
++
+ return 0;
+ };
+ late_initcall(init_lru_gen);
+--
+2.40.0
+
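A usage sketch, not taken from the patch itself: show_enabled() prints the
capability bitmask as "0x%04x", and store_enabled() accepts [yYnN] or any
string kstrtouint(buf, 0, ...) can parse, hex included. Assuming a kernel
built with CONFIG_LRU_GEN, the file registered by init_lru_gen() at
/sys/kernel/mm/lru_gen/enabled could be driven from userspace C roughly as
follows (writing requires root):

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

#define LRU_GEN_ENABLED "/sys/kernel/mm/lru_gen/enabled"

/* Bit positions follow the enum this patch adds to include/linux/mmzone.h. */
enum { LRU_GEN_CORE, LRU_GEN_MM_WALK, LRU_GEN_NONLEAF_YOUNG };

static int read_caps(unsigned int *caps)
{
	FILE *f = fopen(LRU_GEN_ENABLED, "r");

	if (!f)
		return -1;
	/* show_enabled() prints "0x%04x\n"; scanf's %x accepts the 0x prefix. */
	if (fscanf(f, "%x", caps) != 1) {
		fclose(f);
		return -1;
	}
	fclose(f);
	return 0;
}

static int write_caps(unsigned int caps)
{
	char buf[16];
	ssize_t n;
	int fd = open(LRU_GEN_ENABLED, O_WRONLY);

	if (fd < 0)
		return -1;
	/* store_enabled() uses kstrtouint(buf, 0, ...), so hex is accepted. */
	snprintf(buf, sizeof(buf), "0x%04x", caps);
	n = write(fd, buf, strlen(buf));
	close(fd);
	return n < 0 ? -1 : 0;
}

int main(void)
{
	unsigned int caps;

	if (read_caps(&caps)) {
		perror("read " LRU_GEN_ENABLED);
		return 1;
	}
	printf("current: 0x%04x\n", caps);

	/* Keep the MGLRU core but turn off the page table walks (0x0002). */
	if (write_caps(caps & ~(1u << LRU_GEN_MM_WALK)))
		perror("write " LRU_GEN_ENABLED);

	if (!read_caps(&caps))
		printf("now:     0x%04x\n", caps);
	return 0;
}

Clearing only LRU_GEN_MM_WALK this way matches the escape hatch described in
the commit message: if the page table walks worsen mmap_lock contention, bit
0x0002 can be dropped while the MGLRU core (0x0001) stays enabled.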