author    Kazuki H <kazukih0205@gmail.com>    2023-03-21 06:51:03 +0900
committer Christian Marangi <ansuelsmth@gmail.com>    2023-03-27 14:16:10 +0200
commit    0d0928f58795e336646ad31ea96d2919b5328f39 (patch)
tree      eb321324f622f740f72233d019ef01873a4f97cf /target/linux/generic/backport-5.15/020-v6.1-07-mm-multi-gen-LRU-exploit-locality-in-rmap.patch
parent    dc79b51533cfe9a7806353f6c6fd6b22cd80d536 (diff)
kernel: Update MGLRU patchset
The current patches are old, update them from mainline.

Backports taken from https://github.com/yuzhaogoogle/linux/commits/mglru-5.15

Tested-by: Kazuki H <kazukih0205@gmail.com> #mt7622/Linksys E8450 UBI
Signed-off-by: Kazuki H <kazukih0205@gmail.com>
Diffstat (limited to 'target/linux/generic/backport-5.15/020-v6.1-07-mm-multi-gen-LRU-exploit-locality-in-rmap.patch')
-rw-r--r--  target/linux/generic/backport-5.15/020-v6.1-07-mm-multi-gen-LRU-exploit-locality-in-rmap.patch  |  508
1 file changed, 508 insertions(+), 0 deletions(-)
diff --git a/target/linux/generic/backport-5.15/020-v6.1-07-mm-multi-gen-LRU-exploit-locality-in-rmap.patch b/target/linux/generic/backport-5.15/020-v6.1-07-mm-multi-gen-LRU-exploit-locality-in-rmap.patch
new file mode 100644
index 0000000000..e0c6380b5f
--- /dev/null
+++ b/target/linux/generic/backport-5.15/020-v6.1-07-mm-multi-gen-LRU-exploit-locality-in-rmap.patch
@@ -0,0 +1,508 @@
+From e4277535f6d6708bb19b88c4bad155832671d69b Mon Sep 17 00:00:00 2001
+From: Yu Zhao <yuzhao@google.com>
+Date: Sun, 18 Sep 2022 02:00:04 -0600
+Subject: [PATCH 07/29] mm: multi-gen LRU: exploit locality in rmap
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Searching the rmap for PTEs mapping each page on an LRU list (to test and
+clear the accessed bit) can be expensive because pages from different VMAs
+(PA space) are not cache friendly to the rmap (VA space). For workloads
+mostly using mapped pages, searching the rmap can incur the highest CPU
+cost in the reclaim path.
+
+This patch exploits spatial locality to reduce the trips into the rmap.
+When shrink_page_list() walks the rmap and finds a young PTE, a new
+function lru_gen_look_around() scans at most BITS_PER_LONG-1 adjacent
+PTEs. On finding another young PTE, it clears the accessed bit and
+updates the gen counter of the page mapped by this PTE to
+(max_seq%MAX_NR_GENS)+1.
+
+Server benchmark results:
+ Single workload:
+ fio (buffered I/O): no change
+
+ Single workload:
+ memcached (anon): +[3, 5]%
+ Ops/sec KB/sec
+ patch1-6: 1106168.46 43025.04
+ patch1-7: 1147696.57 44640.29
+
+ Configurations:
+ no change
+
+Client benchmark results:
+ kswapd profiles:
+ patch1-6
+ 39.03% lzo1x_1_do_compress (real work)
+ 18.47% page_vma_mapped_walk (overhead)
+ 6.74% _raw_spin_unlock_irq
+ 3.97% do_raw_spin_lock
+ 2.49% ptep_clear_flush
+ 2.48% anon_vma_interval_tree_iter_first
+ 1.92% page_referenced_one
+ 1.88% __zram_bvec_write
+ 1.48% memmove
+ 1.31% vma_interval_tree_iter_next
+
+ patch1-7
+ 48.16% lzo1x_1_do_compress (real work)
+ 8.20% page_vma_mapped_walk (overhead)
+ 7.06% _raw_spin_unlock_irq
+ 2.92% ptep_clear_flush
+ 2.53% __zram_bvec_write
+ 2.11% do_raw_spin_lock
+ 2.02% memmove
+ 1.93% lru_gen_look_around
+ 1.56% free_unref_page_list
+ 1.40% memset
+
+ Configurations:
+ no change
+
+Link: https://lkml.kernel.org/r/20220918080010.2920238-8-yuzhao@google.com
+Signed-off-by: Yu Zhao <yuzhao@google.com>
+Acked-by: Barry Song <baohua@kernel.org>
+Acked-by: Brian Geffon <bgeffon@google.com>
+Acked-by: Jan Alexander Steffens (heftig) <heftig@archlinux.org>
+Acked-by: Oleksandr Natalenko <oleksandr@natalenko.name>
+Acked-by: Steven Barrett <steven@liquorix.net>
+Acked-by: Suleiman Souhlal <suleiman@google.com>
+Tested-by: Daniel Byrne <djbyrne@mtu.edu>
+Tested-by: Donald Carr <d@chaos-reins.com>
+Tested-by: Holger Hoffstätte <holger@applied-asynchrony.com>
+Tested-by: Konstantin Kharlamov <Hi-Angel@yandex.ru>
+Tested-by: Shuang Zhai <szhai2@cs.rochester.edu>
+Tested-by: Sofia Trinh <sofia.trinh@edi.works>
+Tested-by: Vaibhav Jain <vaibhav@linux.ibm.com>
+Cc: Andi Kleen <ak@linux.intel.com>
+Cc: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
+Cc: Catalin Marinas <catalin.marinas@arm.com>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: Hillf Danton <hdanton@sina.com>
+Cc: Jens Axboe <axboe@kernel.dk>
+Cc: Johannes Weiner <hannes@cmpxchg.org>
+Cc: Jonathan Corbet <corbet@lwn.net>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Matthew Wilcox <willy@infradead.org>
+Cc: Mel Gorman <mgorman@suse.de>
+Cc: Miaohe Lin <linmiaohe@huawei.com>
+Cc: Michael Larabel <Michael@MichaelLarabel.com>
+Cc: Michal Hocko <mhocko@kernel.org>
+Cc: Mike Rapoport <rppt@kernel.org>
+Cc: Mike Rapoport <rppt@linux.ibm.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Qi Zheng <zhengqi.arch@bytedance.com>
+Cc: Tejun Heo <tj@kernel.org>
+Cc: Vlastimil Babka <vbabka@suse.cz>
+Cc: Will Deacon <will@kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+---
+ include/linux/memcontrol.h | 31 +++++++
+ include/linux/mmzone.h | 6 ++
+ mm/internal.h | 1 +
+ mm/memcontrol.c | 1 +
+ mm/rmap.c | 7 ++
+ mm/swap.c | 4 +-
+ mm/vmscan.c | 184 +++++++++++++++++++++++++++++++++++++
+ 7 files changed, 232 insertions(+), 2 deletions(-)
+
+diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
+index 4f189b17dafc..8d6a0329bc59 100644
+--- a/include/linux/memcontrol.h
++++ b/include/linux/memcontrol.h
+@@ -442,6 +442,7 @@ static inline struct obj_cgroup *__page_objcg(struct page *page)
+ * - LRU isolation
+ * - lock_page_memcg()
+ * - exclusive reference
++ * - mem_cgroup_trylock_pages()
+ *
+ * For a kmem page a caller should hold an rcu read lock to protect memcg
+ * associated with a kmem page from being released.
+@@ -497,6 +498,7 @@ static inline struct mem_cgroup *page_memcg_rcu(struct page *page)
+ * - LRU isolation
+ * - lock_page_memcg()
+ * - exclusive reference
++ * - mem_cgroup_trylock_pages()
+ *
+ * For a kmem page a caller should hold an rcu read lock to protect memcg
+ * associated with a kmem page from being released.
+@@ -953,6 +955,23 @@ void unlock_page_memcg(struct page *page);
+
+ void __mod_memcg_state(struct mem_cgroup *memcg, int idx, int val);
+
++/* try to stablize page_memcg() for all the pages in a memcg */
++static inline bool mem_cgroup_trylock_pages(struct mem_cgroup *memcg)
++{
++ rcu_read_lock();
++
++ if (mem_cgroup_disabled() || !atomic_read(&memcg->moving_account))
++ return true;
++
++ rcu_read_unlock();
++ return false;
++}
++
++static inline void mem_cgroup_unlock_pages(void)
++{
++ rcu_read_unlock();
++}
++
+ /* idx can be of type enum memcg_stat_item or node_stat_item */
+ static inline void mod_memcg_state(struct mem_cgroup *memcg,
+ int idx, int val)
+@@ -1369,6 +1388,18 @@ static inline void unlock_page_memcg(struct page *page)
+ {
+ }
+
++static inline bool mem_cgroup_trylock_pages(struct mem_cgroup *memcg)
++{
++ /* to match page_memcg_rcu() */
++ rcu_read_lock();
++ return true;
++}
++
++static inline void mem_cgroup_unlock_pages(void)
++{
++ rcu_read_unlock();
++}
++
+ static inline void mem_cgroup_handle_over_high(void)
+ {
+ }
+diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
+index fce8945c507c..4db2b877fcf9 100644
+--- a/include/linux/mmzone.h
++++ b/include/linux/mmzone.h
+@@ -352,6 +352,7 @@ enum lruvec_flags {
+ #ifndef __GENERATING_BOUNDS_H
+
+ struct lruvec;
++struct page_vma_mapped_walk;
+
+ #define LRU_GEN_MASK ((BIT(LRU_GEN_WIDTH) - 1) << LRU_GEN_PGOFF)
+ #define LRU_REFS_MASK ((BIT(LRU_REFS_WIDTH) - 1) << LRU_REFS_PGOFF)
+@@ -407,6 +408,7 @@ struct lru_gen_struct {
+ };
+
+ void lru_gen_init_lruvec(struct lruvec *lruvec);
++void lru_gen_look_around(struct page_vma_mapped_walk *pvmw);
+
+ #ifdef CONFIG_MEMCG
+ void lru_gen_init_memcg(struct mem_cgroup *memcg);
+@@ -419,6 +421,10 @@ static inline void lru_gen_init_lruvec(struct lruvec *lruvec)
+ {
+ }
+
++static inline void lru_gen_look_around(struct page_vma_mapped_walk *pvmw)
++{
++}
++
+ #ifdef CONFIG_MEMCG
+ static inline void lru_gen_init_memcg(struct mem_cgroup *memcg)
+ {
+diff --git a/mm/internal.h b/mm/internal.h
+index cf3cb933eba3..5c73246a092e 100644
+--- a/mm/internal.h
++++ b/mm/internal.h
+@@ -35,6 +35,7 @@
+ void page_writeback_init(void);
+
+ vm_fault_t do_swap_page(struct vm_fault *vmf);
++void activate_page(struct page *page);
+
+ void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *start_vma,
+ unsigned long floor, unsigned long ceiling);
+diff --git a/mm/memcontrol.c b/mm/memcontrol.c
+index 8b634dc72e7f..cc3431c5d9ba 100644
+--- a/mm/memcontrol.c
++++ b/mm/memcontrol.c
+@@ -2798,6 +2798,7 @@ static void commit_charge(struct page *page, struct mem_cgroup *memcg)
+ * - LRU isolation
+ * - lock_page_memcg()
+ * - exclusive reference
++ * - mem_cgroup_trylock_pages()
+ */
+ page->memcg_data = (unsigned long)memcg;
+ }
+diff --git a/mm/rmap.c b/mm/rmap.c
+index 330b361a460e..22a86122732e 100644
+--- a/mm/rmap.c
++++ b/mm/rmap.c
+@@ -73,6 +73,7 @@
+ #include <linux/page_idle.h>
+ #include <linux/memremap.h>
+ #include <linux/userfaultfd_k.h>
++#include <linux/mm_inline.h>
+
+ #include <asm/tlbflush.h>
+
+@@ -793,6 +794,12 @@ static bool page_referenced_one(struct page *page, struct vm_area_struct *vma,
+ }
+
+ if (pvmw.pte) {
++ if (lru_gen_enabled() && pte_young(*pvmw.pte) &&
++ !(vma->vm_flags & (VM_SEQ_READ | VM_RAND_READ))) {
++ lru_gen_look_around(&pvmw);
++ referenced++;
++ }
++
+ if (ptep_clear_flush_young_notify(vma, address,
+ pvmw.pte)) {
+ /*
+diff --git a/mm/swap.c b/mm/swap.c
+index 5d227577b609..966ff2d83343 100644
+--- a/mm/swap.c
++++ b/mm/swap.c
+@@ -325,7 +325,7 @@ static bool need_activate_page_drain(int cpu)
+ return pagevec_count(&per_cpu(lru_pvecs.activate_page, cpu)) != 0;
+ }
+
+-static void activate_page(struct page *page)
++void activate_page(struct page *page)
+ {
+ page = compound_head(page);
+ if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) {
+@@ -345,7 +345,7 @@ static inline void activate_page_drain(int cpu)
+ {
+ }
+
+-static void activate_page(struct page *page)
++void activate_page(struct page *page)
+ {
+ struct lruvec *lruvec;
+
+diff --git a/mm/vmscan.c b/mm/vmscan.c
+index 932abd24c1b3..1d0b25ae378c 100644
+--- a/mm/vmscan.c
++++ b/mm/vmscan.c
+@@ -1409,6 +1409,11 @@ static unsigned int shrink_page_list(struct list_head *page_list,
+ if (!sc->may_unmap && page_mapped(page))
+ goto keep_locked;
+
++ /* page_update_gen() tried to promote this page? */
++ if (lru_gen_enabled() && !ignore_references &&
++ page_mapped(page) && PageReferenced(page))
++ goto keep_locked;
++
+ may_enter_fs = (sc->gfp_mask & __GFP_FS) ||
+ (PageSwapCache(page) && (sc->gfp_mask & __GFP_IO));
+
+@@ -2990,6 +2995,29 @@ static bool positive_ctrl_err(struct ctrl_pos *sp, struct ctrl_pos *pv)
+ * the aging
+ ******************************************************************************/
+
++/* promote pages accessed through page tables */
++static int page_update_gen(struct page *page, int gen)
++{
++ unsigned long new_flags, old_flags = READ_ONCE(page->flags);
++
++ VM_WARN_ON_ONCE(gen >= MAX_NR_GENS);
++ VM_WARN_ON_ONCE(!rcu_read_lock_held());
++
++ do {
++ /* lru_gen_del_page() has isolated this page? */
++ if (!(old_flags & LRU_GEN_MASK)) {
++ /* for shrink_page_list() */
++ new_flags = old_flags | BIT(PG_referenced);
++ continue;
++ }
++
++ new_flags = old_flags & ~(LRU_GEN_MASK | LRU_REFS_MASK | LRU_REFS_FLAGS);
++ new_flags |= (gen + 1UL) << LRU_GEN_PGOFF;
++ } while (!try_cmpxchg(&page->flags, &old_flags, new_flags));
++
++ return ((old_flags & LRU_GEN_MASK) >> LRU_GEN_PGOFF) - 1;
++}
++
+ /* protect pages accessed multiple times through file descriptors */
+ static int page_inc_gen(struct lruvec *lruvec, struct page *page, bool reclaiming)
+ {
+@@ -3001,6 +3029,11 @@ static int page_inc_gen(struct lruvec *lruvec, struct page *page, bool reclaimin
+ VM_WARN_ON_ONCE_PAGE(!(old_flags & LRU_GEN_MASK), page);
+
+ do {
++ new_gen = ((old_flags & LRU_GEN_MASK) >> LRU_GEN_PGOFF) - 1;
++ /* page_update_gen() has promoted this page? */
++ if (new_gen >= 0 && new_gen != old_gen)
++ return new_gen;
++
+ new_gen = (old_gen + 1) % MAX_NR_GENS;
+
+ new_flags = old_flags & ~(LRU_GEN_MASK | LRU_REFS_MASK | LRU_REFS_FLAGS);
+@@ -3015,6 +3048,43 @@ static int page_inc_gen(struct lruvec *lruvec, struct page *page, bool reclaimin
+ return new_gen;
+ }
+
++static unsigned long get_pte_pfn(pte_t pte, struct vm_area_struct *vma, unsigned long addr)
++{
++ unsigned long pfn = pte_pfn(pte);
++
++ VM_WARN_ON_ONCE(addr < vma->vm_start || addr >= vma->vm_end);
++
++ if (!pte_present(pte) || is_zero_pfn(pfn))
++ return -1;
++
++ if (WARN_ON_ONCE(pte_devmap(pte) || pte_special(pte)))
++ return -1;
++
++ if (WARN_ON_ONCE(!pfn_valid(pfn)))
++ return -1;
++
++ return pfn;
++}
++
++static struct page *get_pfn_page(unsigned long pfn, struct mem_cgroup *memcg,
++ struct pglist_data *pgdat)
++{
++ struct page *page;
++
++ /* try to avoid unnecessary memory loads */
++ if (pfn < pgdat->node_start_pfn || pfn >= pgdat_end_pfn(pgdat))
++ return NULL;
++
++ page = compound_head(pfn_to_page(pfn));
++ if (page_to_nid(page) != pgdat->node_id)
++ return NULL;
++
++ if (page_memcg_rcu(page) != memcg)
++ return NULL;
++
++ return page;
++}
++
+ static void inc_min_seq(struct lruvec *lruvec, int type)
+ {
+ struct lru_gen_struct *lrugen = &lruvec->lrugen;
+@@ -3214,6 +3284,114 @@ static void lru_gen_age_node(struct pglist_data *pgdat, struct scan_control *sc)
+ } while ((memcg = mem_cgroup_iter(NULL, memcg, NULL)));
+ }
+
++/*
++ * This function exploits spatial locality when shrink_page_list() walks the
++ * rmap. It scans the adjacent PTEs of a young PTE and promotes hot pages.
++ */
++void lru_gen_look_around(struct page_vma_mapped_walk *pvmw)
++{
++ int i;
++ pte_t *pte;
++ unsigned long start;
++ unsigned long end;
++ unsigned long addr;
++ unsigned long bitmap[BITS_TO_LONGS(MIN_LRU_BATCH)] = {};
++ struct page *page = pvmw->page;
++ struct mem_cgroup *memcg = page_memcg(page);
++ struct pglist_data *pgdat = page_pgdat(page);
++ struct lruvec *lruvec = mem_cgroup_lruvec(memcg, pgdat);
++ DEFINE_MAX_SEQ(lruvec);
++ int old_gen, new_gen = lru_gen_from_seq(max_seq);
++
++ lockdep_assert_held(pvmw->ptl);
++ VM_WARN_ON_ONCE_PAGE(PageLRU(page), page);
++
++ if (spin_is_contended(pvmw->ptl))
++ return;
++
++ start = max(pvmw->address & PMD_MASK, pvmw->vma->vm_start);
++ end = min(pvmw->address | ~PMD_MASK, pvmw->vma->vm_end - 1) + 1;
++
++ if (end - start > MIN_LRU_BATCH * PAGE_SIZE) {
++ if (pvmw->address - start < MIN_LRU_BATCH * PAGE_SIZE / 2)
++ end = start + MIN_LRU_BATCH * PAGE_SIZE;
++ else if (end - pvmw->address < MIN_LRU_BATCH * PAGE_SIZE / 2)
++ start = end - MIN_LRU_BATCH * PAGE_SIZE;
++ else {
++ start = pvmw->address - MIN_LRU_BATCH * PAGE_SIZE / 2;
++ end = pvmw->address + MIN_LRU_BATCH * PAGE_SIZE / 2;
++ }
++ }
++
++ pte = pvmw->pte - (pvmw->address - start) / PAGE_SIZE;
++
++ rcu_read_lock();
++ arch_enter_lazy_mmu_mode();
++
++ for (i = 0, addr = start; addr != end; i++, addr += PAGE_SIZE) {
++ unsigned long pfn;
++
++ pfn = get_pte_pfn(pte[i], pvmw->vma, addr);
++ if (pfn == -1)
++ continue;
++
++ if (!pte_young(pte[i]))
++ continue;
++
++ page = get_pfn_page(pfn, memcg, pgdat);
++ if (!page)
++ continue;
++
++ if (!ptep_test_and_clear_young(pvmw->vma, addr, pte + i))
++ VM_WARN_ON_ONCE(true);
++
++ if (pte_dirty(pte[i]) && !PageDirty(page) &&
++ !(PageAnon(page) && PageSwapBacked(page) &&
++ !PageSwapCache(page)))
++ set_page_dirty(page);
++
++ old_gen = page_lru_gen(page);
++ if (old_gen < 0)
++ SetPageReferenced(page);
++ else if (old_gen != new_gen)
++ __set_bit(i, bitmap);
++ }
++
++ arch_leave_lazy_mmu_mode();
++ rcu_read_unlock();
++
++ if (bitmap_weight(bitmap, MIN_LRU_BATCH) < PAGEVEC_SIZE) {
++ for_each_set_bit(i, bitmap, MIN_LRU_BATCH) {
++ page = pte_page(pte[i]);
++ activate_page(page);
++ }
++ return;
++ }
++
++ /* page_update_gen() requires stable page_memcg() */
++ if (!mem_cgroup_trylock_pages(memcg))
++ return;
++
++ spin_lock_irq(&lruvec->lru_lock);
++ new_gen = lru_gen_from_seq(lruvec->lrugen.max_seq);
++
++ for_each_set_bit(i, bitmap, MIN_LRU_BATCH) {
++ page = compound_head(pte_page(pte[i]));
++ if (page_memcg_rcu(page) != memcg)
++ continue;
++
++ old_gen = page_update_gen(page, new_gen);
++ if (old_gen < 0 || old_gen == new_gen)
++ continue;
++
++ lru_gen_update_size(lruvec, page, old_gen, new_gen);
++ }
++
++ spin_unlock_irq(&lruvec->lru_lock);
++
++ mem_cgroup_unlock_pages();
++}
++
+ /******************************************************************************
+ * the eviction
+ ******************************************************************************/
+@@ -3250,6 +3428,12 @@ static bool sort_page(struct lruvec *lruvec, struct page *page, int tier_idx)
+ return true;
+ }
+
++ /* promoted */
++ if (gen != lru_gen_from_seq(lrugen->min_seq[type])) {
++ list_move(&page->lru, &lrugen->lists[gen][type][zone]);
++ return true;
++ }
++
+ /* protected */
+ if (tier > tier_idx) {
+ int hist = lru_hist_from_seq(lrugen->min_seq[type]);
+--
+2.40.0
+
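
The commit message above describes the look-around batching only in prose. Below is a minimal, self-contained userspace sketch of that idea, not the kernel implementation carried in the patch: all names and values here (struct sim_pte, look_around(), BATCH, the demo max_seq) are hypothetical stand-ins. On finding one young entry, the window around it is scanned once, accessed bits are cleared, and the mapped pages are promoted to the newest generation in a single pass.

/* lookaround_demo.c -- hypothetical userspace simulation, not kernel code */
#include <stdbool.h>
#include <stdio.h>

#define MAX_NR_GENS 4
#define BATCH       64    /* stand-in for the BITS_PER_LONG-sized window */

struct sim_pte {
	bool young;       /* simulated "accessed" bit */
	int gen;          /* generation of the page this PTE maps */
};

/*
 * When the rmap walk finds one young PTE at index 'hit', scan its
 * neighbours inside a single batch window, clear their accessed bits,
 * and move the mapped pages to the newest generation in one pass.
 */
static void look_around(struct sim_pte *pte, int nr, int hit, int max_seq)
{
	int new_gen = max_seq % MAX_NR_GENS;
	int start = hit > BATCH / 2 ? hit - BATCH / 2 : 0;
	int end = hit + BATCH / 2 < nr ? hit + BATCH / 2 : nr;

	for (int i = start; i < end; i++) {
		if (!pte[i].young)
			continue;
		pte[i].young = false;     /* cf. ptep_test_and_clear_young() */
		pte[i].gen = new_gen;     /* cf. page_update_gen() */
	}
}

int main(void)
{
	struct sim_pte pte[256] = { 0 };
	int max_seq = 7;              /* arbitrary demo value */
	int promoted = 0;

	pte[100].young = true;        /* the PTE the rmap walk hit */
	pte[101].young = true;        /* spatially adjacent young PTEs */
	pte[120].young = true;

	look_around(pte, 256, 100, max_seq);

	for (int i = 0; i < 256; i++)
		if (pte[i].gen == max_seq % MAX_NR_GENS && pte[i].gen != 0)
			promoted++;

	printf("promoted %d pages in a single rmap visit\n", promoted);
	return 0;
}

Compiled with a C99 compiler and run, the sketch reports three promotions from a single simulated rmap visit, which is the point of the optimization: one rmap lookup services many spatially adjacent young PTEs instead of one.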