Diffstat (limited to 'target/linux/generic/pending-5.15/020-04-mm-multigenerational-lru-mm_struct-list.patch')
-rw-r--r--  target/linux/generic/pending-5.15/020-04-mm-multigenerational-lru-mm_struct-list.patch  760
1 file changed, 760 insertions, 0 deletions
diff --git a/target/linux/generic/pending-5.15/020-04-mm-multigenerational-lru-mm_struct-list.patch b/target/linux/generic/pending-5.15/020-04-mm-multigenerational-lru-mm_struct-list.patch
new file mode 100644
index 0000000000..75fd39d99d
--- /dev/null
+++ b/target/linux/generic/pending-5.15/020-04-mm-multigenerational-lru-mm_struct-list.patch
@@ -0,0 +1,760 @@
+From 534bcc4a0bb5b24600891ce793f0295a142e9dae Mon Sep 17 00:00:00 2001
+From: Yu Zhao <yuzhao@google.com>
+Date: Mon, 5 Apr 2021 04:17:41 -0600
+Subject: [PATCH 05/10] mm: multigenerational lru: mm_struct list
+
+To scan PTEs for accessed pages, an mm_struct list is maintained for
+each memcg. When multiple threads traverse the same memcg->mm_list,
+each of them gets a unique mm_struct, so they can run
+walk_page_range() concurrently to reach the page tables of all
+processes in this memcg.
+
+This infrastructure also provides the following optimizations:
+ 1) it allows walkers to skip processes that have been sleeping since
+ the last walk by tracking the usage of mm_struct between context
+ switches.
+ 2) it allows walkers to add interesting items they find during a
+ walk to a Bloom filter, so that during the next walk they can skip
+ uninteresting items simply by testing this filter (see the sketch
+ below).
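The Bloom filter in item 2 is double-buffered and keyed by two indices derived from one hash, mirroring get_item_key(), set_bloom_filter() and test_bloom_filter() added to mm/vmscan.c below. A minimal userspace sketch under those assumptions (the multiplicative hash and the bloom_add()/bloom_test()/bloom_reset() names are stand-ins, not kernel APIs):

#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>

#define BLOOM_SHIFT	15
#define BLOOM_BITS	(1UL << BLOOM_SHIFT)
#define NR_FILTERS	2	/* double buffering: previous and current walk */

static uint64_t *filters[NR_FILTERS];

/* Split one hash into two bit indices, as get_item_key() does with hash_ptr(). */
static void item_keys(const void *item, uint32_t key[2])
{
	uint32_t hash = (uint32_t)((uintptr_t)item * 2654435761u);	/* stand-in hash */

	key[0] = hash & (BLOOM_BITS - 1);
	key[1] = (hash >> BLOOM_SHIFT) & (BLOOM_BITS - 1);
}

static void bloom_add(unsigned long seq, const void *item)
{
	uint64_t *f = filters[seq % NR_FILTERS];
	uint32_t key[2];

	if (!f)
		return;
	item_keys(item, key);
	f[key[0] / 64] |= 1ULL << (key[0] % 64);
	f[key[1] / 64] |= 1ULL << (key[1] % 64);
}

static bool bloom_test(unsigned long seq, const void *item)
{
	uint64_t *f = filters[seq % NR_FILTERS];
	uint32_t key[2];

	if (!f)
		return false;	/* no filter allocated for this round */
	item_keys(item, key);
	return (f[key[0] / 64] >> (key[0] % 64) & 1) &&
	       (f[key[1] / 64] >> (key[1] % 64) & 1);
}

/* Reuse (or allocate) the filter for a new round, as clear_bloom_filter() does. */
static void bloom_reset(unsigned long seq)
{
	int gen = seq % NR_FILTERS;

	free(filters[gen]);
	filters[gen] = calloc(BLOOM_BITS / 64, sizeof(uint64_t));
}

A false positive only costs an unnecessary revisit of an item; items added in the same round can never be missed, which is why clearing the filter once per round is sufficient.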
+
+Signed-off-by: Yu Zhao <yuzhao@google.com>
+Tested-by: Konstantin Kharlamov <Hi-Angel@yandex.ru>
+Change-Id: I25d9eda8c6bdc7c3653b9f210a159d6c247c81e8
+---
+ fs/exec.c | 2 +
+ include/linux/memcontrol.h | 4 +
+ include/linux/mm_inline.h | 6 +
+ include/linux/mm_types.h | 75 +++++++++
+ include/linux/mmzone.h | 63 +++++++
+ kernel/exit.c | 1 +
+ kernel/fork.c | 9 +
+ kernel/sched/core.c | 1 +
+ mm/memcontrol.c | 25 +++
+ mm/vmscan.c | 331 +++++++++++++++++++++++++++++++++++++
+ 10 files changed, 517 insertions(+)
+
+--- a/fs/exec.c
++++ b/fs/exec.c
+@@ -1013,6 +1013,7 @@ static int exec_mmap(struct mm_struct *m
+ active_mm = tsk->active_mm;
+ tsk->active_mm = mm;
+ tsk->mm = mm;
++ lru_gen_add_mm(mm);
+ /*
+ * This prevents preemption while active_mm is being loaded and
+ * it and mm are being updated, which could cause problems for
+@@ -1023,6 +1024,7 @@ static int exec_mmap(struct mm_struct *m
+ if (!IS_ENABLED(CONFIG_ARCH_WANT_IRQS_OFF_ACTIVATE_MM))
+ local_irq_enable();
+ activate_mm(active_mm, mm);
++ lru_gen_activate_mm(mm);
+ if (IS_ENABLED(CONFIG_ARCH_WANT_IRQS_OFF_ACTIVATE_MM))
+ local_irq_enable();
+ tsk->mm->vmacache_seqnum = 0;
+--- a/include/linux/memcontrol.h
++++ b/include/linux/memcontrol.h
+@@ -348,6 +348,10 @@ struct mem_cgroup {
+ struct deferred_split deferred_split_queue;
+ #endif
+
++#ifdef CONFIG_LRU_GEN
++ struct lru_gen_mm_list mm_list;
++#endif
++
+ struct mem_cgroup_per_node *nodeinfo[];
+ };
+
+--- a/include/linux/mm_inline.h
++++ b/include/linux/mm_inline.h
+@@ -100,6 +100,12 @@ static inline int lru_gen_from_seq(unsig
+ return seq % MAX_NR_GENS;
+ }
+
+/* Return a proper index regardless of whether we keep stats for historical generations. */
++static inline int lru_hist_from_seq(unsigned long seq)
++{
++ return seq % NR_HIST_GENS;
++}
++
+ /* The youngest and the second youngest generations are counted as active. */
+ static inline bool lru_gen_is_active(struct lruvec *lruvec, int gen)
+ {
+--- a/include/linux/mm_types.h
++++ b/include/linux/mm_types.h
+@@ -3,6 +3,7 @@
+ #define _LINUX_MM_TYPES_H
+
+ #include <linux/mm_types_task.h>
++#include <linux/sched.h>
+
+ #include <linux/auxvec.h>
+ #include <linux/list.h>
+@@ -15,6 +16,8 @@
+ #include <linux/page-flags-layout.h>
+ #include <linux/workqueue.h>
+ #include <linux/seqlock.h>
++#include <linux/nodemask.h>
++#include <linux/mmdebug.h>
+
+ #include <asm/mmu.h>
+
+@@ -580,6 +583,18 @@ struct mm_struct {
+ #ifdef CONFIG_IOMMU_SUPPORT
+ u32 pasid;
+ #endif
++#ifdef CONFIG_LRU_GEN
++ struct {
++ /* the node of a global or per-memcg mm_struct list */
++ struct list_head list;
++#ifdef CONFIG_MEMCG
++ /* points to the memcg of the owner task above */
++ struct mem_cgroup *memcg;
++#endif
++ /* whether this mm_struct has been used since the last walk */
++ nodemask_t nodes;
++ } lrugen;
++#endif /* CONFIG_LRU_GEN */
+ } __randomize_layout;
+
+ /*
+@@ -606,6 +621,66 @@ static inline cpumask_t *mm_cpumask(stru
+ return (struct cpumask *)&mm->cpu_bitmap;
+ }
+
++#ifdef CONFIG_LRU_GEN
++
++struct lru_gen_mm_list {
++ /* a global or per-memcg mm_struct list */
++ struct list_head fifo;
++ /* protects the list above */
++ spinlock_t lock;
++};
++
++void lru_gen_add_mm(struct mm_struct *mm);
++void lru_gen_del_mm(struct mm_struct *mm);
++#ifdef CONFIG_MEMCG
++void lru_gen_migrate_mm(struct mm_struct *mm);
++#endif
++
++static inline void lru_gen_init_mm(struct mm_struct *mm)
++{
++ INIT_LIST_HEAD(&mm->lrugen.list);
++#ifdef CONFIG_MEMCG
++ mm->lrugen.memcg = NULL;
++#endif
++ nodes_clear(mm->lrugen.nodes);
++}
++
++/* Track the usage of each mm_struct so that we can skip inactive ones. */
++static inline void lru_gen_activate_mm(struct mm_struct *mm)
++{
++ /* unlikely but not a bug when racing with lru_gen_migrate_mm() */
++ VM_WARN_ON(list_empty(&mm->lrugen.list));
++
++ if (!(current->flags & PF_KTHREAD) && !nodes_full(mm->lrugen.nodes))
++ nodes_setall(mm->lrugen.nodes);
++}
++
++#else /* !CONFIG_LRU_GEN */
++
++static inline void lru_gen_add_mm(struct mm_struct *mm)
++{
++}
++
++static inline void lru_gen_del_mm(struct mm_struct *mm)
++{
++}
++
++#ifdef CONFIG_MEMCG
++static inline void lru_gen_migrate_mm(struct mm_struct *mm)
++{
++}
++#endif
++
++static inline void lru_gen_init_mm(struct mm_struct *mm)
++{
++}
++
++static inline void lru_gen_activate_mm(struct mm_struct *mm)
++{
++}
++
++#endif /* CONFIG_LRU_GEN */
++
+ struct mmu_gather;
+ extern void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm);
+ extern void tlb_gather_mmu_fullmm(struct mmu_gather *tlb, struct mm_struct *mm);
+--- a/include/linux/mmzone.h
++++ b/include/linux/mmzone.h
+@@ -318,6 +318,13 @@ struct lruvec;
+ #define MIN_NR_GENS 2
+ #define MAX_NR_GENS ((unsigned int)CONFIG_NR_LRU_GENS)
+
++/* Whether to keep stats for historical generations. */
++#ifdef CONFIG_LRU_GEN_STATS
++#define NR_HIST_GENS ((unsigned int)CONFIG_NR_LRU_GENS)
++#else
++#define NR_HIST_GENS 1U
++#endif
++
+ struct lrugen {
+ /* the aging increments the max generation number */
+ unsigned long max_seq;
+@@ -333,13 +340,63 @@ struct lrugen {
+ bool enabled[ANON_AND_FILE];
+ };
+
++enum {
++ MM_LEAF_TOTAL, /* total leaf entries */
++ MM_LEAF_OLD, /* old leaf entries */
++ MM_LEAF_YOUNG, /* young leaf entries */
++ MM_NONLEAF_TOTAL, /* total non-leaf entries */
++ MM_NONLEAF_PREV, /* previously worthy non-leaf entries */
++ MM_NONLEAF_CUR, /* currently worthy non-leaf entries */
++ NR_MM_STATS
++};
++
++/* mnemonic codes for the stats above */
++#define MM_STAT_CODES "toydpc"
++
++/* double buffering bloom filters */
++#define NR_BLOOM_FILTERS 2
++
++struct lru_gen_mm_walk {
++ /* set to max_seq after each round of walk */
++ unsigned long seq;
++ /* the next mm_struct on the list to walk */
++ struct list_head *head;
++ /* the first mm_struct never walked before */
++ struct list_head *tail;
++ /* to wait for the last walker to finish */
++ struct wait_queue_head wait;
++ /* bloom filters flip after each round of walk */
++ unsigned long *filters[NR_BLOOM_FILTERS];
++ /* page table stats for debugging */
++ unsigned long stats[NR_HIST_GENS][NR_MM_STATS];
++ /* the number of concurrent walkers */
++ int nr_walkers;
++};
++
++#define MIN_BATCH_SIZE 64
+ #define MAX_BATCH_SIZE 8192
+
++struct mm_walk_args {
++ struct mem_cgroup *memcg;
++ unsigned long max_seq;
++ unsigned long start_pfn;
++ unsigned long end_pfn;
++ unsigned long next_addr;
++ unsigned long bitmap[BITS_TO_LONGS(MIN_BATCH_SIZE)];
++ int node_id;
++ int swappiness;
++ int batch_size;
++ int nr_pages[MAX_NR_GENS][ANON_AND_FILE][MAX_NR_ZONES];
++ int mm_stats[NR_MM_STATS];
++ bool use_filter;
++};
++
+ void lru_gen_init_state(struct mem_cgroup *memcg, struct lruvec *lruvec);
+ void lru_gen_change_state(bool enable, bool main, bool swap);
+
+ #ifdef CONFIG_MEMCG
+ void lru_gen_init_memcg(struct mem_cgroup *memcg);
++void lru_gen_free_memcg(struct mem_cgroup *memcg);
+ #endif
+
+ #else /* !CONFIG_LRU_GEN */
+@@ -356,6 +413,10 @@ static inline void lru_gen_change_state(
+ static inline void lru_gen_init_memcg(struct mem_cgroup *memcg)
+ {
+ }
++
++static inline void lru_gen_free_memcg(struct mem_cgroup *memcg)
++{
++}
+ #endif
+
+ #endif /* CONFIG_LRU_GEN */
+@@ -380,6 +441,8 @@ struct lruvec {
+ #ifdef CONFIG_LRU_GEN
+ /* unevictable pages are on LRU_UNEVICTABLE */
+ struct lrugen evictable;
++ /* state for mm list and page table walks */
++ struct lru_gen_mm_walk mm_walk;
+ #endif
+ #ifdef CONFIG_MEMCG
+ struct pglist_data *pgdat;
+--- a/kernel/exit.c
++++ b/kernel/exit.c
+@@ -422,6 +422,7 @@ assign_new_owner:
+ goto retry;
+ }
+ WRITE_ONCE(mm->owner, c);
++ lru_gen_migrate_mm(mm);
+ task_unlock(c);
+ put_task_struct(c);
+ }
+--- a/kernel/fork.c
++++ b/kernel/fork.c
+@@ -1080,6 +1080,7 @@ static struct mm_struct *mm_init(struct
+ goto fail_nocontext;
+
+ mm->user_ns = get_user_ns(user_ns);
++ lru_gen_init_mm(mm);
+ return mm;
+
+ fail_nocontext:
+@@ -1122,6 +1123,7 @@ static inline void __mmput(struct mm_str
+ }
+ if (mm->binfmt)
+ module_put(mm->binfmt->module);
++ lru_gen_del_mm(mm);
+ mmdrop(mm);
+ }
+
+@@ -2616,6 +2618,13 @@ pid_t kernel_clone(struct kernel_clone_a
+ get_task_struct(p);
+ }
+
++ if (IS_ENABLED(CONFIG_LRU_GEN) && !(clone_flags & CLONE_VM)) {
++ /* lock the task to synchronize with memcg migration */
++ task_lock(p);
++ lru_gen_add_mm(p->mm);
++ task_unlock(p);
++ }
++
+ wake_up_new_task(p);
+
+ /* forking complete and child started to run, tell ptracer */
+--- a/kernel/sched/core.c
++++ b/kernel/sched/core.c
+@@ -4978,6 +4978,7 @@ context_switch(struct rq *rq, struct tas
+ * finish_task_switch()'s mmdrop().
+ */
+ switch_mm_irqs_off(prev->active_mm, next->mm, next);
++ lru_gen_activate_mm(next->mm);
+
+ if (!prev->mm) { // from kernel
+ /* will mmdrop() in finish_task_switch(). */
+--- a/mm/memcontrol.c
++++ b/mm/memcontrol.c
+@@ -5163,6 +5163,7 @@ static void __mem_cgroup_free(struct mem
+
+ static void mem_cgroup_free(struct mem_cgroup *memcg)
+ {
++ lru_gen_free_memcg(memcg);
+ memcg_wb_domain_exit(memcg);
+ __mem_cgroup_free(memcg);
+ }
+@@ -6195,6 +6196,29 @@ static void mem_cgroup_move_task(void)
+ }
+ #endif
+
++#ifdef CONFIG_LRU_GEN
++static void mem_cgroup_attach(struct cgroup_taskset *tset)
++{
++ struct cgroup_subsys_state *css;
++ struct task_struct *task = NULL;
++
++ cgroup_taskset_for_each_leader(task, css, tset)
++ break;
++
++ if (!task)
++ return;
++
++ task_lock(task);
++ if (task->mm && task->mm->owner == task)
++ lru_gen_migrate_mm(task->mm);
++ task_unlock(task);
++}
++#else
++static void mem_cgroup_attach(struct cgroup_taskset *tset)
++{
++}
++#endif /* CONFIG_LRU_GEN */
++
+ static int seq_puts_memcg_tunable(struct seq_file *m, unsigned long value)
+ {
+ if (value == PAGE_COUNTER_MAX)
+@@ -6538,6 +6562,7 @@ struct cgroup_subsys memory_cgrp_subsys
+ .css_reset = mem_cgroup_css_reset,
+ .css_rstat_flush = mem_cgroup_css_rstat_flush,
+ .can_attach = mem_cgroup_can_attach,
++ .attach = mem_cgroup_attach,
+ .cancel_attach = mem_cgroup_cancel_attach,
+ .post_attach = mem_cgroup_move_task,
+ .dfl_cftypes = memory_files,
+--- a/mm/vmscan.c
++++ b/mm/vmscan.c
+@@ -2929,6 +2929,306 @@ static bool __maybe_unused seq_is_valid(
+ }
+
+ /******************************************************************************
++ * mm_struct list
++ ******************************************************************************/
++
++static struct lru_gen_mm_list *get_mm_list(struct mem_cgroup *memcg)
++{
++ static struct lru_gen_mm_list mm_list = {
++ .fifo = LIST_HEAD_INIT(mm_list.fifo),
++ .lock = __SPIN_LOCK_UNLOCKED(mm_list.lock),
++ };
++
++#ifdef CONFIG_MEMCG
++ if (memcg)
++ return &memcg->mm_list;
++#endif
++ return &mm_list;
++}
++
++void lru_gen_add_mm(struct mm_struct *mm)
++{
++ int nid;
++ struct mem_cgroup *memcg = get_mem_cgroup_from_mm(mm);
++ struct lru_gen_mm_list *mm_list = get_mm_list(memcg);
++
++ VM_BUG_ON_MM(!list_empty(&mm->lrugen.list), mm);
++#ifdef CONFIG_MEMCG
++ VM_BUG_ON_MM(mm->lrugen.memcg, mm);
++ mm->lrugen.memcg = memcg;
++#endif
++ spin_lock(&mm_list->lock);
++
++ list_add_tail(&mm->lrugen.list, &mm_list->fifo);
++
++ for_each_node(nid) {
++ struct lruvec *lruvec = get_lruvec(nid, memcg);
++
++ if (!lruvec)
++ continue;
++
++ if (lruvec->mm_walk.tail == &mm_list->fifo)
++ lruvec->mm_walk.tail = lruvec->mm_walk.tail->prev;
++ }
++
++ spin_unlock(&mm_list->lock);
++}
++
++void lru_gen_del_mm(struct mm_struct *mm)
++{
++ int nid;
++ struct lru_gen_mm_list *mm_list;
++ struct mem_cgroup *memcg = NULL;
++
++ if (list_empty(&mm->lrugen.list))
++ return;
++
++#ifdef CONFIG_MEMCG
++ memcg = mm->lrugen.memcg;
++#endif
++ mm_list = get_mm_list(memcg);
++
++ spin_lock(&mm_list->lock);
++
++ for_each_node(nid) {
++ struct lruvec *lruvec = get_lruvec(nid, memcg);
++
++ if (!lruvec)
++ continue;
++
++ if (lruvec->mm_walk.tail == &mm->lrugen.list)
++ lruvec->mm_walk.tail = lruvec->mm_walk.tail->next;
++
++ if (lruvec->mm_walk.head != &mm->lrugen.list)
++ continue;
++
++ lruvec->mm_walk.head = lruvec->mm_walk.head->next;
++ if (lruvec->mm_walk.head == &mm_list->fifo)
++ WRITE_ONCE(lruvec->mm_walk.seq, lruvec->mm_walk.seq + 1);
++ }
++
++ list_del_init(&mm->lrugen.list);
++
++ spin_unlock(&mm_list->lock);
++
++#ifdef CONFIG_MEMCG
++ mem_cgroup_put(mm->lrugen.memcg);
++ mm->lrugen.memcg = NULL;
++#endif
++}
++
++#ifdef CONFIG_MEMCG
++void lru_gen_migrate_mm(struct mm_struct *mm)
++{
++ struct mem_cgroup *memcg;
++
++ lockdep_assert_held(&mm->owner->alloc_lock);
++
++ if (mem_cgroup_disabled())
++ return;
++
++ rcu_read_lock();
++ memcg = mem_cgroup_from_task(mm->owner);
++ rcu_read_unlock();
++ if (memcg == mm->lrugen.memcg)
++ return;
++
++ VM_BUG_ON_MM(!mm->lrugen.memcg, mm);
++ VM_BUG_ON_MM(list_empty(&mm->lrugen.list), mm);
++
++ lru_gen_del_mm(mm);
++ lru_gen_add_mm(mm);
++}
++#endif
++
++#define BLOOM_FILTER_SHIFT 15
++
++static inline int filter_gen_from_seq(unsigned long seq)
++{
++ return seq % NR_BLOOM_FILTERS;
++}
++
++static void get_item_key(void *item, int *key)
++{
++ u32 hash = hash_ptr(item, BLOOM_FILTER_SHIFT * 2);
++
++ BUILD_BUG_ON(BLOOM_FILTER_SHIFT * 2 > BITS_PER_TYPE(u32));
++
++ key[0] = hash & (BIT(BLOOM_FILTER_SHIFT) - 1);
++ key[1] = hash >> BLOOM_FILTER_SHIFT;
++}
++
++static void clear_bloom_filter(struct lruvec *lruvec, unsigned long seq)
++{
++ unsigned long *filter;
++ int gen = filter_gen_from_seq(seq);
++
++ lockdep_assert_held(&get_mm_list(lruvec_memcg(lruvec))->lock);
++
++ filter = lruvec->mm_walk.filters[gen];
++ if (filter) {
++ bitmap_clear(filter, 0, BIT(BLOOM_FILTER_SHIFT));
++ return;
++ }
++
++ filter = bitmap_zalloc(BIT(BLOOM_FILTER_SHIFT), GFP_ATOMIC);
++ WRITE_ONCE(lruvec->mm_walk.filters[gen], filter);
++}
++
++static void set_bloom_filter(struct lruvec *lruvec, unsigned long seq, void *item)
++{
++ int key[2];
++ unsigned long *filter;
++ int gen = filter_gen_from_seq(seq);
++
++ filter = READ_ONCE(lruvec->mm_walk.filters[gen]);
++ if (!filter)
++ return;
++
++ get_item_key(item, key);
++
++ if (!test_bit(key[0], filter))
++ set_bit(key[0], filter);
++ if (!test_bit(key[1], filter))
++ set_bit(key[1], filter);
++}
++
++static bool test_bloom_filter(struct lruvec *lruvec, unsigned long seq, void *item)
++{
++ int key[2];
++ unsigned long *filter;
++ int gen = filter_gen_from_seq(seq);
++
++ filter = READ_ONCE(lruvec->mm_walk.filters[gen]);
++ if (!filter)
++ return false;
++
++ get_item_key(item, key);
++
++ return test_bit(key[0], filter) && test_bit(key[1], filter);
++}
++
++static void reset_mm_stats(struct lruvec *lruvec, bool last, struct mm_walk_args *args)
++{
++ int i;
++ int hist = lru_hist_from_seq(args->max_seq);
++
++ lockdep_assert_held(&get_mm_list(lruvec_memcg(lruvec))->lock);
++
++ for (i = 0; i < NR_MM_STATS; i++) {
++ WRITE_ONCE(lruvec->mm_walk.stats[hist][i],
++ lruvec->mm_walk.stats[hist][i] + args->mm_stats[i]);
++ args->mm_stats[i] = 0;
++ }
++
++ if (!last || NR_HIST_GENS == 1)
++ return;
++
++ hist = lru_hist_from_seq(args->max_seq + 1);
++ for (i = 0; i < NR_MM_STATS; i++)
++ WRITE_ONCE(lruvec->mm_walk.stats[hist][i], 0);
++}
++
++static bool should_skip_mm(struct mm_struct *mm, struct mm_walk_args *args)
++{
++ int type;
++ unsigned long size = 0;
++
++ if (cpumask_empty(mm_cpumask(mm)) && !node_isset(args->node_id, mm->lrugen.nodes))
++ return true;
++
++ if (mm_is_oom_victim(mm))
++ return true;
++
++ for (type = !args->swappiness; type < ANON_AND_FILE; type++) {
++ size += type ? get_mm_counter(mm, MM_FILEPAGES) :
++ get_mm_counter(mm, MM_ANONPAGES) +
++ get_mm_counter(mm, MM_SHMEMPAGES);
++ }
++
++ if (size < MIN_BATCH_SIZE)
++ return true;
++
++ if (!mmget_not_zero(mm))
++ return true;
++
++ node_clear(args->node_id, mm->lrugen.nodes);
++
++ return false;
++}
++
++/* To support multiple walkers that concurrently walk an mm_struct list. */
++static bool get_next_mm(struct lruvec *lruvec, struct mm_walk_args *args,
++ struct mm_struct **iter)
++{
++ bool first = false;
++ bool last = true;
++ struct mm_struct *mm = NULL;
++ struct lru_gen_mm_walk *mm_walk = &lruvec->mm_walk;
++ struct lru_gen_mm_list *mm_list = get_mm_list(args->memcg);
++
++ if (*iter)
++ mmput_async(*iter);
++ else if (args->max_seq <= READ_ONCE(mm_walk->seq))
++ return false;
++
++ spin_lock(&mm_list->lock);
++
++ VM_BUG_ON(args->max_seq > mm_walk->seq + 1);
++ VM_BUG_ON(*iter && args->max_seq < mm_walk->seq);
++ VM_BUG_ON(*iter && !mm_walk->nr_walkers);
++
++ if (args->max_seq <= mm_walk->seq) {
++ if (!*iter)
++ last = false;
++ goto done;
++ }
++
++ if (mm_walk->head == &mm_list->fifo) {
++ VM_BUG_ON(mm_walk->nr_walkers);
++ mm_walk->head = mm_walk->head->next;
++ first = true;
++ }
++
++ while (!mm && mm_walk->head != &mm_list->fifo) {
++ mm = list_entry(mm_walk->head, struct mm_struct, lrugen.list);
++
++ mm_walk->head = mm_walk->head->next;
++
++ if (mm_walk->tail == &mm->lrugen.list) {
++ mm_walk->tail = mm_walk->tail->next;
++ args->use_filter = false;
++ }
++
++ if (should_skip_mm(mm, args))
++ mm = NULL;
++ }
++
++ if (mm_walk->head == &mm_list->fifo)
++ WRITE_ONCE(mm_walk->seq, mm_walk->seq + 1);
++done:
++ if (*iter && !mm)
++ mm_walk->nr_walkers--;
++ if (!*iter && mm)
++ mm_walk->nr_walkers++;
++
++ if (mm_walk->nr_walkers)
++ last = false;
++
++ if (mm && first)
++ clear_bloom_filter(lruvec, args->max_seq + 1);
++
++ if (*iter || last)
++ reset_mm_stats(lruvec, last, args);
++
++ spin_unlock(&mm_list->lock);
++
++ *iter = mm;
++
++ return last;
++}
++
++/******************************************************************************
+ * state change
+ ******************************************************************************/
+
+@@ -3112,6 +3412,7 @@ void lru_gen_init_state(struct mem_cgrou
+ int i;
+ int gen, type, zone;
+ struct lrugen *lrugen = &lruvec->evictable;
++ struct lru_gen_mm_list *mm_list = get_mm_list(memcg);
+
+ lrugen->max_seq = MIN_NR_GENS + 1;
+ lrugen->enabled[0] = lru_gen_enabled() && lru_gen_nr_swapfiles;
+@@ -3122,6 +3423,17 @@ void lru_gen_init_state(struct mem_cgrou
+
+ for_each_gen_type_zone(gen, type, zone)
+ INIT_LIST_HEAD(&lrugen->lists[gen][type][zone]);
++
++ if (IS_ENABLED(CONFIG_MEMORY_HOTPLUG) && !memcg)
++ spin_lock(&mm_list->lock);
++
++ lruvec->mm_walk.seq = MIN_NR_GENS;
++ lruvec->mm_walk.head = &mm_list->fifo;
++ lruvec->mm_walk.tail = &mm_list->fifo;
++ init_waitqueue_head(&lruvec->mm_walk.wait);
++
++ if (IS_ENABLED(CONFIG_MEMORY_HOTPLUG) && !memcg)
++ spin_unlock(&mm_list->lock);
+ }
+
+ #ifdef CONFIG_MEMCG
+@@ -3129,18 +3441,37 @@ void lru_gen_init_memcg(struct mem_cgrou
+ {
+ int nid;
+
++ INIT_LIST_HEAD(&memcg->mm_list.fifo);
++ spin_lock_init(&memcg->mm_list.lock);
++
+ for_each_node(nid) {
+ struct lruvec *lruvec = get_lruvec(nid, memcg);
+
+ lru_gen_init_state(memcg, lruvec);
+ }
+ }
++
++void lru_gen_free_memcg(struct mem_cgroup *memcg)
++{
++ int nid;
++
++ for_each_node(nid) {
++ int i;
++ struct lruvec *lruvec = get_lruvec(nid, memcg);
++
++ for (i = 0; i < NR_BLOOM_FILTERS; i++) {
++ bitmap_free(lruvec->mm_walk.filters[i]);
++ lruvec->mm_walk.filters[i] = NULL;
++ }
++ }
++}
+ #endif
+
+ static int __init init_lru_gen(void)
+ {
+ BUILD_BUG_ON(MIN_NR_GENS + 1 >= MAX_NR_GENS);
+ BUILD_BUG_ON(BIT(LRU_GEN_WIDTH) <= MAX_NR_GENS);
++ BUILD_BUG_ON(sizeof(MM_STAT_CODES) != NR_MM_STATS + 1);
+
+ return 0;
+ };