diff options
Diffstat (limited to 'target/linux/generic/pending-5.15/020-07-mm-multigenerational-lru-user-interface.patch')
-rw-r--r-- | target/linux/generic/pending-5.15/020-07-mm-multigenerational-lru-user-interface.patch | 496 |
1 files changed, 496 insertions, 0 deletions
diff --git a/target/linux/generic/pending-5.15/020-07-mm-multigenerational-lru-user-interface.patch b/target/linux/generic/pending-5.15/020-07-mm-multigenerational-lru-user-interface.patch new file mode 100644 index 0000000000..a1a749fc38 --- /dev/null +++ b/target/linux/generic/pending-5.15/020-07-mm-multigenerational-lru-user-interface.patch @@ -0,0 +1,496 @@ +From 5cc7fdec54e87e32b4fb0f07d84b21769d5f8d92 Mon Sep 17 00:00:00 2001 +From: Yu Zhao <yuzhao@google.com> +Date: Mon, 25 Jan 2021 21:38:02 -0700 +Subject: [PATCH 08/10] mm: multigenerational lru: user interface + +Add /sys/kernel/mm/lru_gen/enabled to enable and disable the +multigenerational lru at runtime. + +Add /sys/kernel/mm/lru_gen/min_ttl_ms to protect the working set of a +given number of milliseconds. The OOM killer is invoked if this +working set cannot be kept in memory. + +Add /sys/kernel/debug/lru_gen to monitor the multigenerational lru and +invoke the aging and the eviction. This file has the following output: + memcg memcg_id memcg_path + node node_id + min_gen birth_time anon_size file_size + ... + max_gen birth_time anon_size file_size + +min_gen is the oldest generation number and max_gen is the youngest +generation number. birth_time is in milliseconds. anon_size and +file_size are in pages. + +This file takes the following input: + + memcg_id node_id max_gen [swappiness] [use_bloom_filter] + - memcg_id node_id min_gen [swappiness] [nr_to_reclaim] + +The first command line invokes the aging, which scans PTEs for +accessed pages and then creates the next generation max_gen+1. A swap +file and a non-zero swappiness, which overrides vm.swappiness, are +required to scan PTEs mapping anon pages. The second command line +invokes the eviction, which evicts generations less than or equal to +min_gen. min_gen should be less than max_gen-1 as max_gen and +max_gen-1 are not fully aged and therefore cannot be evicted. +Setting nr_to_reclaim to N limits the number of pages to evict. +Setting use_bloom_filter to 0 overrides the default behavior which +only scans PTE tables found populated. Multiple command lines are +supported, as is concatenation with delimiters "," and ";". + +Signed-off-by: Yu Zhao <yuzhao@google.com> +Tested-by: Konstantin Kharlamov <Hi-Angel@yandex.ru> +Change-Id: I4448e60029badbe347aa3b624f429b280cc3a3d3 +--- + include/linux/nodemask.h | 1 + + mm/vmscan.c | 415 +++++++++++++++++++++++++++++++++++++++ + 2 files changed, 416 insertions(+) + +--- a/include/linux/nodemask.h ++++ b/include/linux/nodemask.h +@@ -485,6 +485,7 @@ static inline int num_node_state(enum no + #define first_online_node 0 + #define first_memory_node 0 + #define next_online_node(nid) (MAX_NUMNODES) ++#define next_memory_node(nid) (MAX_NUMNODES) + #define nr_node_ids 1U + #define nr_online_nodes 1U + +--- a/mm/vmscan.c ++++ b/mm/vmscan.c +@@ -53,6 +53,8 @@ + #include <linux/memory.h> + #include <linux/pagewalk.h> + #include <linux/shmem_fs.h> ++#include <linux/ctype.h> ++#include <linux/debugfs.h> + + #include <asm/tlbflush.h> + #include <asm/div64.h> +@@ -4882,6 +4884,413 @@ unlock: + } + + /****************************************************************************** ++ * sysfs interface ++ ******************************************************************************/ ++ ++static ssize_t show_min_ttl(struct kobject *kobj, struct kobj_attribute *attr, char *buf) ++{ ++ return sprintf(buf, "%u\n", jiffies_to_msecs(READ_ONCE(lru_gen_min_ttl))); ++} ++ ++static ssize_t store_min_ttl(struct kobject *kobj, struct kobj_attribute *attr, ++ const char *buf, size_t len) ++{ ++ unsigned int msecs; ++ ++ if (kstrtouint(buf, 10, &msecs)) ++ return -EINVAL; ++ ++ WRITE_ONCE(lru_gen_min_ttl, msecs_to_jiffies(msecs)); ++ ++ return len; ++} ++ ++static struct kobj_attribute lru_gen_min_ttl_attr = __ATTR( ++ min_ttl_ms, 0644, show_min_ttl, store_min_ttl ++); ++ ++static ssize_t show_enable(struct kobject *kobj, struct kobj_attribute *attr, char *buf) ++{ ++ return snprintf(buf, PAGE_SIZE, "%d\n", lru_gen_enabled()); ++} ++ ++static ssize_t store_enable(struct kobject *kobj, struct kobj_attribute *attr, ++ const char *buf, size_t len) ++{ ++ bool enable; ++ ++ if (kstrtobool(buf, &enable)) ++ return -EINVAL; ++ ++ lru_gen_change_state(enable, true, false); ++ ++ return len; ++} ++ ++static struct kobj_attribute lru_gen_enabled_attr = __ATTR( ++ enabled, 0644, show_enable, store_enable ++); ++ ++static struct attribute *lru_gen_attrs[] = { ++ &lru_gen_min_ttl_attr.attr, ++ &lru_gen_enabled_attr.attr, ++ NULL ++}; ++ ++static struct attribute_group lru_gen_attr_group = { ++ .name = "lru_gen", ++ .attrs = lru_gen_attrs, ++}; ++ ++/****************************************************************************** ++ * debugfs interface ++ ******************************************************************************/ ++ ++static void *lru_gen_seq_start(struct seq_file *m, loff_t *pos) ++{ ++ struct mem_cgroup *memcg; ++ loff_t nr_to_skip = *pos; ++ ++ m->private = kvmalloc(PATH_MAX, GFP_KERNEL); ++ if (!m->private) ++ return ERR_PTR(-ENOMEM); ++ ++ memcg = mem_cgroup_iter(NULL, NULL, NULL); ++ do { ++ int nid; ++ ++ for_each_node_state(nid, N_MEMORY) { ++ if (!nr_to_skip--) ++ return get_lruvec(nid, memcg); ++ } ++ } while ((memcg = mem_cgroup_iter(NULL, memcg, NULL))); ++ ++ return NULL; ++} ++ ++static void lru_gen_seq_stop(struct seq_file *m, void *v) ++{ ++ if (!IS_ERR_OR_NULL(v)) ++ mem_cgroup_iter_break(NULL, lruvec_memcg(v)); ++ ++ kvfree(m->private); ++ m->private = NULL; ++} ++ ++static void *lru_gen_seq_next(struct seq_file *m, void *v, loff_t *pos) ++{ ++ int nid = lruvec_pgdat(v)->node_id; ++ struct mem_cgroup *memcg = lruvec_memcg(v); ++ ++ ++*pos; ++ ++ nid = next_memory_node(nid); ++ if (nid == MAX_NUMNODES) { ++ memcg = mem_cgroup_iter(NULL, memcg, NULL); ++ if (!memcg) ++ return NULL; ++ ++ nid = first_memory_node; ++ } ++ ++ return get_lruvec(nid, memcg); ++} ++ ++static void lru_gen_seq_show_full(struct seq_file *m, struct lruvec *lruvec, ++ unsigned long max_seq, unsigned long *min_seq, ++ unsigned long seq) ++{ ++ int i; ++ int type, tier; ++ int hist = lru_hist_from_seq(seq); ++ struct lrugen *lrugen = &lruvec->evictable; ++ ++ for (tier = 0; tier < MAX_NR_TIERS; tier++) { ++ seq_printf(m, " %10d", tier); ++ for (type = 0; type < ANON_AND_FILE; type++) { ++ unsigned long n[3] = {}; ++ ++ if (seq == max_seq) { ++ n[0] = READ_ONCE(lrugen->avg_refaulted[type][tier]); ++ n[1] = READ_ONCE(lrugen->avg_total[type][tier]); ++ ++ seq_printf(m, " %10luR %10luT %10lu ", n[0], n[1], n[2]); ++ } else if (seq == min_seq[type] || NR_HIST_GENS > 1) { ++ n[0] = atomic_long_read(&lrugen->refaulted[hist][type][tier]); ++ n[1] = atomic_long_read(&lrugen->evicted[hist][type][tier]); ++ if (tier) ++ n[2] = READ_ONCE(lrugen->protected[hist][type][tier - 1]); ++ ++ seq_printf(m, " %10lur %10lue %10lup", n[0], n[1], n[2]); ++ } else ++ seq_puts(m, " 0 0 0 "); ++ } ++ seq_putc(m, '\n'); ++ } ++ ++ seq_puts(m, " "); ++ for (i = 0; i < NR_MM_STATS; i++) { ++ if (seq == max_seq && NR_HIST_GENS == 1) ++ seq_printf(m, " %10lu%c", READ_ONCE(lruvec->mm_walk.stats[hist][i]), ++ toupper(MM_STAT_CODES[i])); ++ else if (seq != max_seq && NR_HIST_GENS > 1) ++ seq_printf(m, " %10lu%c", READ_ONCE(lruvec->mm_walk.stats[hist][i]), ++ MM_STAT_CODES[i]); ++ else ++ seq_puts(m, " 0 "); ++ } ++ seq_putc(m, '\n'); ++} ++ ++static int lru_gen_seq_show(struct seq_file *m, void *v) ++{ ++ unsigned long seq; ++ bool full = !debugfs_real_fops(m->file)->write; ++ struct lruvec *lruvec = v; ++ struct lrugen *lrugen = &lruvec->evictable; ++ int nid = lruvec_pgdat(lruvec)->node_id; ++ struct mem_cgroup *memcg = lruvec_memcg(lruvec); ++ DEFINE_MAX_SEQ(lruvec); ++ DEFINE_MIN_SEQ(lruvec); ++ ++ if (nid == first_memory_node) { ++ const char *path = memcg ? m->private : ""; ++ ++#ifdef CONFIG_MEMCG ++ if (memcg) ++ cgroup_path(memcg->css.cgroup, m->private, PATH_MAX); ++#endif ++ seq_printf(m, "memcg %5hu %s\n", mem_cgroup_id(memcg), path); ++ } ++ ++ seq_printf(m, " node %5d\n", nid); ++ ++ if (!full) ++ seq = min_seq[0]; ++ else if (max_seq >= MAX_NR_GENS) ++ seq = max_seq - MAX_NR_GENS + 1; ++ else ++ seq = 0; ++ ++ for (; seq <= max_seq; seq++) { ++ int gen, type, zone; ++ unsigned int msecs; ++ ++ gen = lru_gen_from_seq(seq); ++ msecs = jiffies_to_msecs(jiffies - READ_ONCE(lrugen->timestamps[gen])); ++ ++ seq_printf(m, " %10lu %10u", seq, msecs); ++ ++ for (type = 0; type < ANON_AND_FILE; type++) { ++ long size = 0; ++ ++ if (seq < min_seq[type]) { ++ seq_puts(m, " -0 "); ++ continue; ++ } ++ ++ for (zone = 0; zone < MAX_NR_ZONES; zone++) ++ size += READ_ONCE(lrugen->sizes[gen][type][zone]); ++ ++ seq_printf(m, " %10lu ", max(size, 0L)); ++ } ++ ++ seq_putc(m, '\n'); ++ ++ if (full) ++ lru_gen_seq_show_full(m, lruvec, max_seq, min_seq, seq); ++ } ++ ++ return 0; ++} ++ ++static const struct seq_operations lru_gen_seq_ops = { ++ .start = lru_gen_seq_start, ++ .stop = lru_gen_seq_stop, ++ .next = lru_gen_seq_next, ++ .show = lru_gen_seq_show, ++}; ++ ++static int run_aging(struct lruvec *lruvec, struct scan_control *sc, int swappiness, ++ unsigned long seq, bool use_filter) ++{ ++ DEFINE_MAX_SEQ(lruvec); ++ ++ if (seq == max_seq) ++ try_to_inc_max_seq(lruvec, sc, swappiness, max_seq, use_filter); ++ ++ return seq > max_seq ? -EINVAL : 0; ++} ++ ++static int run_eviction(struct lruvec *lruvec, struct scan_control *sc, int swappiness, ++ unsigned long seq, unsigned long nr_to_reclaim) ++{ ++ struct blk_plug plug; ++ int err = -EINTR; ++ DEFINE_MAX_SEQ(lruvec); ++ ++ if (seq >= max_seq - 1) ++ return -EINVAL; ++ ++ sc->nr_reclaimed = 0; ++ ++ blk_start_plug(&plug); ++ ++ while (!signal_pending(current)) { ++ DEFINE_MIN_SEQ(lruvec); ++ ++ if (seq < min_seq[!swappiness] || sc->nr_reclaimed >= nr_to_reclaim || ++ !evict_pages(lruvec, sc, swappiness)) { ++ err = 0; ++ break; ++ } ++ ++ cond_resched(); ++ } ++ ++ blk_finish_plug(&plug); ++ ++ return err; ++} ++ ++static int run_cmd(char cmd, int memcg_id, int nid, struct scan_control *sc, ++ int swappiness, unsigned long seq, unsigned long opt) ++{ ++ struct lruvec *lruvec; ++ int err = -EINVAL; ++ struct mem_cgroup *memcg = NULL; ++ ++ if (!mem_cgroup_disabled()) { ++ rcu_read_lock(); ++ memcg = mem_cgroup_from_id(memcg_id); ++#ifdef CONFIG_MEMCG ++ if (memcg && !css_tryget(&memcg->css)) ++ memcg = NULL; ++#endif ++ rcu_read_unlock(); ++ ++ if (!memcg) ++ goto done; ++ } ++ if (memcg_id != mem_cgroup_id(memcg)) ++ goto done; ++ ++ if (nid < 0 || nid >= MAX_NUMNODES || !node_state(nid, N_MEMORY)) ++ goto done; ++ ++ lruvec = get_lruvec(nid, memcg); ++ ++ if (swappiness < 0) ++ swappiness = get_swappiness(memcg); ++ else if (swappiness > 200) ++ goto done; ++ ++ switch (cmd) { ++ case '+': ++ err = run_aging(lruvec, sc, swappiness, seq, opt); ++ break; ++ case '-': ++ err = run_eviction(lruvec, sc, swappiness, seq, opt); ++ break; ++ } ++done: ++ mem_cgroup_put(memcg); ++ ++ return err; ++} ++ ++static ssize_t lru_gen_seq_write(struct file *file, const char __user *src, ++ size_t len, loff_t *pos) ++{ ++ void *buf; ++ char *cur, *next; ++ unsigned int flags; ++ int err = 0; ++ struct scan_control sc = { ++ .may_writepage = 1, ++ .may_unmap = 1, ++ .may_swap = 1, ++ .reclaim_idx = MAX_NR_ZONES - 1, ++ .gfp_mask = GFP_KERNEL, ++ }; ++ ++ buf = kvmalloc(len + 1, GFP_KERNEL); ++ if (!buf) ++ return -ENOMEM; ++ ++ if (copy_from_user(buf, src, len)) { ++ kvfree(buf); ++ return -EFAULT; ++ } ++ ++ next = buf; ++ next[len] = '\0'; ++ ++ sc.reclaim_state.mm_walk_args = alloc_mm_walk_args(); ++ if (!sc.reclaim_state.mm_walk_args) { ++ kvfree(buf); ++ return -ENOMEM; ++ } ++ ++ flags = memalloc_noreclaim_save(); ++ set_task_reclaim_state(current, &sc.reclaim_state); ++ ++ while ((cur = strsep(&next, ",;\n"))) { ++ int n; ++ int end; ++ char cmd; ++ unsigned int memcg_id; ++ unsigned int nid; ++ unsigned long seq; ++ unsigned int swappiness = -1; ++ unsigned long opt = -1; ++ ++ cur = skip_spaces(cur); ++ if (!*cur) ++ continue; ++ ++ n = sscanf(cur, "%c %u %u %lu %n %u %n %lu %n", &cmd, &memcg_id, &nid, ++ &seq, &end, &swappiness, &end, &opt, &end); ++ if (n < 4 || cur[end]) { ++ err = -EINVAL; ++ break; ++ } ++ ++ err = run_cmd(cmd, memcg_id, nid, &sc, swappiness, seq, opt); ++ if (err) ++ break; ++ } ++ ++ set_task_reclaim_state(current, NULL); ++ memalloc_noreclaim_restore(flags); ++ ++ free_mm_walk_args(sc.reclaim_state.mm_walk_args); ++ kvfree(buf); ++ ++ return err ? : len; ++} ++ ++static int lru_gen_seq_open(struct inode *inode, struct file *file) ++{ ++ return seq_open(file, &lru_gen_seq_ops); ++} ++ ++static const struct file_operations lru_gen_rw_fops = { ++ .open = lru_gen_seq_open, ++ .read = seq_read, ++ .write = lru_gen_seq_write, ++ .llseek = seq_lseek, ++ .release = seq_release, ++}; ++ ++static const struct file_operations lru_gen_ro_fops = { ++ .open = lru_gen_seq_open, ++ .read = seq_read, ++ .llseek = seq_lseek, ++ .release = seq_release, ++}; ++ ++/****************************************************************************** + * initialization + ******************************************************************************/ + +@@ -4951,6 +5360,12 @@ static int __init init_lru_gen(void) + BUILD_BUG_ON(BIT(LRU_GEN_WIDTH) <= MAX_NR_GENS); + BUILD_BUG_ON(sizeof(MM_STAT_CODES) != NR_MM_STATS + 1); + ++ if (sysfs_create_group(mm_kobj, &lru_gen_attr_group)) ++ pr_err("lru_gen: failed to create sysfs group\n"); ++ ++ debugfs_create_file("lru_gen", 0644, NULL, NULL, &lru_gen_rw_fops); ++ debugfs_create_file("lru_gen_full", 0444, NULL, NULL, &lru_gen_ro_fops); ++ + return 0; + }; + late_initcall(init_lru_gen); |