Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save laoar/90c6acb388cf541f297a7aca2f52b117 to your computer and use it in GitHub Desktop.
Save laoar/90c6acb388cf541f297a7aca2f52b117 to your computer and use it in GitHub Desktop.
mm, memcg: introduce multiple level memory low protection
From f16bb9724a4a9b802a981231c7021d2aea9f4dc2 Mon Sep 17 00:00:00 2001
From: Yafang Shao <[email protected]>
Date: Tue, 22 Oct 2019 22:17:15 -0400
Subject: [PATCH] mm, memcg: introduce multiple level memory low protection
This patch introduces a new memory controller file memory.low.level,
which is used to set multiple levels of memory.low protection.
The valid value of memory.low.level is [0..3], meaning we support four
levels protection now. This new controller file takes effect only when
memory.low is set.
If both memory.low and memory.low.level are set on multiple memcgs, then
under memory pressure the reclaimer reclaims the unprotected memory first,
then the protected memory with lower memory.low.level, and finally the
protected memory with the highest memory.low.level.
under_memory_pressure
reclaim_unprotected_memory
if (meet_the_request)
exit
reclaim_protected_memory_with_lowest_memory.low.level
if (meet_the_request)
exit
reclaim_protected_memory_with_higher_memory.low.level
if (meet_the_request)
exit
reclaim_protected_memory_with_highest_memory.low.level
One example how this multiple level controller works,
target
/ \
B C
/\
B1 B2
B/memory.low.level=2
B1/memory.low.level=3
B2/memory.low.level=0
C/memory.low.level=1
The reclaimer will then reclaim these cgroups in this order: B2->C->B1/B
Signed-off-by: Yafang Shao <[email protected]>
---
Documentation/admin-guide/cgroup-v2.rst | 11 ++++++++
include/linux/page_counter.h | 3 +++
mm/memcontrol.c | 47 +++++++++++++++++++++++++++++++++
mm/vmscan.c | 27 +++++++++++++++----
4 files changed, 83 insertions(+), 5 deletions(-)
diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst
index 5361ebe..08675b6 100644
--- a/Documentation/admin-guide/cgroup-v2.rst
+++ b/Documentation/admin-guide/cgroup-v2.rst
@@ -1136,6 +1136,17 @@ PAGE_SIZE multiple when read back.
Putting more memory than generally available under this
protection is discouraged.
+ memory.low.level
+ A read-write single value file which exists on non-root
+ cgroups. The default is "0". The valid value is [0..3].
+
+ The controller file takes effect only after memory.low is set.
+ If both memory.low and memory.low.level are set on multiple memcgs,
+ then under memory pressure the reclaimer reclaims the
+ unprotected memory first, then the protected memory with
+ lower memory.low.level, and finally the protected memory
+ with the highest memory.low.level.
+
memory.high
A read-write single value file which exists on non-root
cgroups. The default is "max".
diff --git a/include/linux/page_counter.h b/include/linux/page_counter.h
index bab7e57..19bc589 100644
--- a/include/linux/page_counter.h
+++ b/include/linux/page_counter.h
@@ -6,6 +6,7 @@
#include <linux/kernel.h>
#include <asm/page.h>
+#define MEMCG_LOW_LEVEL_MAX 4
struct page_counter {
atomic_long_t usage;
unsigned long min;
@@ -22,6 +23,8 @@ struct page_counter {
unsigned long elow;
atomic_long_t low_usage;
atomic_long_t children_low_usage;
+ unsigned long low_level;
+ unsigned long elow_level;
/* legacy */
unsigned long watermark;
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 1c4c08b..4cbf765 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -6064,6 +6064,37 @@ static ssize_t memory_low_write(struct kernfs_open_file *of,
return nbytes;
}
+/*
+ * Print this cgroup's memory.low.level setting, followed by a newline.
+ */
+static int memory_low_level_show(struct seq_file *m, void *v)
+{
+	seq_printf(m, "%lu\n", mem_cgroup_from_seq(m)->memory.low_level);
+	return 0;
+}
+
+/*
+ * Set memory.low.level from @buf; valid values are 0..MEMCG_LOW_LEVEL_MAX-1.
+ * Returns @nbytes, a kstrtoint() error, or -EINVAL when out of range.
+ */
+static ssize_t memory_low_level_write(struct kernfs_open_file *of,
+				      char *buf, size_t nbytes, loff_t off)
+{
+	struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of));
+	int ret, low_level;
+
+	ret = kstrtoint(strstrip(buf), 0, &low_level);
+	if (ret)
+		return ret;
+
+	if (low_level < 0 || low_level >= MEMCG_LOW_LEVEL_MAX)
+		return -EINVAL;
+
+	memcg->memory.low_level = low_level;
+
+	return nbytes;
+}
+
static int memory_high_show(struct seq_file *m, void *v)
{
return seq_puts_memcg_tunable(m, READ_ONCE(mem_cgroup_from_seq(m)->high));
@@ -6237,6 +6268,12 @@ static ssize_t memory_oom_group_write(struct kernfs_open_file *of,
.write = memory_low_write,
},
{
+ .name = "low.level",
+ .flags = CFTYPE_NOT_ON_ROOT,
+ .seq_show = memory_low_level_show,
+ .write = memory_low_level_write,
+ },
+ {
.name = "high",
.flags = CFTYPE_NOT_ON_ROOT,
.seq_show = memory_high_show,
@@ -6366,6 +6403,8 @@ enum mem_cgroup_protection mem_cgroup_protected(struct mem_cgroup *root,
struct mem_cgroup *parent;
unsigned long emin, parent_emin;
unsigned long elow, parent_elow;
+ unsigned long elow_level;
+ unsigned long parent_elow_level;
unsigned long usage;
if (mem_cgroup_disabled())
@@ -6382,6 +6421,7 @@ enum mem_cgroup_protection mem_cgroup_protected(struct mem_cgroup *root,
emin = memcg->memory.min;
elow = memcg->memory.low;
+ elow_level = memcg->memory.low_level;
parent = parent_mem_cgroup(memcg);
/* No parent means a non-hierarchical mode on v1 memcg */
@@ -6417,11 +6457,18 @@ enum mem_cgroup_protection mem_cgroup_protected(struct mem_cgroup *root,
if (low_usage && siblings_low_usage)
elow = min(elow, parent_elow * low_usage /
siblings_low_usage);
+
+ parent_elow_level = READ_ONCE(parent->memory.elow_level);
+ elow_level = min(elow_level, parent_elow_level);
+ } else {
+ elow_level = 0;
}
+
exit:
memcg->memory.emin = emin;
memcg->memory.elow = elow;
+ memcg->memory.elow_level = elow_level;
if (usage <= emin)
return MEMCG_PROT_MIN;
diff --git a/mm/vmscan.c b/mm/vmscan.c
index ee47bbc..82897fa 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -93,8 +93,9 @@ struct scan_control {
* unless we threaten to OOM. If any cgroups are skipped due to
* memory.low and nothing was reclaimed, go back for memory.low.
*/
- unsigned int memcg_low_reclaim:1;
unsigned int memcg_low_skipped:1;
+ unsigned int memcg_low_level:3;
+ unsigned int memcg_low_step:2;
unsigned int hibernation_mode:1;
@@ -2463,7 +2464,7 @@ static void get_scan_count(struct lruvec *lruvec, struct mem_cgroup *memcg,
lruvec_size = lruvec_lru_size(lruvec, lru, sc->reclaim_idx);
protection = mem_cgroup_protection(memcg,
- sc->memcg_low_reclaim);
+ sc->memcg_low_level > memcg->memory.elow_level);
if (protection) {
/*
@@ -2768,6 +2769,7 @@ static bool shrink_node(pg_data_t *pgdat, struct scan_control *sc)
do {
unsigned long reclaimed;
unsigned long scanned;
+ unsigned long step;
switch (mem_cgroup_protected(root, memcg)) {
case MEMCG_PROT_MIN:
@@ -2783,10 +2785,16 @@ static bool shrink_node(pg_data_t *pgdat, struct scan_control *sc)
* there is an unprotected supply
* of reclaimable memory from other cgroups.
*/
- if (!sc->memcg_low_reclaim) {
+ if (sc->memcg_low_level <= memcg->memory.elow_level) {
+ // should record the min step
+ // TODO: init the step to max.
+ step = memcg->memory.elow_level - sc->memcg_low_level;
+ if (step < sc->memcg_low_step)
+ sc->memcg_low_step = step;
sc->memcg_low_skipped = 1;
continue;
}
+
memcg_memory_event(memcg, MEMCG_LOW);
break;
case MEMCG_PROT_NONE:
@@ -3066,6 +3074,8 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
pg_data_t *last_pgdat;
struct zoneref *z;
struct zone *zone;
+
+	sc->memcg_low_step = MEMCG_LOW_LEVEL_MAX - 1;	/* largest possible step */
retry:
delayacct_freepages_start();
@@ -3112,9 +3122,16 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
return 1;
/* Untapped cgroup reserves? Don't OOM, retry. */
- if (sc->memcg_low_skipped) {
+ if (sc->memcg_low_skipped &&
+ sc->memcg_low_level < MEMCG_LOW_LEVEL_MAX) {
+ // TODO: if still set the prio to initial_priority,
+ // it may take long time to finish.
+ // needs to improve,
+ // sc->priority = (initial_priority >> memcg_low_level); ?
sc->priority = initial_priority;
- sc->memcg_low_reclaim = 1;
+
+ sc->memcg_low_level += sc->memcg_low_step + 1;
+ // considering memcg_low_step
sc->memcg_low_skipped = 0;
goto retry;
}
--
1.8.3.1
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment