Created
May 9, 2019 12:04
-
-
Save pfactum/b323f9b93eabd8a6c15dc7a31f2dacaf to your computer and use it in GitHub Desktop.
KSM "always" mode preparations
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt | |
index 2b8ee90bb644..510766a3fa05 100644 | |
--- a/Documentation/admin-guide/kernel-parameters.txt | |
+++ b/Documentation/admin-guide/kernel-parameters.txt | |
@@ -2008,6 +2008,13 @@ | |
0: force disabled | |
1: force enabled | |
+ ksm_mode= | |
+ [KNL] | |
+ Format: [madvise|always] | |
+ Default: madvise | |
+ Can be used to control the default behavior of the system | |
+ with respect to merging anonymous memory. | |
+ | |
kvm.ignore_msrs=[KVM] Ignore guest accesses to unhandled MSRs. | |
Default is 0 (don't ignore, but inject #GP) | |
diff --git a/Documentation/admin-guide/mm/ksm.rst b/Documentation/admin-guide/mm/ksm.rst | |
index 9303786632d1..9af730640da7 100644 | |
--- a/Documentation/admin-guide/mm/ksm.rst | |
+++ b/Documentation/admin-guide/mm/ksm.rst | |
@@ -78,6 +78,13 @@ KSM daemon sysfs interface | |
The KSM daemon is controlled by sysfs files in ``/sys/kernel/mm/ksm/``, | |
readable by all but writable only by root: | |
+mode | |
+ * set to ``madvise`` to deduplicate only memory that was marked | |
+ with ``madvise(MADV_MERGEABLE)`` | |
+ * set to ``always`` to allow deduplicating all anonymous memory | |
+ (applies to newly allocated memory only) | |
+ | |
+ Default: ``madvise`` (maintains the old behaviour) | |
+ | |
pages_to_scan | |
how many pages to scan before ksmd goes to sleep | |
e.g. ``echo 100 > /sys/kernel/mm/ksm/pages_to_scan``. | |
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c | |
index 95ca1fe7283c..19cc246000e8 100644 | |
--- a/fs/proc/task_mmu.c | |
+++ b/fs/proc/task_mmu.c | |
@@ -648,6 +648,9 @@ static void show_smap_vma_flags(struct seq_file *m, struct vm_area_struct *vma) | |
[ilog2(VM_MIXEDMAP)] = "mm", | |
[ilog2(VM_HUGEPAGE)] = "hg", | |
[ilog2(VM_NOHUGEPAGE)] = "nh", | |
+#ifdef VM_UNMERGEABLE | |
+ [ilog2(VM_UNMERGEABLE)] = "ug", | |
+#endif | |
[ilog2(VM_MERGEABLE)] = "mg", | |
[ilog2(VM_UFFD_MISSING)]= "um", | |
[ilog2(VM_UFFD_WP)] = "uw", | |
diff --git a/include/linux/ksm.h b/include/linux/ksm.h | |
index e48b1e453ff5..3c076b35259c 100644 | |
--- a/include/linux/ksm.h | |
+++ b/include/linux/ksm.h | |
@@ -21,6 +21,11 @@ struct mem_cgroup; | |
#ifdef CONFIG_KSM | |
int ksm_madvise(struct vm_area_struct *vma, unsigned long start, | |
unsigned long end, int advice, unsigned long *vm_flags); | |
+#ifdef VM_UNMERGEABLE | |
+bool ksm_mode_always(void); | |
+#endif | |
+int ksm_enter(struct mm_struct *mm, struct vm_area_struct *vma, | |
+ unsigned long *vm_flags); | |
int __ksm_enter(struct mm_struct *mm); | |
void __ksm_exit(struct mm_struct *mm); | |
diff --git a/include/linux/mm.h b/include/linux/mm.h | |
index 6b10c21630f5..114cdb882cdd 100644 | |
--- a/include/linux/mm.h | |
+++ b/include/linux/mm.h | |
@@ -252,11 +252,13 @@ extern unsigned int kobjsize(const void *objp); | |
#define VM_HIGH_ARCH_BIT_2 34 /* bit only usable on 64-bit architectures */ | |
#define VM_HIGH_ARCH_BIT_3 35 /* bit only usable on 64-bit architectures */ | |
#define VM_HIGH_ARCH_BIT_4 36 /* bit only usable on 64-bit architectures */ | |
+#define VM_HIGH_ARCH_BIT_5 37 /* bit only usable on 64-bit architectures */ | |
#define VM_HIGH_ARCH_0 BIT(VM_HIGH_ARCH_BIT_0) | |
#define VM_HIGH_ARCH_1 BIT(VM_HIGH_ARCH_BIT_1) | |
#define VM_HIGH_ARCH_2 BIT(VM_HIGH_ARCH_BIT_2) | |
#define VM_HIGH_ARCH_3 BIT(VM_HIGH_ARCH_BIT_3) | |
#define VM_HIGH_ARCH_4 BIT(VM_HIGH_ARCH_BIT_4) | |
+#define VM_HIGH_ARCH_5 BIT(VM_HIGH_ARCH_BIT_5) | |
#endif /* CONFIG_ARCH_USES_HIGH_VMA_FLAGS */ | |
#ifdef CONFIG_ARCH_HAS_PKEYS | |
@@ -272,6 +274,10 @@ extern unsigned int kobjsize(const void *objp); | |
#endif | |
#endif /* CONFIG_ARCH_HAS_PKEYS */ | |
+#ifdef VM_HIGH_ARCH_5 | |
+#define VM_UNMERGEABLE VM_HIGH_ARCH_5 /* Opt-out for KSM "always" mode */ | |
+#endif /* VM_HIGH_ARCH_5 */ | |
+ | |
#if defined(CONFIG_X86) | |
# define VM_PAT VM_ARCH_1 /* PAT reserves whole VMA at once (x86) */ | |
#elif defined(CONFIG_PPC) | |
diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h | |
index a1675d43777e..717e0fd9d2ef 100644 | |
--- a/include/trace/events/mmflags.h | |
+++ b/include/trace/events/mmflags.h | |
@@ -130,6 +130,12 @@ IF_HAVE_PG_IDLE(PG_idle, "idle" ) | |
#define IF_HAVE_VM_SOFTDIRTY(flag,name) | |
#endif | |
+#ifdef VM_UNMERGEABLE | |
+#define IF_HAVE_VM_UNMERGEABLE(flag,name) {flag, name }, | |
+#else | |
+#define IF_HAVE_VM_UNMERGEABLE(flag,name) | |
+#endif | |
+ | |
#define __def_vmaflag_names \ | |
{VM_READ, "read" }, \ | |
{VM_WRITE, "write" }, \ | |
@@ -161,6 +167,7 @@ IF_HAVE_VM_SOFTDIRTY(VM_SOFTDIRTY, "softdirty" ) \ | |
{VM_MIXEDMAP, "mixedmap" }, \ | |
{VM_HUGEPAGE, "hugepage" }, \ | |
{VM_NOHUGEPAGE, "nohugepage" }, \ | |
+IF_HAVE_VM_UNMERGEABLE(VM_UNMERGEABLE, "unmergeable" ) \ | |
{VM_MERGEABLE, "mergeable" } \ | |
#define show_vma_flags(flags) \ | |
diff --git a/mm/ksm.c b/mm/ksm.c | |
index fc64874dc6f4..6a2280b875cc 100644 | |
--- a/mm/ksm.c | |
+++ b/mm/ksm.c | |
@@ -295,6 +295,12 @@ static int ksm_nr_node_ids = 1; | |
static unsigned long ksm_run = KSM_RUN_STOP; | |
static void wait_while_offlining(void); | |
+#ifdef VM_UNMERGEABLE | |
+#define KSM_MODE_MADVISE 0 | |
+#define KSM_MODE_ALWAYS 1 | |
+static unsigned long ksm_mode = KSM_MODE_MADVISE; | |
+#endif | |
+ | |
static DECLARE_WAIT_QUEUE_HEAD(ksm_thread_wait); | |
static DECLARE_WAIT_QUEUE_HEAD(ksm_iter_wait); | |
static DEFINE_MUTEX(ksm_thread_mutex); | |
@@ -2450,36 +2456,18 @@ int ksm_madvise(struct vm_area_struct *vma, unsigned long start, | |
switch (advice) { | |
case MADV_MERGEABLE: | |
- /* | |
- * Be somewhat over-protective for now! | |
- */ | |
- if (*vm_flags & (VM_MERGEABLE | VM_SHARED | VM_MAYSHARE | | |
- VM_PFNMAP | VM_IO | VM_DONTEXPAND | | |
- VM_HUGETLB | VM_MIXEDMAP)) | |
- return 0; /* just ignore the advice */ | |
- | |
- if (vma_is_dax(vma)) | |
- return 0; | |
- | |
-#ifdef VM_SAO | |
- if (*vm_flags & VM_SAO) | |
- return 0; | |
-#endif | |
-#ifdef VM_SPARC_ADI | |
- if (*vm_flags & VM_SPARC_ADI) | |
- return 0; | |
+#ifdef VM_UNMERGEABLE | |
+ *vm_flags &= ~VM_UNMERGEABLE; | |
#endif | |
- | |
- if (!test_bit(MMF_VM_MERGEABLE, &mm->flags)) { | |
- err = __ksm_enter(mm); | |
- if (err) | |
- return err; | |
- } | |
- | |
- *vm_flags |= VM_MERGEABLE; | |
+ err = ksm_enter(mm, vma, vm_flags); | |
+ if (err) | |
+ return err; | |
break; | |
case MADV_UNMERGEABLE: | |
+#ifdef VM_UNMERGEABLE | |
+ *vm_flags |= VM_UNMERGEABLE; | |
+#endif | |
if (!(*vm_flags & VM_MERGEABLE)) | |
return 0; /* just ignore the advice */ | |
@@ -2496,6 +2484,76 @@ int ksm_madvise(struct vm_area_struct *vma, unsigned long start, | |
return 0; | |
} | |
+#ifdef VM_UNMERGEABLE | |
+/* Tell callers whether the current KSM mode is "always". */ | |
+bool ksm_mode_always(void) | |
+{ | |
+	if (ksm_mode != KSM_MODE_ALWAYS) | |
+		return false; | |
+ | |
+	return true; | |
+} | |
+ | |
+/* | |
+ * Handler for the "ksm_mode=" boot parameter. | |
+ * | |
+ * Accepts exactly "madvise" or "always" and stores the matching mode in | |
+ * ksm_mode. Returns 1 when the value was recognised; otherwise logs a | |
+ * warning, leaves ksm_mode untouched and returns 0. | |
+ */ | |
+static int __init setup_ksm_mode(char *str) | |
+{ | |
+	if (str && !strcmp(str, "madvise")) { | |
+		ksm_mode = KSM_MODE_MADVISE; | |
+		return 1; | |
+	} | |
+ | |
+	if (str && !strcmp(str, "always")) { | |
+		ksm_mode = KSM_MODE_ALWAYS; | |
+		return 1; | |
+	} | |
+ | |
+	/* Missing or unrecognised value. */ | |
+	pr_warn("ksm_mode= cannot parse, ignored\n"); | |
+	return 0; | |
+} | |
+__setup("ksm_mode=", setup_ksm_mode); | |
+#endif | |
+ | |
+/* | |
+ * Mark a VMA as mergeable, registering its mm via __ksm_enter() first | |
+ * when MMF_VM_MERGEABLE is not yet set on the mm. | |
+ * | |
+ * @mm:       address space that owns @vma | |
+ * @vma:      the area to consider for merging | |
+ * @vm_flags: flags word to test and, on success, update with VM_MERGEABLE | |
+ * | |
+ * Returns 0 when the VMA was marked or silently skipped (unsuitable flags | |
+ * or a DAX mapping), or the error returned by __ksm_enter(). | |
+ */ | |
+int ksm_enter(struct mm_struct *mm, struct vm_area_struct *vma, | |
+		unsigned long *vm_flags) | |
+{ | |
+	/* | |
+	 * Be somewhat over-protective for now: any of these flags makes | |
+	 * us ignore the request. | |
+	 */ | |
+	unsigned long skip_mask = VM_MERGEABLE | VM_SHARED | VM_MAYSHARE | | |
+				  VM_PFNMAP | VM_IO | VM_DONTEXPAND | | |
+				  VM_HUGETLB | VM_MIXEDMAP; | |
+ | |
+#ifdef VM_SAO | |
+	skip_mask |= VM_SAO; | |
+#endif | |
+#ifdef VM_SPARC_ADI | |
+	skip_mask |= VM_SPARC_ADI; | |
+#endif | |
+#ifdef VM_UNMERGEABLE | |
+	/* Explicit opt-out requested through MADV_UNMERGEABLE. */ | |
+	skip_mask |= VM_UNMERGEABLE; | |
+#endif | |
+ | |
+	if ((*vm_flags & skip_mask) || vma_is_dax(vma)) | |
+		return 0; | |
+ | |
+	if (!test_bit(MMF_VM_MERGEABLE, &mm->flags)) { | |
+		int rc = __ksm_enter(mm); | |
+ | |
+		if (rc) | |
+			return rc; | |
+	} | |
+ | |
+	*vm_flags |= VM_MERGEABLE; | |
+	return 0; | |
+} | |
+ | |
int __ksm_enter(struct mm_struct *mm) | |
{ | |
struct mm_slot *mm_slot; | |
@@ -2859,6 +2917,35 @@ static void wait_while_offlining(void) | |
static struct kobj_attribute _name##_attr = \ | |
__ATTR(_name, 0644, _name##_show, _name##_store) | |
+#ifdef VM_UNMERGEABLE | |
+/* | |
+ * sysfs "mode" read handler: prints both mode names with the active one | |
+ * in square brackets. Any value other than KSM_MODE_ALWAYS is reported | |
+ * as madvise. | |
+ */ | |
+static ssize_t mode_show(struct kobject *kobj, struct kobj_attribute *attr, | |
+		char *buf) | |
+{ | |
+	if (ksm_mode == KSM_MODE_ALWAYS) | |
+		return sprintf(buf, "[always] madvise\n"); | |
+ | |
+	return sprintf(buf, "always [madvise]\n"); | |
+} | |
+ | |
+/* | |
+ * sysfs "mode" write handler: selects the KSM mode from user input. | |
+ * | |
+ * Only the exact strings "madvise" and "always" are accepted, optionally | |
+ * followed by a single trailing newline. sysfs_streq() is used instead of | |
+ * a length-limited memcmp() because the latter is a prefix match: an | |
+ * empty write or a lone "a"/"m" would silently switch the mode, and | |
+ * input with trailing garbage would be accepted. | |
+ * | |
+ * Returns @count on success, -EINVAL for any other input. | |
+ */ | |
+static ssize_t mode_store(struct kobject *kobj, struct kobj_attribute *attr, | |
+		const char *buf, size_t count) | |
+{ | |
+	if (sysfs_streq(buf, "madvise")) | |
+		ksm_mode = KSM_MODE_MADVISE; | |
+	else if (sysfs_streq(buf, "always")) | |
+		ksm_mode = KSM_MODE_ALWAYS; | |
+	else | |
+		return -EINVAL; | |
+ | |
+	return count; | |
+} | |
+KSM_ATTR(mode); | |
+#endif | |
+ | |
static ssize_t sleep_millisecs_show(struct kobject *kobj, | |
struct kobj_attribute *attr, char *buf) | |
{ | |
@@ -3161,6 +3248,9 @@ static ssize_t full_scans_show(struct kobject *kobj, | |
KSM_ATTR_RO(full_scans); | |
static struct attribute *ksm_attrs[] = { | |
+#ifdef VM_UNMERGEABLE | |
+ &mode_attr.attr, | |
+#endif | |
&sleep_millisecs_attr.attr, | |
&pages_to_scan_attr.attr, | |
&run_attr.attr, | |
diff --git a/mm/memory.c b/mm/memory.c | |
index ab650c21bccd..08f3f92de310 100644 | |
--- a/mm/memory.c | |
+++ b/mm/memory.c | |
@@ -2994,6 +2994,12 @@ static vm_fault_t do_anonymous_page(struct vm_fault *vmf) | |
update_mmu_cache(vma, vmf->address, vmf->pte); | |
unlock: | |
pte_unmap_unlock(vmf->pte, vmf->ptl); | |
+ | |
+#if defined(CONFIG_KSM) && defined(VM_UNMERGEABLE) | |
+ if (ksm_mode_always()) | |
+ ksm_enter(vma->vm_mm, vma, &vma->vm_flags); | |
+#endif | |
+ | |
return ret; | |
release: | |
mem_cgroup_cancel_charge(page, memcg, false); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment