about | summary | refs | log | tree | commit | diff | stats
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r-- kernel/cgroup/cpuset.c 4
-rw-r--r-- kernel/events/uprobes.c 10
-rw-r--r-- kernel/fork.c 7
-rw-r--r-- kernel/kexec_core.c 5
-rw-r--r-- kernel/memremap.c 103
-rw-r--r-- kernel/power/snapshot.c 2
-rw-r--r-- kernel/resource.c 15
-rw-r--r-- kernel/sysctl.c 8
8 files changed, 105 insertions, 49 deletions
diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
index 266f10cb7222..9510a5b32eaf 100644
--- a/kernel/cgroup/cpuset.c
+++ b/kernel/cgroup/cpuset.c
@@ -2666,9 +2666,9 @@ void cpuset_print_current_mems_allowed(void)
2666 rcu_read_lock(); 2666 rcu_read_lock();
2667 2667
2668 cgrp = task_cs(current)->css.cgroup; 2668 cgrp = task_cs(current)->css.cgroup;
2669 pr_info("%s cpuset=", current->comm); 2669 pr_cont(",cpuset=");
2670 pr_cont_cgroup_name(cgrp); 2670 pr_cont_cgroup_name(cgrp);
2671 pr_cont(" mems_allowed=%*pbl\n", 2671 pr_cont(",mems_allowed=%*pbl",
2672 nodemask_pr_args(&current->mems_allowed)); 2672 nodemask_pr_args(&current->mems_allowed));
2673 2673
2674 rcu_read_unlock(); 2674 rcu_read_unlock();
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index abbd8da9ac21..8aef47ee7bfa 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -171,11 +171,11 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
171 .address = addr, 171 .address = addr,
172 }; 172 };
173 int err; 173 int err;
174 /* For mmu_notifiers */ 174 struct mmu_notifier_range range;
175 const unsigned long mmun_start = addr;
176 const unsigned long mmun_end = addr + PAGE_SIZE;
177 struct mem_cgroup *memcg; 175 struct mem_cgroup *memcg;
178 176
177 mmu_notifier_range_init(&range, mm, addr, addr + PAGE_SIZE);
178
179 VM_BUG_ON_PAGE(PageTransHuge(old_page), old_page); 179 VM_BUG_ON_PAGE(PageTransHuge(old_page), old_page);
180 180
181 err = mem_cgroup_try_charge(new_page, vma->vm_mm, GFP_KERNEL, &memcg, 181 err = mem_cgroup_try_charge(new_page, vma->vm_mm, GFP_KERNEL, &memcg,
@@ -186,7 +186,7 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
186 /* For try_to_free_swap() and munlock_vma_page() below */ 186 /* For try_to_free_swap() and munlock_vma_page() below */
187 lock_page(old_page); 187 lock_page(old_page);
188 188
189 mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end); 189 mmu_notifier_invalidate_range_start(&range);
190 err = -EAGAIN; 190 err = -EAGAIN;
191 if (!page_vma_mapped_walk(&pvmw)) { 191 if (!page_vma_mapped_walk(&pvmw)) {
192 mem_cgroup_cancel_charge(new_page, memcg, false); 192 mem_cgroup_cancel_charge(new_page, memcg, false);
@@ -220,7 +220,7 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
220 220
221 err = 0; 221 err = 0;
222 unlock: 222 unlock:
223 mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end); 223 mmu_notifier_invalidate_range_end(&range);
224 unlock_page(old_page); 224 unlock_page(old_page);
225 return err; 225 return err;
226} 226}
diff --git a/kernel/fork.c b/kernel/fork.c
index e2a5156bc9c3..d439c48ecf18 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -744,15 +744,16 @@ void __init __weak arch_task_cache_init(void) { }
744static void set_max_threads(unsigned int max_threads_suggested) 744static void set_max_threads(unsigned int max_threads_suggested)
745{ 745{
746 u64 threads; 746 u64 threads;
747 unsigned long nr_pages = totalram_pages();
747 748
748 /* 749 /*
749 * The number of threads shall be limited such that the thread 750 * The number of threads shall be limited such that the thread
750 * structures may only consume a small part of the available memory. 751 * structures may only consume a small part of the available memory.
751 */ 752 */
752 if (fls64(totalram_pages) + fls64(PAGE_SIZE) > 64) 753 if (fls64(nr_pages) + fls64(PAGE_SIZE) > 64)
753 threads = MAX_THREADS; 754 threads = MAX_THREADS;
754 else 755 else
755 threads = div64_u64((u64) totalram_pages * (u64) PAGE_SIZE, 756 threads = div64_u64((u64) nr_pages * (u64) PAGE_SIZE,
756 (u64) THREAD_SIZE * 8UL); 757 (u64) THREAD_SIZE * 8UL);
757 758
758 if (threads > max_threads_suggested) 759 if (threads > max_threads_suggested)
@@ -840,7 +841,7 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
840{ 841{
841 struct task_struct *tsk; 842 struct task_struct *tsk;
842 unsigned long *stack; 843 unsigned long *stack;
843 struct vm_struct *stack_vm_area; 844 struct vm_struct *stack_vm_area __maybe_unused;
844 int err; 845 int err;
845 846
846 if (node == NUMA_NO_NODE) 847 if (node == NUMA_NO_NODE)
diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c
index 86ef06d3dbe3..d7140447be75 100644
--- a/kernel/kexec_core.c
+++ b/kernel/kexec_core.c
@@ -152,6 +152,7 @@ int sanity_check_segment_list(struct kimage *image)
152 int i; 152 int i;
153 unsigned long nr_segments = image->nr_segments; 153 unsigned long nr_segments = image->nr_segments;
154 unsigned long total_pages = 0; 154 unsigned long total_pages = 0;
155 unsigned long nr_pages = totalram_pages();
155 156
156 /* 157 /*
157 * Verify we have good destination addresses. The caller is 158 * Verify we have good destination addresses. The caller is
@@ -217,13 +218,13 @@ int sanity_check_segment_list(struct kimage *image)
217 * wasted allocating pages, which can cause a soft lockup. 218 * wasted allocating pages, which can cause a soft lockup.
218 */ 219 */
219 for (i = 0; i < nr_segments; i++) { 220 for (i = 0; i < nr_segments; i++) {
220 if (PAGE_COUNT(image->segment[i].memsz) > totalram_pages / 2) 221 if (PAGE_COUNT(image->segment[i].memsz) > nr_pages / 2)
221 return -EINVAL; 222 return -EINVAL;
222 223
223 total_pages += PAGE_COUNT(image->segment[i].memsz); 224 total_pages += PAGE_COUNT(image->segment[i].memsz);
224 } 225 }
225 226
226 if (total_pages > totalram_pages / 2) 227 if (total_pages > nr_pages / 2)
227 return -EINVAL; 228 return -EINVAL;
228 229
229 /* 230 /*
diff --git a/kernel/memremap.c b/kernel/memremap.c
index 9eced2cc9f94..a856cb5ff192 100644
--- a/kernel/memremap.c
+++ b/kernel/memremap.c
@@ -11,6 +11,7 @@
11#include <linux/types.h> 11#include <linux/types.h>
12#include <linux/wait_bit.h> 12#include <linux/wait_bit.h>
13#include <linux/xarray.h> 13#include <linux/xarray.h>
14#include <linux/hmm.h>
14 15
15static DEFINE_XARRAY(pgmap_array); 16static DEFINE_XARRAY(pgmap_array);
16#define SECTION_MASK ~((1UL << PA_SECTION_SHIFT) - 1) 17#define SECTION_MASK ~((1UL << PA_SECTION_SHIFT) - 1)
@@ -24,6 +25,9 @@ vm_fault_t device_private_entry_fault(struct vm_area_struct *vma,
24 pmd_t *pmdp) 25 pmd_t *pmdp)
25{ 26{
26 struct page *page = device_private_entry_to_page(entry); 27 struct page *page = device_private_entry_to_page(entry);
28 struct hmm_devmem *devmem;
29
30 devmem = container_of(page->pgmap, typeof(*devmem), pagemap);
27 31
28 /* 32 /*
29 * The page_fault() callback must migrate page back to system memory 33 * The page_fault() callback must migrate page back to system memory
@@ -39,7 +43,7 @@ vm_fault_t device_private_entry_fault(struct vm_area_struct *vma,
39 * There is a more in-depth description of what that callback can and 43 * There is a more in-depth description of what that callback can and
40 * cannot do, in include/linux/memremap.h 44 * cannot do, in include/linux/memremap.h
41 */ 45 */
42 return page->pgmap->page_fault(vma, addr, page, flags, pmdp); 46 return devmem->page_fault(vma, addr, page, flags, pmdp);
43} 47}
44EXPORT_SYMBOL(device_private_entry_fault); 48EXPORT_SYMBOL(device_private_entry_fault);
45#endif /* CONFIG_DEVICE_PRIVATE */ 49#endif /* CONFIG_DEVICE_PRIVATE */
@@ -87,24 +91,29 @@ static void devm_memremap_pages_release(void *data)
87 struct resource *res = &pgmap->res; 91 struct resource *res = &pgmap->res;
88 resource_size_t align_start, align_size; 92 resource_size_t align_start, align_size;
89 unsigned long pfn; 93 unsigned long pfn;
94 int nid;
90 95
96 pgmap->kill(pgmap->ref);
91 for_each_device_pfn(pfn, pgmap) 97 for_each_device_pfn(pfn, pgmap)
92 put_page(pfn_to_page(pfn)); 98 put_page(pfn_to_page(pfn));
93 99
94 if (percpu_ref_tryget_live(pgmap->ref)) {
95 dev_WARN(dev, "%s: page mapping is still live!\n", __func__);
96 percpu_ref_put(pgmap->ref);
97 }
98
99 /* pages are dead and unused, undo the arch mapping */ 100 /* pages are dead and unused, undo the arch mapping */
100 align_start = res->start & ~(SECTION_SIZE - 1); 101 align_start = res->start & ~(SECTION_SIZE - 1);
101 align_size = ALIGN(res->start + resource_size(res), SECTION_SIZE) 102 align_size = ALIGN(res->start + resource_size(res), SECTION_SIZE)
102 - align_start; 103 - align_start;
103 104
105 nid = page_to_nid(pfn_to_page(align_start >> PAGE_SHIFT));
106
104 mem_hotplug_begin(); 107 mem_hotplug_begin();
105 arch_remove_memory(align_start, align_size, pgmap->altmap_valid ? 108 if (pgmap->type == MEMORY_DEVICE_PRIVATE) {
106 &pgmap->altmap : NULL); 109 pfn = align_start >> PAGE_SHIFT;
107 kasan_remove_zero_shadow(__va(align_start), align_size); 110 __remove_pages(page_zone(pfn_to_page(pfn)), pfn,
111 align_size >> PAGE_SHIFT, NULL);
112 } else {
113 arch_remove_memory(nid, align_start, align_size,
114 pgmap->altmap_valid ? &pgmap->altmap : NULL);
115 kasan_remove_zero_shadow(__va(align_start), align_size);
116 }
108 mem_hotplug_done(); 117 mem_hotplug_done();
109 118
110 untrack_pfn(NULL, PHYS_PFN(align_start), align_size); 119 untrack_pfn(NULL, PHYS_PFN(align_start), align_size);
@@ -116,7 +125,7 @@ static void devm_memremap_pages_release(void *data)
116/** 125/**
117 * devm_memremap_pages - remap and provide memmap backing for the given resource 126 * devm_memremap_pages - remap and provide memmap backing for the given resource
118 * @dev: hosting device for @res 127 * @dev: hosting device for @res
119 * @pgmap: pointer to a struct dev_pgmap 128 * @pgmap: pointer to a struct dev_pagemap
120 * 129 *
121 * Notes: 130 * Notes:
122 * 1/ At a minimum the res, ref and type members of @pgmap must be initialized 131 * 1/ At a minimum the res, ref and type members of @pgmap must be initialized
@@ -125,11 +134,8 @@ static void devm_memremap_pages_release(void *data)
125 * 2/ The altmap field may optionally be initialized, in which case altmap_valid 134 * 2/ The altmap field may optionally be initialized, in which case altmap_valid
126 * must be set to true 135 * must be set to true
127 * 136 *
128 * 3/ pgmap.ref must be 'live' on entry and 'dead' before devm_memunmap_pages() 137 * 3/ pgmap->ref must be 'live' on entry and will be killed at
129 * time (or devm release event). The expected order of events is that ref has 138 * devm_memremap_pages_release() time, or if this routine fails.
130 * been through percpu_ref_kill() before devm_memremap_pages_release(). The
131 * wait for the completion of all references being dropped and
132 * percpu_ref_exit() must occur after devm_memremap_pages_release().
133 * 139 *
134 * 4/ res is expected to be a host memory range that could feasibly be 140 * 4/ res is expected to be a host memory range that could feasibly be
135 * treated as a "System RAM" range, i.e. not a device mmio range, but 141 * treated as a "System RAM" range, i.e. not a device mmio range, but
@@ -145,6 +151,9 @@ void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap)
145 pgprot_t pgprot = PAGE_KERNEL; 151 pgprot_t pgprot = PAGE_KERNEL;
146 int error, nid, is_ram; 152 int error, nid, is_ram;
147 153
154 if (!pgmap->ref || !pgmap->kill)
155 return ERR_PTR(-EINVAL);
156
148 align_start = res->start & ~(SECTION_SIZE - 1); 157 align_start = res->start & ~(SECTION_SIZE - 1);
149 align_size = ALIGN(res->start + resource_size(res), SECTION_SIZE) 158 align_size = ALIGN(res->start + resource_size(res), SECTION_SIZE)
150 - align_start; 159 - align_start;
@@ -167,18 +176,13 @@ void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap)
167 is_ram = region_intersects(align_start, align_size, 176 is_ram = region_intersects(align_start, align_size,
168 IORESOURCE_SYSTEM_RAM, IORES_DESC_NONE); 177 IORESOURCE_SYSTEM_RAM, IORES_DESC_NONE);
169 178
170 if (is_ram == REGION_MIXED) { 179 if (is_ram != REGION_DISJOINT) {
171 WARN_ONCE(1, "%s attempted on mixed region %pr\n", 180 WARN_ONCE(1, "%s attempted on %s region %pr\n", __func__,
172 __func__, res); 181 is_ram == REGION_MIXED ? "mixed" : "ram", res);
173 return ERR_PTR(-ENXIO); 182 error = -ENXIO;
183 goto err_array;
174 } 184 }
175 185
176 if (is_ram == REGION_INTERSECTS)
177 return __va(res->start);
178
179 if (!pgmap->ref)
180 return ERR_PTR(-EINVAL);
181
182 pgmap->dev = dev; 186 pgmap->dev = dev;
183 187
184 error = xa_err(xa_store_range(&pgmap_array, PHYS_PFN(res->start), 188 error = xa_err(xa_store_range(&pgmap_array, PHYS_PFN(res->start),
@@ -196,17 +200,40 @@ void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap)
196 goto err_pfn_remap; 200 goto err_pfn_remap;
197 201
198 mem_hotplug_begin(); 202 mem_hotplug_begin();
199 error = kasan_add_zero_shadow(__va(align_start), align_size); 203
200 if (error) { 204 /*
201 mem_hotplug_done(); 205 * For device private memory we call add_pages() as we only need to
202 goto err_kasan; 206 * allocate and initialize struct page for the device memory. More-
207 * over the device memory is un-accessible thus we do not want to
208 * create a linear mapping for the memory like arch_add_memory()
209 * would do.
210 *
211 * For all other device memory types, which are accessible by
212 * the CPU, we do want the linear mapping and thus use
213 * arch_add_memory().
214 */
215 if (pgmap->type == MEMORY_DEVICE_PRIVATE) {
216 error = add_pages(nid, align_start >> PAGE_SHIFT,
217 align_size >> PAGE_SHIFT, NULL, false);
218 } else {
219 error = kasan_add_zero_shadow(__va(align_start), align_size);
220 if (error) {
221 mem_hotplug_done();
222 goto err_kasan;
223 }
224
225 error = arch_add_memory(nid, align_start, align_size, altmap,
226 false);
227 }
228
229 if (!error) {
230 struct zone *zone;
231
232 zone = &NODE_DATA(nid)->node_zones[ZONE_DEVICE];
233 move_pfn_range_to_zone(zone, align_start >> PAGE_SHIFT,
234 align_size >> PAGE_SHIFT, altmap);
203 } 235 }
204 236
205 error = arch_add_memory(nid, align_start, align_size, altmap, false);
206 if (!error)
207 move_pfn_range_to_zone(&NODE_DATA(nid)->node_zones[ZONE_DEVICE],
208 align_start >> PAGE_SHIFT,
209 align_size >> PAGE_SHIFT, altmap);
210 mem_hotplug_done(); 237 mem_hotplug_done();
211 if (error) 238 if (error)
212 goto err_add_memory; 239 goto err_add_memory;
@@ -220,7 +247,10 @@ void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap)
220 align_size >> PAGE_SHIFT, pgmap); 247 align_size >> PAGE_SHIFT, pgmap);
221 percpu_ref_get_many(pgmap->ref, pfn_end(pgmap) - pfn_first(pgmap)); 248 percpu_ref_get_many(pgmap->ref, pfn_end(pgmap) - pfn_first(pgmap));
222 249
223 devm_add_action(dev, devm_memremap_pages_release, pgmap); 250 error = devm_add_action_or_reset(dev, devm_memremap_pages_release,
251 pgmap);
252 if (error)
253 return ERR_PTR(error);
224 254
225 return __va(res->start); 255 return __va(res->start);
226 256
@@ -231,9 +261,10 @@ void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap)
231 err_pfn_remap: 261 err_pfn_remap:
232 pgmap_array_delete(res); 262 pgmap_array_delete(res);
233 err_array: 263 err_array:
264 pgmap->kill(pgmap->ref);
234 return ERR_PTR(error); 265 return ERR_PTR(error);
235} 266}
236EXPORT_SYMBOL(devm_memremap_pages); 267EXPORT_SYMBOL_GPL(devm_memremap_pages);
237 268
238unsigned long vmem_altmap_offset(struct vmem_altmap *altmap) 269unsigned long vmem_altmap_offset(struct vmem_altmap *altmap)
239{ 270{
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index b0308a2c6000..640b2034edd6 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -105,7 +105,7 @@ unsigned long image_size;
105 105
106void __init hibernate_image_size_init(void) 106void __init hibernate_image_size_init(void)
107{ 107{
108 image_size = ((totalram_pages * 2) / 5) * PAGE_SIZE; 108 image_size = ((totalram_pages() * 2) / 5) * PAGE_SIZE;
109} 109}
110 110
111/* 111/*
diff --git a/kernel/resource.c b/kernel/resource.c
index b0fbf685c77a..915c02e8e5dd 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -1256,6 +1256,21 @@ int release_mem_region_adjustable(struct resource *parent,
1256 continue; 1256 continue;
1257 } 1257 }
1258 1258
1259 /*
1260 * All memory regions added from memory-hotplug path have the
1261 * flag IORESOURCE_SYSTEM_RAM. If the resource does not have
1262 * this flag, we know that we are dealing with a resource coming
1263 * from HMM/devm. HMM/devm use another mechanism to add/release
1264 * a resource. This goes via devm_request_mem_region and
1265 * devm_release_mem_region.
1266 * HMM/devm take care to release their resources when they want,
1267 * so if we are dealing with them, let us just back off here.
1268 */
1269 if (!(res->flags & IORESOURCE_SYSRAM)) {
1270 ret = 0;
1271 break;
1272 }
1273
1259 if (!(res->flags & IORESOURCE_MEM)) 1274 if (!(res->flags & IORESOURCE_MEM))
1260 break; 1275 break;
1261 1276
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 5fc724e4e454..1825f712e73b 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -1463,6 +1463,14 @@ static struct ctl_table vm_table[] = {
1463 .extra1 = &zero, 1463 .extra1 = &zero,
1464 }, 1464 },
1465 { 1465 {
1466 .procname = "watermark_boost_factor",
1467 .data = &watermark_boost_factor,
1468 .maxlen = sizeof(watermark_boost_factor),
1469 .mode = 0644,
1470 .proc_handler = watermark_boost_factor_sysctl_handler,
1471 .extra1 = &zero,
1472 },
1473 {
1466 .procname = "watermark_scale_factor", 1474 .procname = "watermark_scale_factor",
1467 .data = &watermark_scale_factor, 1475 .data = &watermark_scale_factor,
1468 .maxlen = sizeof(watermark_scale_factor), 1476 .maxlen = sizeof(watermark_scale_factor),