author     Linus Torvalds <torvalds@linux-foundation.org>  2018-03-19 17:48:35 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>  2018-03-19 17:48:35 -0400
commit     0d707a2f24c4a962313cffc980e2d74df460e45a
tree       3db3487e147c1c6482edfe3ae22f0c8ec0857978
parent     efac2483e8f289cd879e750075e63a9d16897e65
parent     b3a5d111994450909158929560906f2c1c6c1d85
Merge branch 'for-4.16-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/percpu
Pull percpu fixes from Tejun Heo:
"Late percpu pull request for v4.16-rc6.
- percpu allocator pool replenishing no longer triggers OOM or
warning messages.
Also, the alloc interface now understands __GFP_NORETRY and
__GFP_NOWARN, so OOMs from userland-triggered actions like bpf
map creation can be avoided (a usage sketch follows the shortlog
below).
A cond_resched() was also added in the allocation loop.
- percpu allocation can now be interrupted by fatal signals to avoid
deadlocking the OOM killer.
- Added Dennis Zhou as a co-maintainer.
He has rewritten the area map allocator, understands most of the
code base and has been responsive to all bug reports"
* 'for-4.16-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/percpu:
percpu_ref: Update doc to dissuade users from depending on internal RCU grace periods
mm: Allow to kill tasks doing pcpu_alloc() and waiting for pcpu_balance_workfn()
percpu: include linux/sched.h for cond_resched()
percpu: add a schedule point in pcpu_balance_workfn()
percpu: allow select gfp to be passed to underlying allocators
percpu: add __GFP_NORETRY semantics to the percpu balancing path
percpu: match chunk allocator declarations with definitions
percpu: add Dennis Zhou as a percpu co-maintainer
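To make the new allocator semantics concrete, here is a hedged usage sketch (not part of the series). Only alloc_percpu_gfp() and the gfp flags named above are real interfaces; the structure and function names are hypothetical. A caller that can tolerate failure under memory pressure passes __GFP_NORETRY | __GFP_NOWARN and handles NULL instead of provoking the OOM killer or a warning splat.

#include <linux/types.h>
#include <linux/gfp.h>
#include <linux/percpu.h>

struct my_counters {                            /* hypothetical per-cpu payload */
        u64 packets;
        u64 bytes;
};

static struct my_counters __percpu *my_counters_create(void)
{
        /*
         * Opportunistic allocation: may fail under memory pressure, but
         * neither triggers the OOM killer nor warns in the kernel log.
         */
        return alloc_percpu_gfp(struct my_counters,
                                GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN);
}

Plain GFP_KERNEL callers now wait killably on the allocator mutex (see the pcpu_alloc() hunk below), so a fatal signal, e.g. from the OOM killer, aborts the wait and the allocation returns NULL; only __GFP_NOFAIL callers keep the uninterruptible lock.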
-rw-r--r--  MAINTAINERS                      |  1
-rw-r--r--  include/linux/percpu-refcount.h  | 18
-rw-r--r--  lib/percpu-refcount.c            |  2
-rw-r--r--  mm/percpu-km.c                   |  8
-rw-r--r--  mm/percpu-vm.c                   | 18
-rw-r--r--  mm/percpu.c                      | 67
6 files changed, 74 insertions, 40 deletions
diff --git a/MAINTAINERS b/MAINTAINERS
index 205c8fc12a9c..4e62756936fa 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -10844,6 +10844,7 @@ F: drivers/platform/x86/peaq-wmi.c
 PER-CPU MEMORY ALLOCATOR
 M: Tejun Heo <tj@kernel.org>
 M: Christoph Lameter <cl@linux.com>
+M: Dennis Zhou <dennisszhou@gmail.com>
 T: git git://git.kernel.org/pub/scm/linux/kernel/git/tj/percpu.git
 S: Maintained
 F: include/linux/percpu*.h
diff --git a/include/linux/percpu-refcount.h b/include/linux/percpu-refcount.h
index 864d167a1073..009cdf3d65b6 100644
--- a/include/linux/percpu-refcount.h
+++ b/include/linux/percpu-refcount.h
@@ -30,10 +30,14 @@
  * calls io_destroy() or the process exits.
  *
  * In the aio code, kill_ioctx() is called when we wish to destroy a kioctx; it
- * calls percpu_ref_kill(), then hlist_del_rcu() and synchronize_rcu() to remove
- * the kioctx from the proccess's list of kioctxs - after that, there can't be
- * any new users of the kioctx (from lookup_ioctx()) and it's then safe to drop
- * the initial ref with percpu_ref_put().
+ * removes the kioctx from the proccess's table of kioctxs and kills percpu_ref.
+ * After that, there can't be any new users of the kioctx (from lookup_ioctx())
+ * and it's then safe to drop the initial ref with percpu_ref_put().
+ *
+ * Note that the free path, free_ioctx(), needs to go through explicit call_rcu()
+ * to synchronize with RCU protected lookup_ioctx(). percpu_ref operations don't
+ * imply RCU grace periods of any kind and if a user wants to combine percpu_ref
+ * with RCU protection, it must be done explicitly.
  *
  * Code that does a two stage shutdown like this often needs some kind of
  * explicit synchronization to ensure the initial refcount can only be dropped
@@ -113,8 +117,10 @@ void percpu_ref_reinit(struct percpu_ref *ref);
  * Must be used to drop the initial ref on a percpu refcount; must be called
  * precisely once before shutdown.
  *
- * Puts @ref in non percpu mode, then does a call_rcu() before gathering up the
- * percpu counters and dropping the initial ref.
+ * Switches @ref into atomic mode before gathering up the percpu counters
+ * and dropping the initial ref.
+ *
+ * There are no implied RCU grace periods between kill and release.
  */
 static inline void percpu_ref_kill(struct percpu_ref *ref)
 {
diff --git a/lib/percpu-refcount.c b/lib/percpu-refcount.c
index 30e7dd88148b..9f96fa7bc000 100644
--- a/lib/percpu-refcount.c
+++ b/lib/percpu-refcount.c
@@ -322,6 +322,8 @@ EXPORT_SYMBOL_GPL(percpu_ref_switch_to_percpu);
  * This function normally doesn't block and can be called from any context
  * but it may block if @confirm_kill is specified and @ref is in the
  * process of switching to atomic mode by percpu_ref_switch_to_atomic().
+ *
+ * There are no implied RCU grace periods between kill and release.
  */
 void percpu_ref_kill_and_confirm(struct percpu_ref *ref,
                                  percpu_ref_func_t *confirm_kill)
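The note added in the two files above is easiest to see in code. Below is a minimal, hedged sketch (mine, not from the patch) of the pattern the documentation asks for: an object found via an RCU-protected lookup must provide its own grace period in the release path, because percpu_ref_kill() and the release callback imply no RCU delay of their own. struct foo and foo_release() are hypothetical.

#include <linux/percpu-refcount.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

struct foo {                            /* hypothetical object, looked up under RCU */
        struct percpu_ref ref;
        struct rcu_head rcu;
};

static void foo_release(struct percpu_ref *ref)
{
        struct foo *foo = container_of(ref, struct foo, ref);

        /*
         * Readers may still be dereferencing foo under rcu_read_lock();
         * wait for an explicit grace period before the memory is reused.
         * percpu_ref itself guarantees no such delay.
         */
        kfree_rcu(foo, rcu);
}

foo_release() would be registered with percpu_ref_init(&foo->ref, foo_release, 0, GFP_KERNEL); the kill path then only needs percpu_ref_kill() plus whatever unpublishes the object from the lookup structure.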
diff --git a/mm/percpu-km.c b/mm/percpu-km.c
index d2a76642c4ae..38de70ab1a0d 100644
--- a/mm/percpu-km.c
+++ b/mm/percpu-km.c
@@ -34,7 +34,7 @@
 #include <linux/log2.h>
 
 static int pcpu_populate_chunk(struct pcpu_chunk *chunk,
-                               int page_start, int page_end)
+                               int page_start, int page_end, gfp_t gfp)
 {
         return 0;
 }
@@ -45,18 +45,18 @@ static void pcpu_depopulate_chunk(struct pcpu_chunk *chunk,
         /* nada */
 }
 
-static struct pcpu_chunk *pcpu_create_chunk(void)
+static struct pcpu_chunk *pcpu_create_chunk(gfp_t gfp)
 {
         const int nr_pages = pcpu_group_sizes[0] >> PAGE_SHIFT;
         struct pcpu_chunk *chunk;
         struct page *pages;
         int i;
 
-        chunk = pcpu_alloc_chunk();
+        chunk = pcpu_alloc_chunk(gfp);
         if (!chunk)
                 return NULL;
 
-        pages = alloc_pages(GFP_KERNEL, order_base_2(nr_pages));
+        pages = alloc_pages(gfp, order_base_2(nr_pages));
         if (!pages) {
                 pcpu_free_chunk(chunk);
                 return NULL;
diff --git a/mm/percpu-vm.c b/mm/percpu-vm.c
index 9158e5a81391..d8078de912de 100644
--- a/mm/percpu-vm.c
+++ b/mm/percpu-vm.c
@@ -37,7 +37,7 @@ static struct page **pcpu_get_pages(void)
         lockdep_assert_held(&pcpu_alloc_mutex);
 
         if (!pages)
-                pages = pcpu_mem_zalloc(pages_size);
+                pages = pcpu_mem_zalloc(pages_size, GFP_KERNEL);
         return pages;
 }
 
@@ -73,18 +73,21 @@ static void pcpu_free_pages(struct pcpu_chunk *chunk,
  * @pages: array to put the allocated pages into, indexed by pcpu_page_idx()
  * @page_start: page index of the first page to be allocated
  * @page_end: page index of the last page to be allocated + 1
+ * @gfp: allocation flags passed to the underlying allocator
  *
  * Allocate pages [@page_start,@page_end) into @pages for all units.
  * The allocation is for @chunk. Percpu core doesn't care about the
  * content of @pages and will pass it verbatim to pcpu_map_pages().
  */
 static int pcpu_alloc_pages(struct pcpu_chunk *chunk,
-                            struct page **pages, int page_start, int page_end)
+                            struct page **pages, int page_start, int page_end,
+                            gfp_t gfp)
 {
-        const gfp_t gfp = GFP_KERNEL | __GFP_HIGHMEM;
         unsigned int cpu, tcpu;
         int i;
 
+        gfp |= __GFP_HIGHMEM;
+
         for_each_possible_cpu(cpu) {
                 for (i = page_start; i < page_end; i++) {
                         struct page **pagep = &pages[pcpu_page_idx(cpu, i)];
@@ -262,6 +265,7 @@ static void pcpu_post_map_flush(struct pcpu_chunk *chunk,
  * @chunk: chunk of interest
  * @page_start: the start page
  * @page_end: the end page
+ * @gfp: allocation flags passed to the underlying memory allocator
  *
  * For each cpu, populate and map pages [@page_start,@page_end) into
  * @chunk.
@@ -270,7 +274,7 @@ static void pcpu_post_map_flush(struct pcpu_chunk *chunk,
  * pcpu_alloc_mutex, does GFP_KERNEL allocation.
  */
 static int pcpu_populate_chunk(struct pcpu_chunk *chunk,
-                               int page_start, int page_end)
+                               int page_start, int page_end, gfp_t gfp)
 {
         struct page **pages;
 
@@ -278,7 +282,7 @@ static int pcpu_populate_chunk(struct pcpu_chunk *chunk,
         if (!pages)
                 return -ENOMEM;
 
-        if (pcpu_alloc_pages(chunk, pages, page_start, page_end))
+        if (pcpu_alloc_pages(chunk, pages, page_start, page_end, gfp))
                 return -ENOMEM;
 
         if (pcpu_map_pages(chunk, pages, page_start, page_end)) {
@@ -325,12 +329,12 @@ static void pcpu_depopulate_chunk(struct pcpu_chunk *chunk,
         pcpu_free_pages(chunk, pages, page_start, page_end);
 }
 
-static struct pcpu_chunk *pcpu_create_chunk(void)
+static struct pcpu_chunk *pcpu_create_chunk(gfp_t gfp)
 {
         struct pcpu_chunk *chunk;
         struct vm_struct **vms;
 
-        chunk = pcpu_alloc_chunk();
+        chunk = pcpu_alloc_chunk(gfp);
         if (!chunk)
                 return NULL;
 
diff --git a/mm/percpu.c b/mm/percpu.c
index 50e7fdf84055..9297098519a6 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -80,6 +80,7 @@
 #include <linux/vmalloc.h>
 #include <linux/workqueue.h>
 #include <linux/kmemleak.h>
+#include <linux/sched.h>
 
 #include <asm/cacheflush.h>
 #include <asm/sections.h>
@@ -447,26 +448,25 @@ static void pcpu_next_fit_region(struct pcpu_chunk *chunk, int alloc_bits,
 /**
  * pcpu_mem_zalloc - allocate memory
  * @size: bytes to allocate
+ * @gfp: allocation flags
  *
  * Allocate @size bytes. If @size is smaller than PAGE_SIZE,
- * kzalloc() is used; otherwise, vzalloc() is used. The returned
- * memory is always zeroed.
- *
- * CONTEXT:
- * Does GFP_KERNEL allocation.
+ * kzalloc() is used; otherwise, the equivalent of vzalloc() is used.
+ * This is to facilitate passing through whitelisted flags. The
+ * returned memory is always zeroed.
  *
  * RETURNS:
  * Pointer to the allocated area on success, NULL on failure.
  */
-static void *pcpu_mem_zalloc(size_t size)
+static void *pcpu_mem_zalloc(size_t size, gfp_t gfp)
 {
         if (WARN_ON_ONCE(!slab_is_available()))
                 return NULL;
 
         if (size <= PAGE_SIZE)
-                return kzalloc(size, GFP_KERNEL);
+                return kzalloc(size, gfp);
         else
-                return vzalloc(size);
+                return __vmalloc(size, gfp | __GFP_ZERO, PAGE_KERNEL);
 }
 
 /**
@@ -1154,12 +1154,12 @@ static struct pcpu_chunk * __init pcpu_alloc_first_chunk(unsigned long tmp_addr,
         return chunk;
 }
 
-static struct pcpu_chunk *pcpu_alloc_chunk(void)
+static struct pcpu_chunk *pcpu_alloc_chunk(gfp_t gfp)
 {
         struct pcpu_chunk *chunk;
         int region_bits;
 
-        chunk = pcpu_mem_zalloc(pcpu_chunk_struct_size);
+        chunk = pcpu_mem_zalloc(pcpu_chunk_struct_size, gfp);
         if (!chunk)
                 return NULL;
 
@@ -1168,17 +1168,17 @@ static struct pcpu_chunk *pcpu_alloc_chunk(void)
         region_bits = pcpu_chunk_map_bits(chunk);
 
         chunk->alloc_map = pcpu_mem_zalloc(BITS_TO_LONGS(region_bits) *
-                                           sizeof(chunk->alloc_map[0]));
+                                           sizeof(chunk->alloc_map[0]), gfp);
         if (!chunk->alloc_map)
                 goto alloc_map_fail;
 
         chunk->bound_map = pcpu_mem_zalloc(BITS_TO_LONGS(region_bits + 1) *
-                                           sizeof(chunk->bound_map[0]));
+                                           sizeof(chunk->bound_map[0]), gfp);
         if (!chunk->bound_map)
                 goto bound_map_fail;
 
         chunk->md_blocks = pcpu_mem_zalloc(pcpu_chunk_nr_blocks(chunk) *
-                                           sizeof(chunk->md_blocks[0]));
+                                           sizeof(chunk->md_blocks[0]), gfp);
         if (!chunk->md_blocks)
                 goto md_blocks_fail;
 
@@ -1277,9 +1277,11 @@ static void pcpu_chunk_depopulated(struct pcpu_chunk *chunk,
  * pcpu_addr_to_page - translate address to physical address
  * pcpu_verify_alloc_info - check alloc_info is acceptable during init
  */
-static int pcpu_populate_chunk(struct pcpu_chunk *chunk, int off, int size);
-static void pcpu_depopulate_chunk(struct pcpu_chunk *chunk, int off, int size);
-static struct pcpu_chunk *pcpu_create_chunk(void);
+static int pcpu_populate_chunk(struct pcpu_chunk *chunk,
+                               int page_start, int page_end, gfp_t gfp);
+static void pcpu_depopulate_chunk(struct pcpu_chunk *chunk,
+                                  int page_start, int page_end);
+static struct pcpu_chunk *pcpu_create_chunk(gfp_t gfp);
 static void pcpu_destroy_chunk(struct pcpu_chunk *chunk);
 static struct page *pcpu_addr_to_page(void *addr);
 static int __init pcpu_verify_alloc_info(const struct pcpu_alloc_info *ai);
@@ -1339,6 +1341,8 @@ static struct pcpu_chunk *pcpu_chunk_addr_search(void *addr)
 static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved,
                                  gfp_t gfp)
 {
+        /* whitelisted flags that can be passed to the backing allocators */
+        gfp_t pcpu_gfp = gfp & (GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN);
         bool is_atomic = (gfp & GFP_KERNEL) != GFP_KERNEL;
         bool do_warn = !(gfp & __GFP_NOWARN);
         static int warn_limit = 10;
@@ -1369,8 +1373,17 @@ static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved,
                 return NULL;
         }
 
-        if (!is_atomic)
-                mutex_lock(&pcpu_alloc_mutex);
+        if (!is_atomic) {
+                /*
+                 * pcpu_balance_workfn() allocates memory under this mutex,
+                 * and it may wait for memory reclaim. Allow current task
+                 * to become OOM victim, in case of memory pressure.
+                 */
+                if (gfp & __GFP_NOFAIL)
+                        mutex_lock(&pcpu_alloc_mutex);
+                else if (mutex_lock_killable(&pcpu_alloc_mutex))
+                        return NULL;
+        }
 
         spin_lock_irqsave(&pcpu_lock, flags);
 
@@ -1421,7 +1434,7 @@ restart:
         }
 
         if (list_empty(&pcpu_slot[pcpu_nr_slots - 1])) {
-                chunk = pcpu_create_chunk();
+                chunk = pcpu_create_chunk(pcpu_gfp);
                 if (!chunk) {
                         err = "failed to allocate new chunk";
                         goto fail;
@@ -1450,7 +1463,7 @@ area_found:
                                  page_start, page_end) {
                 WARN_ON(chunk->immutable);
 
-                ret = pcpu_populate_chunk(chunk, rs, re);
+                ret = pcpu_populate_chunk(chunk, rs, re, pcpu_gfp);
 
                 spin_lock_irqsave(&pcpu_lock, flags);
                 if (ret) {
@@ -1561,10 +1574,17 @@ void __percpu *__alloc_reserved_percpu(size_t size, size_t align)
  * pcpu_balance_workfn - manage the amount of free chunks and populated pages
  * @work: unused
  *
- * Reclaim all fully free chunks except for the first one.
+ * Reclaim all fully free chunks except for the first one. This is also
+ * responsible for maintaining the pool of empty populated pages. However,
+ * it is possible that this is called when physical memory is scarce causing
+ * OOM killer to be triggered. We should avoid doing so until an actual
+ * allocation causes the failure as it is possible that requests can be
+ * serviced from already backed regions.
  */
 static void pcpu_balance_workfn(struct work_struct *work)
 {
+        /* gfp flags passed to underlying allocators */
+        const gfp_t gfp = GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN;
         LIST_HEAD(to_free);
         struct list_head *free_head = &pcpu_slot[pcpu_nr_slots - 1];
         struct pcpu_chunk *chunk, *next;
@@ -1600,6 +1620,7 @@ static void pcpu_balance_workfn(struct work_struct *work)
                         spin_unlock_irq(&pcpu_lock);
                 }
                 pcpu_destroy_chunk(chunk);
+                cond_resched();
         }
 
         /*
@@ -1645,7 +1666,7 @@ retry_pop:
                                          chunk->nr_pages) {
                         int nr = min(re - rs, nr_to_pop);
 
-                        ret = pcpu_populate_chunk(chunk, rs, rs + nr);
+                        ret = pcpu_populate_chunk(chunk, rs, rs + nr, gfp);
                         if (!ret) {
                                 nr_to_pop -= nr;
                                 spin_lock_irq(&pcpu_lock);
@@ -1662,7 +1683,7 @@ retry_pop:
 
         if (nr_to_pop) {
                 /* ran out of chunks to populate, create a new one and retry */
-                chunk = pcpu_create_chunk();
+                chunk = pcpu_create_chunk(gfp);
                 if (chunk) {
                         spin_lock_irq(&pcpu_lock);
                         pcpu_chunk_relocate(chunk, -1);
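Taken together, the last few hunks give pcpu_balance_workfn() the shape sketched below. This is a hedged illustration, not kernel code: a background replenisher asks the page allocator politely (__GFP_NORETRY | __GFP_NOWARN), backs off on failure instead of forcing reclaim or logging, and yields the CPU inside its long loop. The pool, pool_count and pool_target names are illustrative only, and locking is omitted for brevity.

#include <linux/gfp.h>
#include <linux/list.h>
#include <linux/mm.h>
#include <linux/sched.h>
#include <linux/workqueue.h>

static LIST_HEAD(pool);                 /* hypothetical page pool */
static int pool_count;                  /* pages currently stashed */
static int pool_target = 16;            /* hypothetical fill target */

static void replenish_pool_workfn(struct work_struct *work)
{
        /* fail quietly under pressure; an actual allocation will retry harder */
        const gfp_t gfp = GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN;

        while (pool_count < pool_target) {
                struct page *page = alloc_page(gfp);

                if (!page)
                        break;                  /* back off; retry on the next run */
                list_add(&page->lru, &pool);    /* page->lru is ours to use here */
                pool_count++;
                cond_resched();                 /* don't monopolize the CPU */
        }
}

The same reasoning explains the whitelist in pcpu_alloc(): only GFP_KERNEL, __GFP_NORETRY and __GFP_NOWARN are forwarded to the backing allocators, so userland-triggered percpu allocations (e.g. bpf map creation) can opt out of OOM killing and warnings without smuggling arbitrary flags down to alloc_pages().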