author		Linus Torvalds <torvalds@linux-foundation.org>	2018-03-19 17:48:35 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2018-03-19 17:48:35 -0400
commit		0d707a2f24c4a962313cffc980e2d74df460e45a (patch)
tree		3db3487e147c1c6482edfe3ae22f0c8ec0857978
parent		efac2483e8f289cd879e750075e63a9d16897e65 (diff)
parent		b3a5d111994450909158929560906f2c1c6c1d85 (diff)
Merge branch 'for-4.16-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/percpu
Pull percpu fixes from Tejun Heo:
 "Late percpu pull request for v4.16-rc6.

   - percpu allocator pool replenishing no longer triggers OOM or
     warning messages.

     Also, the alloc interface now understands __GFP_NORETRY and
     __GFP_NOWARN. This is to allow avoiding OOMs from userland
     triggered actions like bpf map creation.

     Also added cond_resched() in the alloc loop.

   - percpu allocation can now be interrupted by fatal (kill) signals
     to avoid deadlocking the OOM killer.

   - Added Dennis Zhou as a co-maintainer.

     He has rewritten the area map allocator, understands most of the
     code base and has been responsive for all bug reports"

* 'for-4.16-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/percpu:
  percpu_ref: Update doc to dissuade users from depending on internal RCU grace periods
  mm: Allow to kill tasks doing pcpu_alloc() and waiting for pcpu_balance_workfn()
  percpu: include linux/sched.h for cond_resched()
  percpu: add a schedule point in pcpu_balance_workfn()
  percpu: allow select gfp to be passed to underlying allocators
  percpu: add __GFP_NORETRY semantics to the percpu balancing path
  percpu: match chunk allocator declarations with definitions
  percpu: add Dennis Zhou as a percpu co-maintainer
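For context, a minimal sketch (not part of this series) of how a caller such as a userland-triggered bpf map allocation is expected to use the newly honored flags. __alloc_percpu_gfp() and free_percpu() are the existing percpu interfaces; the my_map_* struct and function names are invented purely for illustration.

/* Sketch: pass __GFP_NORETRY | __GFP_NOWARN so a userland-triggered
 * allocation fails quietly back to the caller instead of invoking the
 * OOM killer or emitting warning splats.  __alloc_percpu_gfp() and
 * free_percpu() are existing APIs; everything prefixed my_ is
 * illustrative only.
 */
#include <linux/types.h>
#include <linux/gfp.h>
#include <linux/percpu.h>

struct my_map_elem {
	u64 counter;
};

static struct my_map_elem __percpu *my_map_elem_alloc(void)
{
	return __alloc_percpu_gfp(sizeof(struct my_map_elem),
				  __alignof__(struct my_map_elem),
				  GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN);
}

static void my_map_elem_free(struct my_map_elem __percpu *elem)
{
	free_percpu(elem);
}

A NULL return here is an ordinary allocation failure for the caller to handle; with these flags the percpu balancing path no longer retries aggressively or warns on its behalf.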
-rw-r--r--  MAINTAINERS                      |  1
-rw-r--r--  include/linux/percpu-refcount.h  | 18
-rw-r--r--  lib/percpu-refcount.c            |  2
-rw-r--r--  mm/percpu-km.c                   |  8
-rw-r--r--  mm/percpu-vm.c                   | 18
-rw-r--r--  mm/percpu.c                      | 67
6 files changed, 74 insertions(+), 40 deletions(-)
diff --git a/MAINTAINERS b/MAINTAINERS
index 205c8fc12a9c..4e62756936fa 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -10844,6 +10844,7 @@ F: drivers/platform/x86/peaq-wmi.c
 PER-CPU MEMORY ALLOCATOR
 M:	Tejun Heo <tj@kernel.org>
 M:	Christoph Lameter <cl@linux.com>
+M:	Dennis Zhou <dennisszhou@gmail.com>
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/tj/percpu.git
 S:	Maintained
 F:	include/linux/percpu*.h
diff --git a/include/linux/percpu-refcount.h b/include/linux/percpu-refcount.h
index 864d167a1073..009cdf3d65b6 100644
--- a/include/linux/percpu-refcount.h
+++ b/include/linux/percpu-refcount.h
@@ -30,10 +30,14 @@
  * calls io_destroy() or the process exits.
  *
  * In the aio code, kill_ioctx() is called when we wish to destroy a kioctx; it
- * calls percpu_ref_kill(), then hlist_del_rcu() and synchronize_rcu() to remove
- * the kioctx from the proccess's list of kioctxs - after that, there can't be
- * any new users of the kioctx (from lookup_ioctx()) and it's then safe to drop
- * the initial ref with percpu_ref_put().
+ * removes the kioctx from the proccess's table of kioctxs and kills percpu_ref.
+ * After that, there can't be any new users of the kioctx (from lookup_ioctx())
+ * and it's then safe to drop the initial ref with percpu_ref_put().
+ *
+ * Note that the free path, free_ioctx(), needs to go through explicit call_rcu()
+ * to synchronize with RCU protected lookup_ioctx(). percpu_ref operations don't
+ * imply RCU grace periods of any kind and if a user wants to combine percpu_ref
+ * with RCU protection, it must be done explicitly.
  *
  * Code that does a two stage shutdown like this often needs some kind of
  * explicit synchronization to ensure the initial refcount can only be dropped
@@ -113,8 +117,10 @@ void percpu_ref_reinit(struct percpu_ref *ref);
  * Must be used to drop the initial ref on a percpu refcount; must be called
  * precisely once before shutdown.
  *
- * Puts @ref in non percpu mode, then does a call_rcu() before gathering up the
- * percpu counters and dropping the initial ref.
+ * Switches @ref into atomic mode before gathering up the percpu counters
+ * and dropping the initial ref.
+ *
+ * There are no implied RCU grace periods between kill and release.
  */
 static inline void percpu_ref_kill(struct percpu_ref *ref)
 {
diff --git a/lib/percpu-refcount.c b/lib/percpu-refcount.c
index 30e7dd88148b..9f96fa7bc000 100644
--- a/lib/percpu-refcount.c
+++ b/lib/percpu-refcount.c
@@ -322,6 +322,8 @@ EXPORT_SYMBOL_GPL(percpu_ref_switch_to_percpu);
  * This function normally doesn't block and can be called from any context
  * but it may block if @confirm_kill is specified and @ref is in the
  * process of switching to atomic mode by percpu_ref_switch_to_atomic().
+ *
+ * There are no implied RCU grace periods between kill and release.
  */
 void percpu_ref_kill_and_confirm(struct percpu_ref *ref,
 				 percpu_ref_func_t *confirm_kill)
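To make the documentation change above concrete, a hedged sketch (not from this patch) of a user that combines percpu_ref with RCU-protected lookup: since percpu_ref provides no grace period between kill and release, the release callback defers the actual free through call_rcu(). All my_* names are invented for illustration.

/* Sketch only: percpu_ref + explicit RCU protection.  percpu_ref_kill()
 * and percpu_ref_put() imply no RCU grace period, so the free path goes
 * through call_rcu() before the memory may be reused while RCU-protected
 * lookups can still race with teardown.  Names prefixed my_ are
 * illustrative.
 */
#include <linux/percpu-refcount.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

struct my_obj {
	struct percpu_ref	ref;
	struct rcu_head		rcu;
};

static void my_obj_free_rcu(struct rcu_head *rcu)
{
	struct my_obj *obj = container_of(rcu, struct my_obj, rcu);

	percpu_ref_exit(&obj->ref);	/* free the percpu counters */
	kfree(obj);
}

/* release callback: defer the free until after an RCU grace period */
static void my_obj_release(struct percpu_ref *ref)
{
	struct my_obj *obj = container_of(ref, struct my_obj, ref);

	call_rcu(&obj->rcu, my_obj_free_rcu);
}

static int my_obj_init(struct my_obj *obj)
{
	return percpu_ref_init(&obj->ref, my_obj_release, 0, GFP_KERNEL);
}

static void my_obj_destroy(struct my_obj *obj)
{
	percpu_ref_kill(&obj->ref);	/* no new refs can be taken */
	percpu_ref_put(&obj->ref);	/* drop the initial ref */
}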
diff --git a/mm/percpu-km.c b/mm/percpu-km.c
index d2a76642c4ae..38de70ab1a0d 100644
--- a/mm/percpu-km.c
+++ b/mm/percpu-km.c
@@ -34,7 +34,7 @@
 #include <linux/log2.h>
 
 static int pcpu_populate_chunk(struct pcpu_chunk *chunk,
-			       int page_start, int page_end)
+			       int page_start, int page_end, gfp_t gfp)
 {
 	return 0;
 }
@@ -45,18 +45,18 @@ static void pcpu_depopulate_chunk(struct pcpu_chunk *chunk,
 	/* nada */
 }
 
-static struct pcpu_chunk *pcpu_create_chunk(void)
+static struct pcpu_chunk *pcpu_create_chunk(gfp_t gfp)
 {
 	const int nr_pages = pcpu_group_sizes[0] >> PAGE_SHIFT;
 	struct pcpu_chunk *chunk;
 	struct page *pages;
 	int i;
 
-	chunk = pcpu_alloc_chunk();
+	chunk = pcpu_alloc_chunk(gfp);
 	if (!chunk)
 		return NULL;
 
-	pages = alloc_pages(GFP_KERNEL, order_base_2(nr_pages));
+	pages = alloc_pages(gfp, order_base_2(nr_pages));
 	if (!pages) {
 		pcpu_free_chunk(chunk);
 		return NULL;
diff --git a/mm/percpu-vm.c b/mm/percpu-vm.c
index 9158e5a81391..d8078de912de 100644
--- a/mm/percpu-vm.c
+++ b/mm/percpu-vm.c
@@ -37,7 +37,7 @@ static struct page **pcpu_get_pages(void)
 	lockdep_assert_held(&pcpu_alloc_mutex);
 
 	if (!pages)
-		pages = pcpu_mem_zalloc(pages_size);
+		pages = pcpu_mem_zalloc(pages_size, GFP_KERNEL);
 	return pages;
 }
 
@@ -73,18 +73,21 @@ static void pcpu_free_pages(struct pcpu_chunk *chunk,
  * @pages: array to put the allocated pages into, indexed by pcpu_page_idx()
  * @page_start: page index of the first page to be allocated
  * @page_end: page index of the last page to be allocated + 1
+ * @gfp: allocation flags passed to the underlying allocator
  *
  * Allocate pages [@page_start,@page_end) into @pages for all units.
  * The allocation is for @chunk. Percpu core doesn't care about the
  * content of @pages and will pass it verbatim to pcpu_map_pages().
  */
 static int pcpu_alloc_pages(struct pcpu_chunk *chunk,
-			    struct page **pages, int page_start, int page_end)
+			    struct page **pages, int page_start, int page_end,
+			    gfp_t gfp)
 {
-	const gfp_t gfp = GFP_KERNEL | __GFP_HIGHMEM;
 	unsigned int cpu, tcpu;
 	int i;
 
+	gfp |= __GFP_HIGHMEM;
+
 	for_each_possible_cpu(cpu) {
 		for (i = page_start; i < page_end; i++) {
 			struct page **pagep = &pages[pcpu_page_idx(cpu, i)];
@@ -262,6 +265,7 @@ static void pcpu_post_map_flush(struct pcpu_chunk *chunk,
  * @chunk: chunk of interest
  * @page_start: the start page
  * @page_end: the end page
+ * @gfp: allocation flags passed to the underlying memory allocator
  *
  * For each cpu, populate and map pages [@page_start,@page_end) into
  * @chunk.
@@ -270,7 +274,7 @@ static void pcpu_post_map_flush(struct pcpu_chunk *chunk,
  * pcpu_alloc_mutex, does GFP_KERNEL allocation.
  */
 static int pcpu_populate_chunk(struct pcpu_chunk *chunk,
-			       int page_start, int page_end)
+			       int page_start, int page_end, gfp_t gfp)
 {
 	struct page **pages;
 
@@ -278,7 +282,7 @@ static int pcpu_populate_chunk(struct pcpu_chunk *chunk,
 	if (!pages)
 		return -ENOMEM;
 
-	if (pcpu_alloc_pages(chunk, pages, page_start, page_end))
+	if (pcpu_alloc_pages(chunk, pages, page_start, page_end, gfp))
 		return -ENOMEM;
 
 	if (pcpu_map_pages(chunk, pages, page_start, page_end)) {
@@ -325,12 +329,12 @@ static void pcpu_depopulate_chunk(struct pcpu_chunk *chunk,
 	pcpu_free_pages(chunk, pages, page_start, page_end);
 }
 
-static struct pcpu_chunk *pcpu_create_chunk(void)
+static struct pcpu_chunk *pcpu_create_chunk(gfp_t gfp)
 {
 	struct pcpu_chunk *chunk;
 	struct vm_struct **vms;
 
-	chunk = pcpu_alloc_chunk();
+	chunk = pcpu_alloc_chunk(gfp);
 	if (!chunk)
 		return NULL;
 
diff --git a/mm/percpu.c b/mm/percpu.c
index 50e7fdf84055..9297098519a6 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -80,6 +80,7 @@
 #include <linux/vmalloc.h>
 #include <linux/workqueue.h>
 #include <linux/kmemleak.h>
+#include <linux/sched.h>
 
 #include <asm/cacheflush.h>
 #include <asm/sections.h>
@@ -447,26 +448,25 @@ static void pcpu_next_fit_region(struct pcpu_chunk *chunk, int alloc_bits,
 /**
  * pcpu_mem_zalloc - allocate memory
  * @size: bytes to allocate
+ * @gfp: allocation flags
  *
  * Allocate @size bytes. If @size is smaller than PAGE_SIZE,
- * kzalloc() is used; otherwise, vzalloc() is used. The returned
- * memory is always zeroed.
- *
- * CONTEXT:
- * Does GFP_KERNEL allocation.
+ * kzalloc() is used; otherwise, the equivalent of vzalloc() is used.
+ * This is to facilitate passing through whitelisted flags. The
+ * returned memory is always zeroed.
  *
  * RETURNS:
  * Pointer to the allocated area on success, NULL on failure.
  */
-static void *pcpu_mem_zalloc(size_t size)
+static void *pcpu_mem_zalloc(size_t size, gfp_t gfp)
 {
 	if (WARN_ON_ONCE(!slab_is_available()))
 		return NULL;
 
 	if (size <= PAGE_SIZE)
-		return kzalloc(size, GFP_KERNEL);
+		return kzalloc(size, gfp);
 	else
-		return vzalloc(size);
+		return __vmalloc(size, gfp | __GFP_ZERO, PAGE_KERNEL);
 }
 
 /**
@@ -1154,12 +1154,12 @@ static struct pcpu_chunk * __init pcpu_alloc_first_chunk(unsigned long tmp_addr,
 	return chunk;
 }
 
-static struct pcpu_chunk *pcpu_alloc_chunk(void)
+static struct pcpu_chunk *pcpu_alloc_chunk(gfp_t gfp)
 {
 	struct pcpu_chunk *chunk;
 	int region_bits;
 
-	chunk = pcpu_mem_zalloc(pcpu_chunk_struct_size);
+	chunk = pcpu_mem_zalloc(pcpu_chunk_struct_size, gfp);
 	if (!chunk)
 		return NULL;
 
@@ -1168,17 +1168,17 @@ static struct pcpu_chunk *pcpu_alloc_chunk(void)
 	region_bits = pcpu_chunk_map_bits(chunk);
 
 	chunk->alloc_map = pcpu_mem_zalloc(BITS_TO_LONGS(region_bits) *
-					   sizeof(chunk->alloc_map[0]));
+					   sizeof(chunk->alloc_map[0]), gfp);
 	if (!chunk->alloc_map)
 		goto alloc_map_fail;
 
 	chunk->bound_map = pcpu_mem_zalloc(BITS_TO_LONGS(region_bits + 1) *
-					   sizeof(chunk->bound_map[0]));
+					   sizeof(chunk->bound_map[0]), gfp);
 	if (!chunk->bound_map)
 		goto bound_map_fail;
 
 	chunk->md_blocks = pcpu_mem_zalloc(pcpu_chunk_nr_blocks(chunk) *
-					   sizeof(chunk->md_blocks[0]));
+					   sizeof(chunk->md_blocks[0]), gfp);
 	if (!chunk->md_blocks)
 		goto md_blocks_fail;
 
@@ -1277,9 +1277,11 @@ static void pcpu_chunk_depopulated(struct pcpu_chunk *chunk,
  * pcpu_addr_to_page		- translate address to physical address
  * pcpu_verify_alloc_info	- check alloc_info is acceptable during init
  */
-static int pcpu_populate_chunk(struct pcpu_chunk *chunk, int off, int size);
-static void pcpu_depopulate_chunk(struct pcpu_chunk *chunk, int off, int size);
-static struct pcpu_chunk *pcpu_create_chunk(void);
+static int pcpu_populate_chunk(struct pcpu_chunk *chunk,
+			       int page_start, int page_end, gfp_t gfp);
+static void pcpu_depopulate_chunk(struct pcpu_chunk *chunk,
+				  int page_start, int page_end);
+static struct pcpu_chunk *pcpu_create_chunk(gfp_t gfp);
 static void pcpu_destroy_chunk(struct pcpu_chunk *chunk);
 static struct page *pcpu_addr_to_page(void *addr);
 static int __init pcpu_verify_alloc_info(const struct pcpu_alloc_info *ai);
@@ -1339,6 +1341,8 @@ static struct pcpu_chunk *pcpu_chunk_addr_search(void *addr)
 static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved,
 				 gfp_t gfp)
 {
+	/* whitelisted flags that can be passed to the backing allocators */
+	gfp_t pcpu_gfp = gfp & (GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN);
 	bool is_atomic = (gfp & GFP_KERNEL) != GFP_KERNEL;
 	bool do_warn = !(gfp & __GFP_NOWARN);
 	static int warn_limit = 10;
@@ -1369,8 +1373,17 @@ static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved,
 		return NULL;
 	}
 
-	if (!is_atomic)
-		mutex_lock(&pcpu_alloc_mutex);
+	if (!is_atomic) {
+		/*
+		 * pcpu_balance_workfn() allocates memory under this mutex,
+		 * and it may wait for memory reclaim. Allow current task
+		 * to become OOM victim, in case of memory pressure.
+		 */
+		if (gfp & __GFP_NOFAIL)
+			mutex_lock(&pcpu_alloc_mutex);
+		else if (mutex_lock_killable(&pcpu_alloc_mutex))
+			return NULL;
+	}
 
 	spin_lock_irqsave(&pcpu_lock, flags);
 
@@ -1421,7 +1434,7 @@ restart:
 	}
 
 	if (list_empty(&pcpu_slot[pcpu_nr_slots - 1])) {
-		chunk = pcpu_create_chunk();
+		chunk = pcpu_create_chunk(pcpu_gfp);
 		if (!chunk) {
 			err = "failed to allocate new chunk";
 			goto fail;
@@ -1450,7 +1463,7 @@ area_found:
 				   page_start, page_end) {
 		WARN_ON(chunk->immutable);
 
-		ret = pcpu_populate_chunk(chunk, rs, re);
+		ret = pcpu_populate_chunk(chunk, rs, re, pcpu_gfp);
 
 		spin_lock_irqsave(&pcpu_lock, flags);
 		if (ret) {
@@ -1561,10 +1574,17 @@ void __percpu *__alloc_reserved_percpu(size_t size, size_t align)
  * pcpu_balance_workfn - manage the amount of free chunks and populated pages
  * @work: unused
  *
- * Reclaim all fully free chunks except for the first one.
+ * Reclaim all fully free chunks except for the first one. This is also
+ * responsible for maintaining the pool of empty populated pages. However,
+ * it is possible that this is called when physical memory is scarce causing
+ * OOM killer to be triggered. We should avoid doing so until an actual
+ * allocation causes the failure as it is possible that requests can be
+ * serviced from already backed regions.
  */
 static void pcpu_balance_workfn(struct work_struct *work)
 {
+	/* gfp flags passed to underlying allocators */
+	const gfp_t gfp = GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN;
 	LIST_HEAD(to_free);
 	struct list_head *free_head = &pcpu_slot[pcpu_nr_slots - 1];
 	struct pcpu_chunk *chunk, *next;
@@ -1600,6 +1620,7 @@ static void pcpu_balance_workfn(struct work_struct *work)
 			spin_unlock_irq(&pcpu_lock);
 		}
 		pcpu_destroy_chunk(chunk);
+		cond_resched();
 	}
 
 	/*
@@ -1645,7 +1666,7 @@ retry_pop:
 				   chunk->nr_pages) {
 			int nr = min(re - rs, nr_to_pop);
 
-			ret = pcpu_populate_chunk(chunk, rs, rs + nr);
+			ret = pcpu_populate_chunk(chunk, rs, rs + nr, gfp);
 			if (!ret) {
 				nr_to_pop -= nr;
 				spin_lock_irq(&pcpu_lock);
@@ -1662,7 +1683,7 @@ retry_pop:
 
 	if (nr_to_pop) {
 		/* ran out of chunks to populate, create a new one and retry */
-		chunk = pcpu_create_chunk();
+		chunk = pcpu_create_chunk(gfp);
 		if (chunk) {
 			spin_lock_irq(&pcpu_lock);
 			pcpu_chunk_relocate(chunk, -1);