Diffstat (limited to 'mm/percpu.c')
-rw-r--r--	mm/percpu.c	67
1 file changed, 44 insertions(+), 23 deletions(-)
diff --git a/mm/percpu.c b/mm/percpu.c
index 50e7fdf84055..9297098519a6 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -80,6 +80,7 @@
 #include <linux/vmalloc.h>
 #include <linux/workqueue.h>
 #include <linux/kmemleak.h>
+#include <linux/sched.h>
 
 #include <asm/cacheflush.h>
 #include <asm/sections.h>
@@ -447,26 +448,25 @@ static void pcpu_next_fit_region(struct pcpu_chunk *chunk, int alloc_bits,
 /**
  * pcpu_mem_zalloc - allocate memory
  * @size: bytes to allocate
+ * @gfp: allocation flags
  *
- * Allocate @size bytes. If @size is smaller than PAGE_SIZE,
- * kzalloc() is used; otherwise, vzalloc() is used. The returned
- * memory is always zeroed.
- *
- * CONTEXT:
- * Does GFP_KERNEL allocation.
+ * Allocate @size bytes. If @size is smaller than PAGE_SIZE,
+ * kzalloc() is used; otherwise, the equivalent of vzalloc() is used.
+ * This is to facilitate passing through whitelisted flags. The
+ * returned memory is always zeroed.
  *
  * RETURNS:
  * Pointer to the allocated area on success, NULL on failure.
  */
-static void *pcpu_mem_zalloc(size_t size)
+static void *pcpu_mem_zalloc(size_t size, gfp_t gfp)
 {
 	if (WARN_ON_ONCE(!slab_is_available()))
 		return NULL;
 
 	if (size <= PAGE_SIZE)
-		return kzalloc(size, GFP_KERNEL);
+		return kzalloc(size, gfp);
 	else
-		return vzalloc(size);
+		return __vmalloc(size, gfp | __GFP_ZERO, PAGE_KERNEL);
 }
 
 /**
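
[Annotation, not part of the patch] A minimal sketch of why the __vmalloc() branch above is described as "the equivalent of vzalloc()": with a plain GFP_KERNEL request the two calls produce the same zeroed, vmalloc-backed allocation, while the new form lets whitelisted flags such as __GFP_NORETRY and __GFP_NOWARN flow through. The wrapper name below is hypothetical and uses the three-argument __vmalloc() signature of this kernel era, as in the hunk above.

	#include <linux/vmalloc.h>
	#include <linux/gfp.h>

	/*
	 * Illustrative only: old vs. new behaviour of the large-size branch
	 * in pcpu_mem_zalloc().  With gfp == GFP_KERNEL both are equivalent.
	 */
	static void *pcpu_zalloc_large_sketch(size_t size, gfp_t gfp)
	{
		if (gfp == GFP_KERNEL)
			return vzalloc(size);	/* old behaviour */
		/* new behaviour: zeroed vmalloc with caller-supplied flags */
		return __vmalloc(size, gfp | __GFP_ZERO, PAGE_KERNEL);
	}
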
@@ -1154,12 +1154,12 @@ static struct pcpu_chunk * __init pcpu_alloc_first_chunk(unsigned long tmp_addr,
 	return chunk;
 }
 
-static struct pcpu_chunk *pcpu_alloc_chunk(void)
+static struct pcpu_chunk *pcpu_alloc_chunk(gfp_t gfp)
 {
 	struct pcpu_chunk *chunk;
 	int region_bits;
 
-	chunk = pcpu_mem_zalloc(pcpu_chunk_struct_size);
+	chunk = pcpu_mem_zalloc(pcpu_chunk_struct_size, gfp);
 	if (!chunk)
 		return NULL;
 
@@ -1168,17 +1168,17 @@ static struct pcpu_chunk *pcpu_alloc_chunk(void)
 	region_bits = pcpu_chunk_map_bits(chunk);
 
 	chunk->alloc_map = pcpu_mem_zalloc(BITS_TO_LONGS(region_bits) *
-					   sizeof(chunk->alloc_map[0]));
+					   sizeof(chunk->alloc_map[0]), gfp);
 	if (!chunk->alloc_map)
 		goto alloc_map_fail;
 
 	chunk->bound_map = pcpu_mem_zalloc(BITS_TO_LONGS(region_bits + 1) *
-					   sizeof(chunk->bound_map[0]));
+					   sizeof(chunk->bound_map[0]), gfp);
 	if (!chunk->bound_map)
 		goto bound_map_fail;
 
 	chunk->md_blocks = pcpu_mem_zalloc(pcpu_chunk_nr_blocks(chunk) *
-					   sizeof(chunk->md_blocks[0]));
+					   sizeof(chunk->md_blocks[0]), gfp);
 	if (!chunk->md_blocks)
 		goto md_blocks_fail;
 
@@ -1277,9 +1277,11 @@ static void pcpu_chunk_depopulated(struct pcpu_chunk *chunk,
  * pcpu_addr_to_page		- translate address to physical address
  * pcpu_verify_alloc_info	- check alloc_info is acceptable during init
  */
-static int pcpu_populate_chunk(struct pcpu_chunk *chunk, int off, int size);
-static void pcpu_depopulate_chunk(struct pcpu_chunk *chunk, int off, int size);
-static struct pcpu_chunk *pcpu_create_chunk(void);
+static int pcpu_populate_chunk(struct pcpu_chunk *chunk,
+			       int page_start, int page_end, gfp_t gfp);
+static void pcpu_depopulate_chunk(struct pcpu_chunk *chunk,
+				  int page_start, int page_end);
+static struct pcpu_chunk *pcpu_create_chunk(gfp_t gfp);
 static void pcpu_destroy_chunk(struct pcpu_chunk *chunk);
 static struct page *pcpu_addr_to_page(void *addr);
 static int __init pcpu_verify_alloc_info(const struct pcpu_alloc_info *ai);
@@ -1339,6 +1341,8 @@ static struct pcpu_chunk *pcpu_chunk_addr_search(void *addr)
 static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved,
 				 gfp_t gfp)
 {
+	/* whitelisted flags that can be passed to the backing allocators */
+	gfp_t pcpu_gfp = gfp & (GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN);
 	bool is_atomic = (gfp & GFP_KERNEL) != GFP_KERNEL;
 	bool do_warn = !(gfp & __GFP_NOWARN);
 	static int warn_limit = 10;
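
[Annotation, not part of the patch] The masking above keeps only the flags the backing allocators are known to handle: the GFP_KERNEL bits plus __GFP_NORETRY and __GFP_NOWARN. A hedged sketch of the same idea as a standalone helper; the helper name is hypothetical, the patch open-codes the mask in pcpu_alloc() and pcpu_balance_workfn().

	#include <linux/gfp.h>

	/* Hypothetical helper mirroring the open-coded whitelist above. */
	static inline gfp_t pcpu_whitelist_gfp(gfp_t gfp)
	{
		/* only reclaim-modifier and warning-suppression bits pass through */
		return gfp & (GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN);
	}
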
@@ -1369,8 +1373,17 @@ static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved,
 		return NULL;
 	}
 
-	if (!is_atomic)
-		mutex_lock(&pcpu_alloc_mutex);
+	if (!is_atomic) {
+		/*
+		 * pcpu_balance_workfn() allocates memory under this mutex,
+		 * and it may wait for memory reclaim. Allow current task
+		 * to become OOM victim, in case of memory pressure.
+		 */
+		if (gfp & __GFP_NOFAIL)
+			mutex_lock(&pcpu_alloc_mutex);
+		else if (mutex_lock_killable(&pcpu_alloc_mutex))
+			return NULL;
+	}
 
 	spin_lock_irqsave(&pcpu_lock, flags);
 
@@ -1421,7 +1434,7 @@ restart:
 	}
 
 	if (list_empty(&pcpu_slot[pcpu_nr_slots - 1])) {
-		chunk = pcpu_create_chunk();
+		chunk = pcpu_create_chunk(pcpu_gfp);
 		if (!chunk) {
 			err = "failed to allocate new chunk";
 			goto fail;
@@ -1450,7 +1463,7 @@ area_found:
 					     page_start, page_end) {
 			WARN_ON(chunk->immutable);
 
-			ret = pcpu_populate_chunk(chunk, rs, re);
+			ret = pcpu_populate_chunk(chunk, rs, re, pcpu_gfp);
 
 			spin_lock_irqsave(&pcpu_lock, flags);
 			if (ret) {
@@ -1561,10 +1574,17 @@ void __percpu *__alloc_reserved_percpu(size_t size, size_t align)
  * pcpu_balance_workfn - manage the amount of free chunks and populated pages
  * @work: unused
  *
- * Reclaim all fully free chunks except for the first one.
+ * Reclaim all fully free chunks except for the first one. This is also
+ * responsible for maintaining the pool of empty populated pages. However,
+ * it is possible that this is called when physical memory is scarce causing
+ * OOM killer to be triggered. We should avoid doing so until an actual
+ * allocation causes the failure as it is possible that requests can be
+ * serviced from already backed regions.
  */
 static void pcpu_balance_workfn(struct work_struct *work)
 {
+	/* gfp flags passed to underlying allocators */
+	const gfp_t gfp = GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN;
 	LIST_HEAD(to_free);
 	struct list_head *free_head = &pcpu_slot[pcpu_nr_slots - 1];
 	struct pcpu_chunk *chunk, *next;
@@ -1600,6 +1620,7 @@ static void pcpu_balance_workfn(struct work_struct *work)
 			spin_unlock_irq(&pcpu_lock);
 		}
 		pcpu_destroy_chunk(chunk);
+		cond_resched();
 	}
 
 	/*
@@ -1645,7 +1666,7 @@ retry_pop:
 					   chunk->nr_pages) {
 			int nr = min(re - rs, nr_to_pop);
 
-			ret = pcpu_populate_chunk(chunk, rs, rs + nr);
+			ret = pcpu_populate_chunk(chunk, rs, rs + nr, gfp);
 			if (!ret) {
 				nr_to_pop -= nr;
 				spin_lock_irq(&pcpu_lock);
@@ -1662,7 +1683,7 @@ retry_pop:
 
 	if (nr_to_pop) {
 		/* ran out of chunks to populate, create a new one and retry */
-		chunk = pcpu_create_chunk();
+		chunk = pcpu_create_chunk(gfp);
 		if (chunk) {
 			spin_lock_irq(&pcpu_lock);
 			pcpu_chunk_relocate(chunk, -1);
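
[Annotation, not part of the patch] For context, a minimal module-style sketch of the caller-facing API this change sits behind: alloc_percpu_gfp() forwards its gfp argument to pcpu_alloc(), where non-whitelisted bits are masked off before reaching the chunk and page allocations shown above. The module and variable names below are illustrative only.

	#include <linux/module.h>
	#include <linux/percpu.h>
	#include <linux/gfp.h>

	static u64 __percpu *example_counters;	/* illustrative variable */

	static int __init pcpu_gfp_example_init(void)
	{
		/*
		 * Sleeping GFP_KERNEL path: takes pcpu_alloc_mutex (now
		 * killable unless __GFP_NOFAIL is set) and may populate
		 * pages using the whitelisted flags.
		 */
		example_counters = alloc_percpu_gfp(u64, GFP_KERNEL | __GFP_NOWARN);
		if (!example_counters)
			return -ENOMEM;
		return 0;
	}

	static void __exit pcpu_gfp_example_exit(void)
	{
		free_percpu(example_counters);
	}

	module_init(pcpu_gfp_example_init);
	module_exit(pcpu_gfp_example_exit);
	MODULE_LICENSE("GPL");
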