Diffstat (limited to 'mm/percpu.c')
-rw-r--r--    mm/percpu.c    67
1 file changed, 44 insertions, 23 deletions
diff --git a/mm/percpu.c b/mm/percpu.c
index 50e7fdf84055..9297098519a6 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -80,6 +80,7 @@
 #include <linux/vmalloc.h>
 #include <linux/workqueue.h>
 #include <linux/kmemleak.h>
+#include <linux/sched.h>
 
 #include <asm/cacheflush.h>
 #include <asm/sections.h>
@@ -447,26 +448,25 @@ static void pcpu_next_fit_region(struct pcpu_chunk *chunk, int alloc_bits,
 /**
  * pcpu_mem_zalloc - allocate memory
  * @size: bytes to allocate
+ * @gfp: allocation flags
  *
  * Allocate @size bytes. If @size is smaller than PAGE_SIZE,
- * kzalloc() is used; otherwise, vzalloc() is used. The returned
- * memory is always zeroed.
- *
- * CONTEXT:
- * Does GFP_KERNEL allocation.
+ * kzalloc() is used; otherwise, the equivalent of vzalloc() is used.
+ * This is to facilitate passing through whitelisted flags. The
+ * returned memory is always zeroed.
  *
  * RETURNS:
  * Pointer to the allocated area on success, NULL on failure.
  */
-static void *pcpu_mem_zalloc(size_t size)
+static void *pcpu_mem_zalloc(size_t size, gfp_t gfp)
 {
        if (WARN_ON_ONCE(!slab_is_available()))
                return NULL;
 
        if (size <= PAGE_SIZE)
-               return kzalloc(size, GFP_KERNEL);
+               return kzalloc(size, gfp);
        else
-               return vzalloc(size);
+               return __vmalloc(size, gfp | __GFP_ZERO, PAGE_KERNEL);
 }
 
 /**
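The hunk above swaps vzalloc() for an open-coded __vmalloc() so that the caller's gfp flags survive into the vmalloc path; with plain GFP_KERNEL the new branch behaves like the old vzalloc() call, since vzalloc(size) is effectively __vmalloc(size, GFP_KERNEL | __GFP_ZERO, PAGE_KERNEL) on this kernel. A rough sketch of the same selection logic, with illustrative names and the percpu-specific slab_is_available() check omitted (not part of the patch):

#include <linux/slab.h>
#include <linux/vmalloc.h>

/*
 * Illustrative sketch only: mirrors the kzalloc()/__vmalloc() split above.
 * With gfp == GFP_KERNEL the large-size branch matches the old vzalloc()
 * behaviour; extra whitelisted flags now reach the page allocator as well.
 */
static void *example_zalloc(size_t size, gfp_t gfp)
{
        if (size <= PAGE_SIZE)
                return kzalloc(size, gfp);              /* small: slab, zeroed */
        return __vmalloc(size, gfp | __GFP_ZERO, PAGE_KERNEL); /* large: vmalloc, zeroed */
}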
@@ -1154,12 +1154,12 @@ static struct pcpu_chunk * __init pcpu_alloc_first_chunk(unsigned long tmp_addr,
        return chunk;
 }
 
-static struct pcpu_chunk *pcpu_alloc_chunk(void)
+static struct pcpu_chunk *pcpu_alloc_chunk(gfp_t gfp)
 {
        struct pcpu_chunk *chunk;
        int region_bits;
 
-       chunk = pcpu_mem_zalloc(pcpu_chunk_struct_size);
+       chunk = pcpu_mem_zalloc(pcpu_chunk_struct_size, gfp);
        if (!chunk)
                return NULL;
 
@@ -1168,17 +1168,17 @@ static struct pcpu_chunk *pcpu_alloc_chunk(void)
        region_bits = pcpu_chunk_map_bits(chunk);
 
        chunk->alloc_map = pcpu_mem_zalloc(BITS_TO_LONGS(region_bits) *
-                                          sizeof(chunk->alloc_map[0]));
+                                          sizeof(chunk->alloc_map[0]), gfp);
        if (!chunk->alloc_map)
                goto alloc_map_fail;
 
        chunk->bound_map = pcpu_mem_zalloc(BITS_TO_LONGS(region_bits + 1) *
-                                          sizeof(chunk->bound_map[0]));
+                                          sizeof(chunk->bound_map[0]), gfp);
        if (!chunk->bound_map)
                goto bound_map_fail;
 
        chunk->md_blocks = pcpu_mem_zalloc(pcpu_chunk_nr_blocks(chunk) *
-                                          sizeof(chunk->md_blocks[0]));
+                                          sizeof(chunk->md_blocks[0]), gfp);
        if (!chunk->md_blocks)
                goto md_blocks_fail;
 
@@ -1277,9 +1277,11 @@ static void pcpu_chunk_depopulated(struct pcpu_chunk *chunk,
  * pcpu_addr_to_page - translate address to physical address
  * pcpu_verify_alloc_info - check alloc_info is acceptable during init
  */
-static int pcpu_populate_chunk(struct pcpu_chunk *chunk, int off, int size);
-static void pcpu_depopulate_chunk(struct pcpu_chunk *chunk, int off, int size);
-static struct pcpu_chunk *pcpu_create_chunk(void);
+static int pcpu_populate_chunk(struct pcpu_chunk *chunk,
+                              int page_start, int page_end, gfp_t gfp);
+static void pcpu_depopulate_chunk(struct pcpu_chunk *chunk,
+                                 int page_start, int page_end);
+static struct pcpu_chunk *pcpu_create_chunk(gfp_t gfp);
 static void pcpu_destroy_chunk(struct pcpu_chunk *chunk);
 static struct page *pcpu_addr_to_page(void *addr);
 static int __init pcpu_verify_alloc_info(const struct pcpu_alloc_info *ai);
@@ -1339,6 +1341,8 @@ static struct pcpu_chunk *pcpu_chunk_addr_search(void *addr)
 static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved,
                                 gfp_t gfp)
 {
+       /* whitelisted flags that can be passed to the backing allocators */
+       gfp_t pcpu_gfp = gfp & (GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN);
        bool is_atomic = (gfp & GFP_KERNEL) != GFP_KERNEL;
        bool do_warn = !(gfp & __GFP_NOWARN);
        static int warn_limit = 10;
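The whitelist mask means callers may pass arbitrary gfp flags to pcpu_alloc(), but only GFP_KERNEL, __GFP_NORETRY and __GFP_NOWARN are forwarded to the backing allocators. A small sketch of the effect (hypothetical helper, not in the patch):

#include <linux/gfp.h>

/*
 * Sketch: anything outside the whitelist, for example __GFP_NOFAIL or a
 * zone modifier, is stripped before the chunk and page allocations run.
 */
static gfp_t example_whitelist(gfp_t gfp)
{
        const gfp_t allowed = GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN;

        return gfp & allowed;
}

/* e.g. example_whitelist(GFP_KERNEL | __GFP_NOFAIL) yields plain GFP_KERNEL */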
@@ -1369,8 +1373,17 @@ static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved,
                return NULL;
        }
 
-       if (!is_atomic)
-               mutex_lock(&pcpu_alloc_mutex);
+       if (!is_atomic) {
+               /*
+                * pcpu_balance_workfn() allocates memory under this mutex,
+                * and it may wait for memory reclaim. Allow current task
+                * to become OOM victim, in case of memory pressure.
+                */
+               if (gfp & __GFP_NOFAIL)
+                       mutex_lock(&pcpu_alloc_mutex);
+               else if (mutex_lock_killable(&pcpu_alloc_mutex))
+                       return NULL;
+       }
 
        spin_lock_irqsave(&pcpu_lock, flags);
 
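The block above trades mutex_lock() for mutex_lock_killable() so that a task stuck waiting on the allocation mutex can still be reaped by the OOM killer; only __GFP_NOFAIL callers, which must never see a NULL return, keep the unconditional lock. A generic sketch of that pattern (hypothetical helper, not part of the patch):

#include <linux/mutex.h>
#include <linux/gfp.h>

/*
 * Sketch: take @lock killably unless the caller demanded __GFP_NOFAIL.
 * mutex_lock_killable() returns -EINTR when a fatal signal (for example an
 * OOM kill) arrives while waiting, letting the allocation bail out early.
 */
static int example_lock_for_alloc(struct mutex *lock, gfp_t gfp)
{
        if (gfp & __GFP_NOFAIL) {
                mutex_lock(lock);       /* must not fail, so wait it out */
                return 0;
        }
        return mutex_lock_killable(lock);
}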
@@ -1421,7 +1434,7 @@ restart:
        }
 
        if (list_empty(&pcpu_slot[pcpu_nr_slots - 1])) {
-               chunk = pcpu_create_chunk();
+               chunk = pcpu_create_chunk(pcpu_gfp);
                if (!chunk) {
                        err = "failed to allocate new chunk";
                        goto fail;
@@ -1450,7 +1463,7 @@ area_found:
                                   page_start, page_end) {
                WARN_ON(chunk->immutable);
 
-               ret = pcpu_populate_chunk(chunk, rs, re);
+               ret = pcpu_populate_chunk(chunk, rs, re, pcpu_gfp);
 
                spin_lock_irqsave(&pcpu_lock, flags);
                if (ret) {
@@ -1561,10 +1574,17 @@ void __percpu *__alloc_reserved_percpu(size_t size, size_t align)
  * pcpu_balance_workfn - manage the amount of free chunks and populated pages
  * @work: unused
  *
- * Reclaim all fully free chunks except for the first one.
+ * Reclaim all fully free chunks except for the first one. This is also
+ * responsible for maintaining the pool of empty populated pages. However,
+ * it is possible that this is called when physical memory is scarce causing
+ * OOM killer to be triggered. We should avoid doing so until an actual
+ * allocation causes the failure as it is possible that requests can be
+ * serviced from already backed regions.
  */
 static void pcpu_balance_workfn(struct work_struct *work)
 {
+       /* gfp flags passed to underlying allocators */
+       const gfp_t gfp = GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN;
        LIST_HEAD(to_free);
        struct list_head *free_head = &pcpu_slot[pcpu_nr_slots - 1];
        struct pcpu_chunk *chunk, *next;
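Because pcpu_balance_workfn() repopulates pages opportunistically, the updated comment argues it should never be the thing that triggers the OOM killer; the fixed GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN mask is what implements that. A minimal sketch of such a best-effort background allocation (illustrative only, not from the patch):

#include <linux/slab.h>

/*
 * Sketch: __GFP_NORETRY keeps the page allocator from retrying hard enough
 * to invoke the OOM killer, and __GFP_NOWARN silences the allocation-failure
 * warning; a NULL return simply means "leave it for a later attempt".
 */
static void *example_best_effort_alloc(size_t size)
{
        return kzalloc(size, GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN);
}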
@@ -1600,6 +1620,7 @@ static void pcpu_balance_workfn(struct work_struct *work)
                        spin_unlock_irq(&pcpu_lock);
                }
                pcpu_destroy_chunk(chunk);
+               cond_resched();
        }
 
        /*
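The added cond_resched() keeps the worker from hogging a CPU when many chunks are freed in one pass, which lines up with the new linux/sched.h include at the top of the file. The pattern, sketched as a stand-alone work item (hypothetical, not from the patch):

#include <linux/sched.h>
#include <linux/workqueue.h>

/*
 * Sketch: a long-running work item in process context should offer to
 * reschedule between expensive iterations so other tasks are not starved.
 */
static void example_teardown_workfn(struct work_struct *work)
{
        int i;

        for (i = 0; i < 1024; i++) {
                /* ... tear down one chunk's worth of state here ... */
                cond_resched();
        }
}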
@@ -1645,7 +1666,7 @@ retry_pop:
                                             chunk->nr_pages) {
                        int nr = min(re - rs, nr_to_pop);
 
-                       ret = pcpu_populate_chunk(chunk, rs, rs + nr);
+                       ret = pcpu_populate_chunk(chunk, rs, rs + nr, gfp);
                        if (!ret) {
                                nr_to_pop -= nr;
                                spin_lock_irq(&pcpu_lock);
@@ -1662,7 +1683,7 @@
 
        if (nr_to_pop) {
                /* ran out of chunks to populate, create a new one and retry */
-               chunk = pcpu_create_chunk();
+               chunk = pcpu_create_chunk(gfp);
                if (chunk) {
                        spin_lock_irq(&pcpu_lock);
                        pcpu_chunk_relocate(chunk, -1);