author     Tejun Heo <tj@kernel.org>    2016-05-25 11:48:25 -0400
committer  Tejun Heo <tj@kernel.org>    2016-05-25 11:48:25 -0400
commit     4f996e234dad488e5d9ba0858bc1bae12eff82c3 (patch)
tree       882a932c3883968a9276598fd536ab43413083fd
parent     28165ec7a99be98123aa89540bf2cfc24df19498 (diff)
percpu: fix synchronization between chunk->map_extend_work and chunk destruction
Atomic allocations can trigger async map extensions, which are serviced
by chunk->map_extend_work. pcpu_balance_work, which is responsible for
destroying idle chunks, wasn't synchronizing properly against
chunk->map_extend_work and could end up freeing the chunk while the work
item was still in flight.
This patch fixes the bug by rolling async map extension operations
into pcpu_balance_work.
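
The pattern the fix adopts is generic enough to sketch outside the kernel:
queue the objects that need servicing on a single lock-protected list and let
one long-lived worker drain it, so the destruction path only has to unlink the
object under the same lock. The userspace C sketch below illustrates that idea
with pthreads; the names loosely mirror the patch, but the structures and
functions are illustrative assumptions, not the kernel implementation.

/*
 * Minimal userspace sketch (assumption, not kernel code): chunks that need
 * their map extended are queued on a lock-protected list and drained by a
 * single "balance" worker.  Destruction unlinks the chunk under the same
 * lock, so the worker can never touch a freed chunk.
 */
#include <pthread.h>
#include <stdio.h>

struct chunk {
	int map_alloc;
	struct chunk *next;	/* link on extend_list */
	int queued;		/* nonzero while on extend_list */
};

static pthread_mutex_t pcpu_lock = PTHREAD_MUTEX_INITIALIZER;
static struct chunk *extend_list;	/* chunks awaiting map extension */

/* Called from the (atomic) alloc path: just queue; the worker does the rest. */
static void request_extension(struct chunk *c)
{
	pthread_mutex_lock(&pcpu_lock);
	if (!c->queued) {
		c->next = extend_list;
		extend_list = c;
		c->queued = 1;
	}
	pthread_mutex_unlock(&pcpu_lock);
}

/* Called before freeing a chunk: make sure the worker can no longer see it. */
static void unlink_from_extend_list(struct chunk *c)
{
	struct chunk **p;

	pthread_mutex_lock(&pcpu_lock);
	for (p = &extend_list; *p; p = &(*p)->next) {
		if (*p == c) {
			*p = c->next;
			c->queued = 0;
			break;
		}
	}
	pthread_mutex_unlock(&pcpu_lock);
}

/* The single balance worker: pop one chunk at a time under the lock. */
static void *balance_worker(void *unused)
{
	(void)unused;
	for (;;) {
		struct chunk *c;

		pthread_mutex_lock(&pcpu_lock);
		c = extend_list;
		if (c) {
			extend_list = c->next;
			c->queued = 0;
		}
		pthread_mutex_unlock(&pcpu_lock);

		if (!c)
			break;		/* nothing left to service */
		c->map_alloc *= 2;	/* stand-in for the real map extension */
		printf("extended chunk to %d map entries\n", c->map_alloc);
	}
	return NULL;
}

int main(void)
{
	struct chunk a = { .map_alloc = 16 }, b = { .map_alloc = 32 };
	pthread_t worker;

	request_extension(&a);
	request_extension(&b);
	unlink_from_extend_list(&b);	/* "destroy" b before the worker runs */

	pthread_create(&worker, NULL, balance_worker, NULL);
	pthread_join(&worker, NULL);
	return 0;
}

Because the worker and the destruction path take the same lock, and a chunk is
unlinked before it is freed, the worker can never run against a freed chunk,
which is the race the per-chunk work item allowed.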
Signed-off-by: Tejun Heo <tj@kernel.org>
Reported-and-tested-by: Alexei Starovoitov <alexei.starovoitov@gmail.com>
Reported-by: Vlastimil Babka <vbabka@suse.cz>
Reported-by: Sasha Levin <sasha.levin@oracle.com>
Cc: stable@vger.kernel.org # v3.18+
Fixes: 9c824b6a172c ("percpu: make sure chunk->map array has available space")
 mm/percpu.c | 57 ++++++++++++++++++++++++++++++++++++---------------------
 1 file changed, 36 insertions(+), 21 deletions(-)
diff --git a/mm/percpu.c b/mm/percpu.c
index 0c59684f1ff2..b1d2a3844792 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -112,7 +112,7 @@ struct pcpu_chunk {
 	int			map_used;	/* # of map entries used before the sentry */
 	int			map_alloc;	/* # of map entries allocated */
 	int			*map;		/* allocation map */
-	struct work_struct	map_extend_work;/* async ->map[] extension */
+	struct list_head	map_extend_list;/* on pcpu_map_extend_chunks */
 
 	void			*data;		/* chunk data */
 	int			first_free;	/* no free below this */
@@ -166,6 +166,9 @@ static DEFINE_MUTEX(pcpu_alloc_mutex);	/* chunk create/destroy, [de]pop */
 
 static struct list_head *pcpu_slot __read_mostly; /* chunk list slots */
 
+/* chunks which need their map areas extended, protected by pcpu_lock */
+static LIST_HEAD(pcpu_map_extend_chunks);
+
 /*
  * The number of empty populated pages, protected by pcpu_lock.  The
  * reserved chunk doesn't contribute to the count.
@@ -395,13 +398,19 @@ static int pcpu_need_to_extend(struct pcpu_chunk *chunk, bool is_atomic)
 {
 	int margin, new_alloc;
 
+	lockdep_assert_held(&pcpu_lock);
+
 	if (is_atomic) {
 		margin = 3;
 
 		if (chunk->map_alloc <
-		    chunk->map_used + PCPU_ATOMIC_MAP_MARGIN_LOW &&
-		    pcpu_async_enabled)
-			schedule_work(&chunk->map_extend_work);
+		    chunk->map_used + PCPU_ATOMIC_MAP_MARGIN_LOW) {
+			if (list_empty(&chunk->map_extend_list)) {
+				list_add_tail(&chunk->map_extend_list,
+					      &pcpu_map_extend_chunks);
+				pcpu_schedule_balance_work();
+			}
+		}
 	} else {
 		margin = PCPU_ATOMIC_MAP_MARGIN_HIGH;
 	}
@@ -467,20 +476,6 @@ out_unlock:
 	return 0;
 }
 
-static void pcpu_map_extend_workfn(struct work_struct *work)
-{
-	struct pcpu_chunk *chunk = container_of(work, struct pcpu_chunk,
-						map_extend_work);
-	int new_alloc;
-
-	spin_lock_irq(&pcpu_lock);
-	new_alloc = pcpu_need_to_extend(chunk, false);
-	spin_unlock_irq(&pcpu_lock);
-
-	if (new_alloc)
-		pcpu_extend_area_map(chunk, new_alloc);
-}
-
 /**
  * pcpu_fit_in_area - try to fit the requested allocation in a candidate area
  * @chunk: chunk the candidate area belongs to
@@ -740,7 +735,7 @@ static struct pcpu_chunk *pcpu_alloc_chunk(void)
 	chunk->map_used = 1;
 
 	INIT_LIST_HEAD(&chunk->list);
-	INIT_WORK(&chunk->map_extend_work, pcpu_map_extend_workfn);
+	INIT_LIST_HEAD(&chunk->map_extend_list);
 	chunk->free_size = pcpu_unit_size;
 	chunk->contig_hint = pcpu_unit_size;
 
@@ -1129,6 +1124,7 @@ static void pcpu_balance_workfn(struct work_struct *work)
 		if (chunk == list_first_entry(free_head, struct pcpu_chunk, list))
 			continue;
 
+		list_del_init(&chunk->map_extend_list);
 		list_move(&chunk->list, &to_free);
 	}
 
@@ -1146,6 +1142,25 @@ static void pcpu_balance_workfn(struct work_struct *work)
 		pcpu_destroy_chunk(chunk);
 	}
 
+	/* service chunks which requested async area map extension */
+	do {
+		int new_alloc = 0;
+
+		spin_lock_irq(&pcpu_lock);
+
+		chunk = list_first_entry_or_null(&pcpu_map_extend_chunks,
+					struct pcpu_chunk, map_extend_list);
+		if (chunk) {
+			list_del_init(&chunk->map_extend_list);
+			new_alloc = pcpu_need_to_extend(chunk, false);
+		}
+
+		spin_unlock_irq(&pcpu_lock);
+
+		if (new_alloc)
+			pcpu_extend_area_map(chunk, new_alloc);
+	} while (chunk);
+
 	/*
 	 * Ensure there are certain number of free populated pages for
 	 * atomic allocs.  Fill up from the most packed so that atomic
@@ -1644,7 +1659,7 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
 	 */
 	schunk = memblock_virt_alloc(pcpu_chunk_struct_size, 0);
 	INIT_LIST_HEAD(&schunk->list);
-	INIT_WORK(&schunk->map_extend_work, pcpu_map_extend_workfn);
+	INIT_LIST_HEAD(&schunk->map_extend_list);
 	schunk->base_addr = base_addr;
 	schunk->map = smap;
 	schunk->map_alloc = ARRAY_SIZE(smap);
@@ -1673,7 +1688,7 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
 	if (dyn_size) {
 		dchunk = memblock_virt_alloc(pcpu_chunk_struct_size, 0);
 		INIT_LIST_HEAD(&dchunk->list);
-		INIT_WORK(&dchunk->map_extend_work, pcpu_map_extend_workfn);
+		INIT_LIST_HEAD(&dchunk->map_extend_list);
 		dchunk->base_addr = base_addr;
 		dchunk->map = dmap;
 		dchunk->map_alloc = ARRAY_SIZE(dmap);