diff options
author | Tejun Heo <tj@kernel.org> | 2009-11-11 01:35:18 -0500 |
---|---|---|
committer | Tejun Heo <tj@kernel.org> | 2009-11-12 10:55:35 -0500 |
commit | 833af8427be4b217b5bc522f61afdbd3f1d282c2 (patch) | |
tree | 7af86f8c599c4513f81c9b37ec34926cb4728316 | |
parent | 4a6cc4bd32e580722882115d4c8b964d732c11e4 (diff) |
percpu: restructure pcpu_extend_area_map() to fix bugs and improve readability
pcpu_extend_area_map() had the following two bugs.
* It should return 1 if pcpu_lock was dropped and reacquired but it
returned 0. This could lead to oops if free_percpu() races with
area map extension.
* pcpu_mem_free() was called under pcpu_lock. pcpu_mem_free() might
end up calling vfree() which isn't IRQ safe. This could lead to
deadlock through lock order inversion via IRQ.
In addition, Linus pointed out that the temporary lock dropping and
subtle three-way return value of pcpu_extend_area_map() was very ugly
and suggested to split the function into two - pcpu_need_to_extend()
and pcpu_extend_area_map().
This patch restructures pcpu_extend_area_map() as suggested and fixes
the two bugs.
Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Ingo Molnar <mingo@elte.hu>
-rw-r--r-- | mm/percpu.c | 121 |
1 files changed, 81 insertions, 40 deletions
diff --git a/mm/percpu.c b/mm/percpu.c index d90797160c2a..5adfc268b408 100644 --- a/mm/percpu.c +++ b/mm/percpu.c | |||
@@ -355,62 +355,86 @@ static struct pcpu_chunk *pcpu_chunk_addr_search(void *addr) | |||
355 | } | 355 | } |
356 | 356 | ||
357 | /** | 357 | /** |
358 | * pcpu_extend_area_map - extend area map for allocation | 358 | * pcpu_need_to_extend - determine whether chunk area map needs to be extended |
359 | * @chunk: target chunk | 359 | * @chunk: chunk of interest |
360 | * | 360 | * |
361 | * Extend area map of @chunk so that it can accomodate an allocation. | 361 | * Determine whether area map of @chunk needs to be extended to |
362 | * A single allocation can split an area into three areas, so this | 362 | * accomodate a new allocation. |
363 | * function makes sure that @chunk->map has at least two extra slots. | ||
364 | * | 363 | * |
365 | * CONTEXT: | 364 | * CONTEXT: |
366 | * pcpu_alloc_mutex, pcpu_lock. pcpu_lock is released and reacquired | 365 | * pcpu_lock. |
367 | * if area map is extended. | ||
368 | * | 366 | * |
369 | * RETURNS: | 367 | * RETURNS: |
370 | * 0 if noop, 1 if successfully extended, -errno on failure. | 368 | * New target map allocation length if extension is necessary, 0 |
369 | * otherwise. | ||
371 | */ | 370 | */ |
372 | static int pcpu_extend_area_map(struct pcpu_chunk *chunk, unsigned long *flags) | 371 | static int pcpu_need_to_extend(struct pcpu_chunk *chunk) |
373 | { | 372 | { |
374 | int new_alloc; | 373 | int new_alloc; |
375 | int *new; | ||
376 | size_t size; | ||
377 | 374 | ||
378 | /* has enough? */ | ||
379 | if (chunk->map_alloc >= chunk->map_used + 2) | 375 | if (chunk->map_alloc >= chunk->map_used + 2) |
380 | return 0; | 376 | return 0; |
381 | 377 | ||
382 | spin_unlock_irqrestore(&pcpu_lock, *flags); | ||
383 | |||
384 | new_alloc = PCPU_DFL_MAP_ALLOC; | 378 | new_alloc = PCPU_DFL_MAP_ALLOC; |
385 | while (new_alloc < chunk->map_used + 2) | 379 | while (new_alloc < chunk->map_used + 2) |
386 | new_alloc *= 2; | 380 | new_alloc *= 2; |
387 | 381 | ||
388 | new = pcpu_mem_alloc(new_alloc * sizeof(new[0])); | 382 | return new_alloc; |
389 | if (!new) { | 383 | } |
390 | spin_lock_irqsave(&pcpu_lock, *flags); | 384 | |
385 | /** | ||
386 | * pcpu_extend_area_map - extend area map of a chunk | ||
387 | * @chunk: chunk of interest | ||
388 | * @new_alloc: new target allocation length of the area map | ||
389 | * | ||
390 | * Extend area map of @chunk to have @new_alloc entries. | ||
391 | * | ||
392 | * CONTEXT: | ||
393 | * Does GFP_KERNEL allocation. Grabs and releases pcpu_lock. | ||
394 | * | ||
395 | * RETURNS: | ||
396 | * 0 on success, -errno on failure. | ||
397 | */ | ||
398 | static int pcpu_extend_area_map(struct pcpu_chunk *chunk, int new_alloc) | ||
399 | { | ||
400 | int *old = NULL, *new = NULL; | ||
401 | size_t old_size = 0, new_size = new_alloc * sizeof(new[0]); | ||
402 | unsigned long flags; | ||
403 | |||
404 | new = pcpu_mem_alloc(new_size); | ||
405 | if (!new) | ||
391 | return -ENOMEM; | 406 | return -ENOMEM; |
392 | } | ||
393 | 407 | ||
394 | /* | 408 | /* acquire pcpu_lock and switch to new area map */ |
395 | * Acquire pcpu_lock and switch to new area map. Only free | 409 | spin_lock_irqsave(&pcpu_lock, flags); |
396 | * could have happened inbetween, so map_used couldn't have | 410 | |
397 | * grown. | 411 | if (new_alloc <= chunk->map_alloc) |
398 | */ | 412 | goto out_unlock; |
399 | spin_lock_irqsave(&pcpu_lock, *flags); | ||
400 | BUG_ON(new_alloc < chunk->map_used + 2); | ||
401 | 413 | ||
402 | size = chunk->map_alloc * sizeof(chunk->map[0]); | 414 | old_size = chunk->map_alloc * sizeof(chunk->map[0]); |
403 | memcpy(new, chunk->map, size); | 415 | memcpy(new, chunk->map, old_size); |
404 | 416 | ||
405 | /* | 417 | /* |
406 | * map_alloc < PCPU_DFL_MAP_ALLOC indicates that the chunk is | 418 | * map_alloc < PCPU_DFL_MAP_ALLOC indicates that the chunk is |
407 | * one of the first chunks and still using static map. | 419 | * one of the first chunks and still using static map. |
408 | */ | 420 | */ |
409 | if (chunk->map_alloc >= PCPU_DFL_MAP_ALLOC) | 421 | if (chunk->map_alloc >= PCPU_DFL_MAP_ALLOC) |
410 | pcpu_mem_free(chunk->map, size); | 422 | old = chunk->map; |
411 | 423 | ||
412 | chunk->map_alloc = new_alloc; | 424 | chunk->map_alloc = new_alloc; |
413 | chunk->map = new; | 425 | chunk->map = new; |
426 | new = NULL; | ||
427 | |||
428 | out_unlock: | ||
429 | spin_unlock_irqrestore(&pcpu_lock, flags); | ||
430 | |||
431 | /* | ||
432 | * pcpu_mem_free() might end up calling vfree() which uses | ||
433 | * IRQ-unsafe lock and thus can't be called under pcpu_lock. | ||
434 | */ | ||
435 | pcpu_mem_free(old, old_size); | ||
436 | pcpu_mem_free(new, new_size); | ||
437 | |||
414 | return 0; | 438 | return 0; |
415 | } | 439 | } |
416 | 440 | ||
@@ -1049,7 +1073,7 @@ static void *pcpu_alloc(size_t size, size_t align, bool reserved) | |||
1049 | static int warn_limit = 10; | 1073 | static int warn_limit = 10; |
1050 | struct pcpu_chunk *chunk; | 1074 | struct pcpu_chunk *chunk; |
1051 | const char *err; | 1075 | const char *err; |
1052 | int slot, off; | 1076 | int slot, off, new_alloc; |
1053 | unsigned long flags; | 1077 | unsigned long flags; |
1054 | 1078 | ||
1055 | if (unlikely(!size || size > PCPU_MIN_UNIT_SIZE || align > PAGE_SIZE)) { | 1079 | if (unlikely(!size || size > PCPU_MIN_UNIT_SIZE || align > PAGE_SIZE)) { |
@@ -1064,14 +1088,25 @@ static void *pcpu_alloc(size_t size, size_t align, bool reserved) | |||
1064 | /* serve reserved allocations from the reserved chunk if available */ | 1088 | /* serve reserved allocations from the reserved chunk if available */ |
1065 | if (reserved && pcpu_reserved_chunk) { | 1089 | if (reserved && pcpu_reserved_chunk) { |
1066 | chunk = pcpu_reserved_chunk; | 1090 | chunk = pcpu_reserved_chunk; |
1067 | if (size > chunk->contig_hint || | 1091 | |
1068 | pcpu_extend_area_map(chunk, &flags) < 0) { | 1092 | if (size > chunk->contig_hint) { |
1069 | err = "failed to extend area map of reserved chunk"; | 1093 | err = "alloc from reserved chunk failed"; |
1070 | goto fail_unlock; | 1094 | goto fail_unlock; |
1071 | } | 1095 | } |
1096 | |||
1097 | while ((new_alloc = pcpu_need_to_extend(chunk))) { | ||
1098 | spin_unlock_irqrestore(&pcpu_lock, flags); | ||
1099 | if (pcpu_extend_area_map(chunk, new_alloc) < 0) { | ||
1100 | err = "failed to extend area map of reserved chunk"; | ||
1101 | goto fail_unlock_mutex; | ||
1102 | } | ||
1103 | spin_lock_irqsave(&pcpu_lock, flags); | ||
1104 | } | ||
1105 | |||
1072 | off = pcpu_alloc_area(chunk, size, align); | 1106 | off = pcpu_alloc_area(chunk, size, align); |
1073 | if (off >= 0) | 1107 | if (off >= 0) |
1074 | goto area_found; | 1108 | goto area_found; |
1109 | |||
1075 | err = "alloc from reserved chunk failed"; | 1110 | err = "alloc from reserved chunk failed"; |
1076 | goto fail_unlock; | 1111 | goto fail_unlock; |
1077 | } | 1112 | } |
@@ -1083,14 +1118,20 @@ restart: | |||
1083 | if (size > chunk->contig_hint) | 1118 | if (size > chunk->contig_hint) |
1084 | continue; | 1119 | continue; |
1085 | 1120 | ||
1086 | switch (pcpu_extend_area_map(chunk, &flags)) { | 1121 | new_alloc = pcpu_need_to_extend(chunk); |
1087 | case 0: | 1122 | if (new_alloc) { |
1088 | break; | 1123 | spin_unlock_irqrestore(&pcpu_lock, flags); |
1089 | case 1: | 1124 | if (pcpu_extend_area_map(chunk, |
1090 | goto restart; /* pcpu_lock dropped, restart */ | 1125 | new_alloc) < 0) { |
1091 | default: | 1126 | err = "failed to extend area map"; |
1092 | err = "failed to extend area map"; | 1127 | goto fail_unlock_mutex; |
1093 | goto fail_unlock; | 1128 | } |
1129 | spin_lock_irqsave(&pcpu_lock, flags); | ||
1130 | /* | ||
1131 | * pcpu_lock has been dropped, need to | ||
1132 | * restart cpu_slot list walking. | ||
1133 | */ | ||
1134 | goto restart; | ||
1094 | } | 1135 | } |
1095 | 1136 | ||
1096 | off = pcpu_alloc_area(chunk, size, align); | 1137 | off = pcpu_alloc_area(chunk, size, align); |