author		Tejun Heo <tj@kernel.org>	2009-11-11 01:35:18 -0500
committer	Tejun Heo <tj@kernel.org>	2009-11-12 10:55:35 -0500
commit		833af8427be4b217b5bc522f61afdbd3f1d282c2 (patch)
tree		7af86f8c599c4513f81c9b37ec34926cb4728316
parent		4a6cc4bd32e580722882115d4c8b964d732c11e4 (diff)
percpu: restructure pcpu_extend_area_map() to fix bugs and improve readability
pcpu_extend_area_map() had the following two bugs.

* It should return 1 if pcpu_lock was dropped and reacquired, but it
  returned 0.  This could lead to an oops if free_percpu() races with
  area map extension.

* pcpu_mem_free() was called under pcpu_lock.  pcpu_mem_free() might
  end up calling vfree(), which isn't IRQ safe.  This could lead to
  deadlock through lock order inversion via IRQ.

In addition, Linus pointed out that the temporary lock dropping and
the subtle three-way return value of pcpu_extend_area_map() were very
ugly and suggested splitting the function into two:
pcpu_need_to_extend() and pcpu_extend_area_map().

This patch restructures pcpu_extend_area_map() as suggested and fixes
the two bugs.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Ingo Molnar <mingo@elte.hu>
-rw-r--r--	mm/percpu.c	121
1 file changed, 81 insertions(+), 40 deletions(-)
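The second bug above turns on a general locking rule: vfree() takes an IRQ-unsafe lock internally, so it must never be reached while holding a spinlock acquired with IRQs disabled, or an interrupt on another CPU can complete a lock order inversion and deadlock. Below is a minimal sketch of the discipline the patch adopts: allocate before taking the lock, swap pointers under it, free only after releasing it. The sketch is hypothetical and not part of the commit; demo_lock and demo_resize() are invented names, while DEFINE_SPINLOCK(), spin_lock_irqsave(), vmalloc() and vfree() are the real kernel APIs involved.

#include <linux/spinlock.h>
#include <linux/string.h>
#include <linux/vmalloc.h>

static DEFINE_SPINLOCK(demo_lock);	/* hypothetical example lock */

static int demo_resize(void **table_p, size_t old_size, size_t new_size)
{
	void *old, *new;
	unsigned long flags;

	new = vmalloc(new_size);	/* may sleep; lock not held yet */
	if (!new)
		return -ENOMEM;

	spin_lock_irqsave(&demo_lock, flags);
	memcpy(new, *table_p, old_size);	/* publish under the lock */
	old = *table_p;
	*table_p = new;
	spin_unlock_irqrestore(&demo_lock, flags);

	vfree(old);	/* IRQ-unsafe free happens only after unlocking */
	return 0;
}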
diff --git a/mm/percpu.c b/mm/percpu.c
index d90797160c2a..5adfc268b408 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -355,62 +355,86 @@ static struct pcpu_chunk *pcpu_chunk_addr_search(void *addr)
 }
 
 /**
- * pcpu_extend_area_map - extend area map for allocation
- * @chunk: target chunk
+ * pcpu_need_to_extend - determine whether chunk area map needs to be extended
+ * @chunk: chunk of interest
  *
- * Extend area map of @chunk so that it can accommodate an allocation.
- * A single allocation can split an area into three areas, so this
- * function makes sure that @chunk->map has at least two extra slots.
+ * Determine whether area map of @chunk needs to be extended to
+ * accommodate a new allocation.
  *
  * CONTEXT:
- * pcpu_alloc_mutex, pcpu_lock.  pcpu_lock is released and reacquired
- * if area map is extended.
+ * pcpu_lock.
  *
  * RETURNS:
- * 0 if noop, 1 if successfully extended, -errno on failure.
+ * New target map allocation length if extension is necessary, 0
+ * otherwise.
  */
-static int pcpu_extend_area_map(struct pcpu_chunk *chunk, unsigned long *flags)
+static int pcpu_need_to_extend(struct pcpu_chunk *chunk)
 {
 	int new_alloc;
-	int *new;
-	size_t size;
 
-	/* has enough? */
 	if (chunk->map_alloc >= chunk->map_used + 2)
 		return 0;
 
-	spin_unlock_irqrestore(&pcpu_lock, *flags);
-
 	new_alloc = PCPU_DFL_MAP_ALLOC;
 	while (new_alloc < chunk->map_used + 2)
 		new_alloc *= 2;
 
-	new = pcpu_mem_alloc(new_alloc * sizeof(new[0]));
-	if (!new) {
-		spin_lock_irqsave(&pcpu_lock, *flags);
+	return new_alloc;
+}
+
+/**
+ * pcpu_extend_area_map - extend area map of a chunk
+ * @chunk: chunk of interest
+ * @new_alloc: new target allocation length of the area map
+ *
+ * Extend area map of @chunk to have @new_alloc entries.
+ *
+ * CONTEXT:
+ * Does GFP_KERNEL allocation.  Grabs and releases pcpu_lock.
+ *
+ * RETURNS:
+ * 0 on success, -errno on failure.
+ */
+static int pcpu_extend_area_map(struct pcpu_chunk *chunk, int new_alloc)
+{
+	int *old = NULL, *new = NULL;
+	size_t old_size = 0, new_size = new_alloc * sizeof(new[0]);
+	unsigned long flags;
+
+	new = pcpu_mem_alloc(new_size);
+	if (!new)
 		return -ENOMEM;
-	}
 
-	/*
-	 * Acquire pcpu_lock and switch to new area map.  Only free
-	 * could have happened in between, so map_used couldn't have
-	 * grown.
-	 */
-	spin_lock_irqsave(&pcpu_lock, *flags);
-	BUG_ON(new_alloc < chunk->map_used + 2);
+	/* acquire pcpu_lock and switch to new area map */
+	spin_lock_irqsave(&pcpu_lock, flags);
+
+	if (new_alloc <= chunk->map_alloc)
+		goto out_unlock;
 
-	size = chunk->map_alloc * sizeof(chunk->map[0]);
-	memcpy(new, chunk->map, size);
+	old_size = chunk->map_alloc * sizeof(chunk->map[0]);
+	memcpy(new, chunk->map, old_size);
 
 	/*
 	 * map_alloc < PCPU_DFL_MAP_ALLOC indicates that the chunk is
 	 * one of the first chunks and still using static map.
 	 */
 	if (chunk->map_alloc >= PCPU_DFL_MAP_ALLOC)
-		pcpu_mem_free(chunk->map, size);
+		old = chunk->map;
 
 	chunk->map_alloc = new_alloc;
 	chunk->map = new;
+	new = NULL;
+
+out_unlock:
+	spin_unlock_irqrestore(&pcpu_lock, flags);
+
+	/*
+	 * pcpu_mem_free() might end up calling vfree() which uses
+	 * IRQ-unsafe lock and thus can't be called under pcpu_lock.
+	 */
+	pcpu_mem_free(old, old_size);
+	pcpu_mem_free(new, new_size);
+
 	return 0;
 }
 
@@ -1049,7 +1073,7 @@ static void *pcpu_alloc(size_t size, size_t align, bool reserved)
 	static int warn_limit = 10;
 	struct pcpu_chunk *chunk;
 	const char *err;
-	int slot, off;
+	int slot, off, new_alloc;
 	unsigned long flags;
 
 	if (unlikely(!size || size > PCPU_MIN_UNIT_SIZE || align > PAGE_SIZE)) {
@@ -1064,14 +1088,25 @@ static void *pcpu_alloc(size_t size, size_t align, bool reserved)
 	/* serve reserved allocations from the reserved chunk if available */
 	if (reserved && pcpu_reserved_chunk) {
 		chunk = pcpu_reserved_chunk;
-		if (size > chunk->contig_hint ||
-		    pcpu_extend_area_map(chunk, &flags) < 0) {
-			err = "failed to extend area map of reserved chunk";
+
+		if (size > chunk->contig_hint) {
+			err = "alloc from reserved chunk failed";
 			goto fail_unlock;
 		}
+
+		while ((new_alloc = pcpu_need_to_extend(chunk))) {
+			spin_unlock_irqrestore(&pcpu_lock, flags);
+			if (pcpu_extend_area_map(chunk, new_alloc) < 0) {
+				err = "failed to extend area map of reserved chunk";
+				goto fail_unlock_mutex;
+			}
+			spin_lock_irqsave(&pcpu_lock, flags);
+		}
+
 		off = pcpu_alloc_area(chunk, size, align);
 		if (off >= 0)
 			goto area_found;
+
 		err = "alloc from reserved chunk failed";
 		goto fail_unlock;
 	}
@@ -1083,14 +1118,20 @@ restart:
 		if (size > chunk->contig_hint)
 			continue;
 
-		switch (pcpu_extend_area_map(chunk, &flags)) {
-		case 0:
-			break;
-		case 1:
-			goto restart;	/* pcpu_lock dropped, restart */
-		default:
-			err = "failed to extend area map";
-			goto fail_unlock;
+		new_alloc = pcpu_need_to_extend(chunk);
+		if (new_alloc) {
+			spin_unlock_irqrestore(&pcpu_lock, flags);
+			if (pcpu_extend_area_map(chunk,
+						 new_alloc) < 0) {
+				err = "failed to extend area map";
+				goto fail_unlock_mutex;
+			}
+			spin_lock_irqsave(&pcpu_lock, flags);
+			/*
+			 * pcpu_lock has been dropped, need to
+			 * restart cpu_slot list walking.
+			 */
+			goto restart;
 		}
 
 		off = pcpu_alloc_area(chunk, size, align);
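After the split, every caller follows the same contract, visible in both hunks above: pcpu_need_to_extend() is a cheap check made under pcpu_lock; if it returns a nonzero target length, the caller drops the lock (pcpu_extend_area_map() performs a GFP_KERNEL allocation and may sleep), extends, reacquires, and re-checks or restarts, since the chunk may have changed while the lock was out. Restated as an isolated fragment for clarity (a sketch assembled from the hunks above, not a separate function in the patch; error handling is elided):

	spin_lock_irqsave(&pcpu_lock, flags);
	while ((new_alloc = pcpu_need_to_extend(chunk))) {
		/* the extend path sleeps: drop the IRQ-disabled lock first */
		spin_unlock_irqrestore(&pcpu_lock, flags);
		if (pcpu_extend_area_map(chunk, new_alloc) < 0)
			goto fail;	/* error path elided in this sketch */
		spin_lock_irqsave(&pcpu_lock, flags);
		/* lock was dropped: re-check, another CPU may have raced */
	}

This shape also closes the first bug: the old function's subtle 0/1/-errno return let callers miss that pcpu_lock had been dropped, whereas now the lock drop is explicit at the call site and cannot be overlooked.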