diff options
Diffstat (limited to 'mm/percpu.c')
-rw-r--r-- | mm/percpu.c | 162 |
1 files changed, 114 insertions, 48 deletions
diff --git a/mm/percpu.c b/mm/percpu.c index e2e80fc78601..442010cc91c6 100644 --- a/mm/percpu.c +++ b/mm/percpu.c | |||
@@ -72,6 +72,7 @@ | |||
72 | #include <asm/cacheflush.h> | 72 | #include <asm/cacheflush.h> |
73 | #include <asm/sections.h> | 73 | #include <asm/sections.h> |
74 | #include <asm/tlbflush.h> | 74 | #include <asm/tlbflush.h> |
75 | #include <asm/io.h> | ||
75 | 76 | ||
76 | #define PCPU_SLOT_BASE_SHIFT 5 /* 1-31 shares the same slot */ | 77 | #define PCPU_SLOT_BASE_SHIFT 5 /* 1-31 shares the same slot */ |
77 | #define PCPU_DFL_MAP_ALLOC 16 /* start a map with 16 ents */ | 78 | #define PCPU_DFL_MAP_ALLOC 16 /* start a map with 16 ents */ |
@@ -151,7 +152,10 @@ static int pcpu_reserved_chunk_limit; | |||
151 | * | 152 | * |
152 | * During allocation, pcpu_alloc_mutex is kept locked all the time and | 153 | * During allocation, pcpu_alloc_mutex is kept locked all the time and |
153 | * pcpu_lock is grabbed and released as necessary. All actual memory | 154 | * pcpu_lock is grabbed and released as necessary. All actual memory |
154 | * allocations are done using GFP_KERNEL with pcpu_lock released. | 155 | * allocations are done using GFP_KERNEL with pcpu_lock released. In |
156 | * general, percpu memory can't be allocated with IRQs off, but | ||
157 | * irqsave/restore are still used in the alloc path so that it can be | ||
158 | * called from the early init path - sched_init() specifically. | ||
155 | * | 159 | * |
156 | * Free path accesses and alters only the index data structures, so it | 160 | * Free path accesses and alters only the index data structures, so it |
157 | * can be safely called from atomic context. When memory needs to be | 161 | * can be safely called from atomic context. When memory needs to be |
@@ -350,63 +354,86 @@ static struct pcpu_chunk *pcpu_chunk_addr_search(void *addr) | |||
350 | } | 354 | } |
351 | 355 | ||
352 | /** | 356 | /** |
353 | * pcpu_extend_area_map - extend area map for allocation | 357 | * pcpu_need_to_extend - determine whether chunk area map needs to be extended |
354 | * @chunk: target chunk | 358 | * @chunk: chunk of interest |
355 | * | 359 | * |
356 | * Extend area map of @chunk so that it can accommodate an allocation. | 360 | * Determine whether area map of @chunk needs to be extended to |
357 | * A single allocation can split an area into three areas, so this | 361 | * accommodate a new allocation. |
358 | * function makes sure that @chunk->map has at least two extra slots. | ||
359 | * | 362 | * |
360 | * CONTEXT: | 363 | * CONTEXT: |
361 | * pcpu_alloc_mutex, pcpu_lock. pcpu_lock is released and reacquired | 364 | * pcpu_lock. |
362 | * if area map is extended. | ||
363 | * | 365 | * |
364 | * RETURNS: | 366 | * RETURNS: |
365 | * 0 if noop, 1 if successfully extended, -errno on failure. | 367 | * New target map allocation length if extension is necessary, 0 |
368 | * otherwise. | ||
366 | */ | 369 | */ |
367 | static int pcpu_extend_area_map(struct pcpu_chunk *chunk) | 370 | static int pcpu_need_to_extend(struct pcpu_chunk *chunk) |
368 | __releases(lock) __acquires(lock) | ||
369 | { | 371 | { |
370 | int new_alloc; | 372 | int new_alloc; |
371 | int *new; | ||
372 | size_t size; | ||
373 | 373 | ||
374 | /* has enough? */ | ||
375 | if (chunk->map_alloc >= chunk->map_used + 2) | 374 | if (chunk->map_alloc >= chunk->map_used + 2) |
376 | return 0; | 375 | return 0; |
377 | 376 | ||
378 | spin_unlock_irq(&pcpu_lock); | ||
379 | |||
380 | new_alloc = PCPU_DFL_MAP_ALLOC; | 377 | new_alloc = PCPU_DFL_MAP_ALLOC; |
381 | while (new_alloc < chunk->map_used + 2) | 378 | while (new_alloc < chunk->map_used + 2) |
382 | new_alloc *= 2; | 379 | new_alloc *= 2; |
383 | 380 | ||
384 | new = pcpu_mem_alloc(new_alloc * sizeof(new[0])); | 381 | return new_alloc; |
385 | if (!new) { | 382 | } |
386 | spin_lock_irq(&pcpu_lock); | 383 | |
384 | /** | ||
385 | * pcpu_extend_area_map - extend area map of a chunk | ||
386 | * @chunk: chunk of interest | ||
387 | * @new_alloc: new target allocation length of the area map | ||
388 | * | ||
389 | * Extend area map of @chunk to have @new_alloc entries. | ||
390 | * | ||
391 | * CONTEXT: | ||
392 | * Does GFP_KERNEL allocation. Grabs and releases pcpu_lock. | ||
393 | * | ||
394 | * RETURNS: | ||
395 | * 0 on success, -errno on failure. | ||
396 | */ | ||
397 | static int pcpu_extend_area_map(struct pcpu_chunk *chunk, int new_alloc) | ||
398 | { | ||
399 | int *old = NULL, *new = NULL; | ||
400 | size_t old_size = 0, new_size = new_alloc * sizeof(new[0]); | ||
401 | unsigned long flags; | ||
402 | |||
403 | new = pcpu_mem_alloc(new_size); | ||
404 | if (!new) | ||
387 | return -ENOMEM; | 405 | return -ENOMEM; |
388 | } | ||
389 | 406 | ||
390 | /* | 407 | /* acquire pcpu_lock and switch to new area map */ |
391 | * Acquire pcpu_lock and switch to new area map. Only free | 408 | spin_lock_irqsave(&pcpu_lock, flags); |
392 | * could have happened in between, so map_used couldn't have | ||
393 | * grown. | ||
394 | */ | ||
395 | spin_lock_irq(&pcpu_lock); | ||
396 | BUG_ON(new_alloc < chunk->map_used + 2); | ||
397 | 409 | ||
398 | size = chunk->map_alloc * sizeof(chunk->map[0]); | 410 | if (new_alloc <= chunk->map_alloc) |
399 | memcpy(new, chunk->map, size); | 411 | goto out_unlock; |
412 | |||
413 | old_size = chunk->map_alloc * sizeof(chunk->map[0]); | ||
414 | memcpy(new, chunk->map, old_size); | ||
400 | 415 | ||
401 | /* | 416 | /* |
402 | * map_alloc < PCPU_DFL_MAP_ALLOC indicates that the chunk is | 417 | * map_alloc < PCPU_DFL_MAP_ALLOC indicates that the chunk is |
403 | * one of the first chunks and still using static map. | 418 | * one of the first chunks and still using static map. |
404 | */ | 419 | */ |
405 | if (chunk->map_alloc >= PCPU_DFL_MAP_ALLOC) | 420 | if (chunk->map_alloc >= PCPU_DFL_MAP_ALLOC) |
406 | pcpu_mem_free(chunk->map, size); | 421 | old = chunk->map; |
407 | 422 | ||
408 | chunk->map_alloc = new_alloc; | 423 | chunk->map_alloc = new_alloc; |
409 | chunk->map = new; | 424 | chunk->map = new; |
425 | new = NULL; | ||
426 | |||
427 | out_unlock: | ||
428 | spin_unlock_irqrestore(&pcpu_lock, flags); | ||
429 | |||
430 | /* | ||
431 | * pcpu_mem_free() might end up calling vfree(), which uses | ||
432 | * an IRQ-unsafe lock and thus can't be called under pcpu_lock. | ||
433 | */ | ||
434 | pcpu_mem_free(old, old_size); | ||
435 | pcpu_mem_free(new, new_size); | ||
436 | |||
410 | return 0; | 437 | return 0; |
411 | } | 438 | } |
412 | 439 | ||
@@ -1045,7 +1072,8 @@ static void *pcpu_alloc(size_t size, size_t align, bool reserved) | |||
1045 | static int warn_limit = 10; | 1072 | static int warn_limit = 10; |
1046 | struct pcpu_chunk *chunk; | 1073 | struct pcpu_chunk *chunk; |
1047 | const char *err; | 1074 | const char *err; |
1048 | int slot, off; | 1075 | int slot, off, new_alloc; |
1076 | unsigned long flags; | ||
1049 | 1077 | ||
1050 | if (unlikely(!size || size > PCPU_MIN_UNIT_SIZE || align > PAGE_SIZE)) { | 1078 | if (unlikely(!size || size > PCPU_MIN_UNIT_SIZE || align > PAGE_SIZE)) { |
1051 | WARN(true, "illegal size (%zu) or align (%zu) for " | 1079 | WARN(true, "illegal size (%zu) or align (%zu) for " |
@@ -1054,19 +1082,30 @@ static void *pcpu_alloc(size_t size, size_t align, bool reserved) | |||
1054 | } | 1082 | } |
1055 | 1083 | ||
1056 | mutex_lock(&pcpu_alloc_mutex); | 1084 | mutex_lock(&pcpu_alloc_mutex); |
1057 | spin_lock_irq(&pcpu_lock); | 1085 | spin_lock_irqsave(&pcpu_lock, flags); |
1058 | 1086 | ||
1059 | /* serve reserved allocations from the reserved chunk if available */ | 1087 | /* serve reserved allocations from the reserved chunk if available */ |
1060 | if (reserved && pcpu_reserved_chunk) { | 1088 | if (reserved && pcpu_reserved_chunk) { |
1061 | chunk = pcpu_reserved_chunk; | 1089 | chunk = pcpu_reserved_chunk; |
1062 | if (size > chunk->contig_hint || | 1090 | |
1063 | pcpu_extend_area_map(chunk) < 0) { | 1091 | if (size > chunk->contig_hint) { |
1064 | err = "failed to extend area map of reserved chunk"; | 1092 | err = "alloc from reserved chunk failed"; |
1065 | goto fail_unlock; | 1093 | goto fail_unlock; |
1066 | } | 1094 | } |
1095 | |||
1096 | while ((new_alloc = pcpu_need_to_extend(chunk))) { | ||
1097 | spin_unlock_irqrestore(&pcpu_lock, flags); | ||
1098 | if (pcpu_extend_area_map(chunk, new_alloc) < 0) { | ||
1099 | err = "failed to extend area map of reserved chunk"; | ||
1100 | goto fail_unlock_mutex; | ||
1101 | } | ||
1102 | spin_lock_irqsave(&pcpu_lock, flags); | ||
1103 | } | ||
1104 | |||
1067 | off = pcpu_alloc_area(chunk, size, align); | 1105 | off = pcpu_alloc_area(chunk, size, align); |
1068 | if (off >= 0) | 1106 | if (off >= 0) |
1069 | goto area_found; | 1107 | goto area_found; |
1108 | |||
1070 | err = "alloc from reserved chunk failed"; | 1109 | err = "alloc from reserved chunk failed"; |
1071 | goto fail_unlock; | 1110 | goto fail_unlock; |
1072 | } | 1111 | } |
@@ -1078,14 +1117,20 @@ restart: | |||
1078 | if (size > chunk->contig_hint) | 1117 | if (size > chunk->contig_hint) |
1079 | continue; | 1118 | continue; |
1080 | 1119 | ||
1081 | switch (pcpu_extend_area_map(chunk)) { | 1120 | new_alloc = pcpu_need_to_extend(chunk); |
1082 | case 0: | 1121 | if (new_alloc) { |
1083 | break; | 1122 | spin_unlock_irqrestore(&pcpu_lock, flags); |
1084 | case 1: | 1123 | if (pcpu_extend_area_map(chunk, |
1085 | goto restart; /* pcpu_lock dropped, restart */ | 1124 | new_alloc) < 0) { |
1086 | default: | 1125 | err = "failed to extend area map"; |
1087 | err = "failed to extend area map"; | 1126 | goto fail_unlock_mutex; |
1088 | goto fail_unlock; | 1127 | } |
1128 | spin_lock_irqsave(&pcpu_lock, flags); | ||
1129 | /* | ||
1130 | * pcpu_lock has been dropped, need to | ||
1131 | * restart cpu_slot list walking. | ||
1132 | */ | ||
1133 | goto restart; | ||
1089 | } | 1134 | } |
1090 | 1135 | ||
1091 | off = pcpu_alloc_area(chunk, size, align); | 1136 | off = pcpu_alloc_area(chunk, size, align); |
@@ -1095,7 +1140,7 @@ restart: | |||
1095 | } | 1140 | } |
1096 | 1141 | ||
1097 | /* hmmm... no space left, create a new chunk */ | 1142 | /* hmmm... no space left, create a new chunk */ |
1098 | spin_unlock_irq(&pcpu_lock); | 1143 | spin_unlock_irqrestore(&pcpu_lock, flags); |
1099 | 1144 | ||
1100 | chunk = alloc_pcpu_chunk(); | 1145 | chunk = alloc_pcpu_chunk(); |
1101 | if (!chunk) { | 1146 | if (!chunk) { |
@@ -1103,16 +1148,16 @@ restart: | |||
1103 | goto fail_unlock_mutex; | 1148 | goto fail_unlock_mutex; |
1104 | } | 1149 | } |
1105 | 1150 | ||
1106 | spin_lock_irq(&pcpu_lock); | 1151 | spin_lock_irqsave(&pcpu_lock, flags); |
1107 | pcpu_chunk_relocate(chunk, -1); | 1152 | pcpu_chunk_relocate(chunk, -1); |
1108 | goto restart; | 1153 | goto restart; |
1109 | 1154 | ||
1110 | area_found: | 1155 | area_found: |
1111 | spin_unlock_irq(&pcpu_lock); | 1156 | spin_unlock_irqrestore(&pcpu_lock, flags); |
1112 | 1157 | ||
1113 | /* populate, map and clear the area */ | 1158 | /* populate, map and clear the area */ |
1114 | if (pcpu_populate_chunk(chunk, off, size)) { | 1159 | if (pcpu_populate_chunk(chunk, off, size)) { |
1115 | spin_lock_irq(&pcpu_lock); | 1160 | spin_lock_irqsave(&pcpu_lock, flags); |
1116 | pcpu_free_area(chunk, off); | 1161 | pcpu_free_area(chunk, off); |
1117 | err = "failed to populate"; | 1162 | err = "failed to populate"; |
1118 | goto fail_unlock; | 1163 | goto fail_unlock; |
@@ -1124,7 +1169,7 @@ area_found: | |||
1124 | return __addr_to_pcpu_ptr(chunk->base_addr + off); | 1169 | return __addr_to_pcpu_ptr(chunk->base_addr + off); |
1125 | 1170 | ||
1126 | fail_unlock: | 1171 | fail_unlock: |
1127 | spin_unlock_irq(&pcpu_lock); | 1172 | spin_unlock_irqrestore(&pcpu_lock, flags); |
1128 | fail_unlock_mutex: | 1173 | fail_unlock_mutex: |
1129 | mutex_unlock(&pcpu_alloc_mutex); | 1174 | mutex_unlock(&pcpu_alloc_mutex); |
1130 | if (warn_limit) { | 1175 | if (warn_limit) { |
@@ -1256,6 +1301,27 @@ void free_percpu(void *ptr) | |||
1256 | } | 1301 | } |
1257 | EXPORT_SYMBOL_GPL(free_percpu); | 1302 | EXPORT_SYMBOL_GPL(free_percpu); |
1258 | 1303 | ||
1304 | /** | ||
1305 | * per_cpu_ptr_to_phys - convert translated percpu address to physical address | ||
1306 | * @addr: the address to be converted to physical address | ||
1307 | * | ||
1308 | * Given @addr, which is a dereferenceable address obtained via one of | ||
1309 | * the percpu access macros, this function translates it into its physical | ||
1310 | * address. The caller is responsible for ensuring @addr stays valid | ||
1311 | * until this function finishes. | ||
1312 | * | ||
1313 | * RETURNS: | ||
1314 | * The physical address for @addr. | ||
1315 | */ | ||
1316 | phys_addr_t per_cpu_ptr_to_phys(void *addr) | ||
1317 | { | ||
1318 | if ((unsigned long)addr < VMALLOC_START || | ||
1319 | (unsigned long)addr >= VMALLOC_END) | ||
1320 | return __pa(addr); | ||
1321 | else | ||
1322 | return page_to_phys(vmalloc_to_page(addr)); | ||
1323 | } | ||
1324 | |||
1259 | static inline size_t pcpu_calc_fc_sizes(size_t static_size, | 1325 | static inline size_t pcpu_calc_fc_sizes(size_t static_size, |
1260 | size_t reserved_size, | 1326 | size_t reserved_size, |
1261 | ssize_t *dyn_sizep) | 1327 | ssize_t *dyn_sizep) |