diff options
Diffstat (limited to 'mm/percpu.c')
-rw-r--r-- | mm/percpu.c | 162 |
1 files changed, 114 insertions, 48 deletions
diff --git a/mm/percpu.c b/mm/percpu.c index 77c6f7994a46..626e43c99498 100644 --- a/mm/percpu.c +++ b/mm/percpu.c | |||
@@ -72,6 +72,7 @@ | |||
72 | #include <asm/cacheflush.h> | 72 | #include <asm/cacheflush.h> |
73 | #include <asm/sections.h> | 73 | #include <asm/sections.h> |
74 | #include <asm/tlbflush.h> | 74 | #include <asm/tlbflush.h> |
75 | #include <asm/io.h> | ||
75 | 76 | ||
76 | #define PCPU_SLOT_BASE_SHIFT 5 /* 1-31 shares the same slot */ | 77 | #define PCPU_SLOT_BASE_SHIFT 5 /* 1-31 shares the same slot */ |
77 | #define PCPU_DFL_MAP_ALLOC 16 /* start a map with 16 ents */ | 78 | #define PCPU_DFL_MAP_ALLOC 16 /* start a map with 16 ents */ |
@@ -151,7 +152,10 @@ static int pcpu_reserved_chunk_limit; | |||
151 | * | 152 | * |
152 | * During allocation, pcpu_alloc_mutex is kept locked all the time and | 153 | * During allocation, pcpu_alloc_mutex is kept locked all the time and |
153 | * pcpu_lock is grabbed and released as necessary. All actual memory | 154 | * pcpu_lock is grabbed and released as necessary. All actual memory |
154 | * allocations are done using GFP_KERNEL with pcpu_lock released. | 155 | * allocations are done using GFP_KERNEL with pcpu_lock released. In |
156 | * general, percpu memory can't be allocated with irq off but | ||
157 | * irqsave/restore are still used in alloc path so that it can be used | ||
158 | * from early init path - sched_init() specifically. | ||
155 | * | 159 | * |
156 | * Free path accesses and alters only the index data structures, so it | 160 | * Free path accesses and alters only the index data structures, so it |
157 | * can be safely called from atomic context. When memory needs to be | 161 | * can be safely called from atomic context. When memory needs to be |
@@ -350,63 +354,86 @@ static struct pcpu_chunk *pcpu_chunk_addr_search(void *addr) | |||
350 | } | 354 | } |
351 | 355 | ||
352 | /** | 356 | /** |
353 | * pcpu_extend_area_map - extend area map for allocation | 357 | * pcpu_need_to_extend - determine whether chunk area map needs to be extended |
354 | * @chunk: target chunk | 358 | * @chunk: chunk of interest |
355 | * | 359 | * |
356 | * Extend area map of @chunk so that it can accommodate an allocation. | 360 | * Determine whether area map of @chunk needs to be extended to |
357 | * A single allocation can split an area into three areas, so this | 361 | * accommodate a new allocation. |
358 | * function makes sure that @chunk->map has at least two extra slots. | ||
359 | * | 362 | * |
360 | * CONTEXT: | 363 | * CONTEXT: |
361 | * pcpu_alloc_mutex, pcpu_lock. pcpu_lock is released and reacquired | 364 | * pcpu_lock. |
362 | * if area map is extended. | ||
363 | * | 365 | * |
364 | * RETURNS: | 366 | * RETURNS: |
365 | * 0 if noop, 1 if successfully extended, -errno on failure. | 367 | * New target map allocation length if extension is necessary, 0 |
368 | * otherwise. | ||
366 | */ | 369 | */ |
367 | static int pcpu_extend_area_map(struct pcpu_chunk *chunk) | 370 | static int pcpu_need_to_extend(struct pcpu_chunk *chunk) |
368 | __releases(lock) __acquires(lock) | ||
369 | { | 371 | { |
370 | int new_alloc; | 372 | int new_alloc; |
371 | int *new; | ||
372 | size_t size; | ||
373 | 373 | ||
374 | /* has enough? */ | ||
375 | if (chunk->map_alloc >= chunk->map_used + 2) | 374 | if (chunk->map_alloc >= chunk->map_used + 2) |
376 | return 0; | 375 | return 0; |
377 | 376 | ||
378 | spin_unlock_irq(&pcpu_lock); | ||
379 | |||
380 | new_alloc = PCPU_DFL_MAP_ALLOC; | 377 | new_alloc = PCPU_DFL_MAP_ALLOC; |
381 | while (new_alloc < chunk->map_used + 2) | 378 | while (new_alloc < chunk->map_used + 2) |
382 | new_alloc *= 2; | 379 | new_alloc *= 2; |
383 | 380 | ||
384 | new = pcpu_mem_alloc(new_alloc * sizeof(new[0])); | 381 | return new_alloc; |
385 | if (!new) { | 382 | } |
386 | spin_lock_irq(&pcpu_lock); | 383 | |
384 | /** | ||
385 | * pcpu_extend_area_map - extend area map of a chunk | ||
386 | * @chunk: chunk of interest | ||
387 | * @new_alloc: new target allocation length of the area map | ||
388 | * | ||
389 | * Extend area map of @chunk to have @new_alloc entries. | ||
390 | * | ||
391 | * CONTEXT: | ||
392 | * Does GFP_KERNEL allocation. Grabs and releases pcpu_lock. | ||
393 | * | ||
394 | * RETURNS: | ||
395 | * 0 on success, -errno on failure. | ||
396 | */ | ||
397 | static int pcpu_extend_area_map(struct pcpu_chunk *chunk, int new_alloc) | ||
398 | { | ||
399 | int *old = NULL, *new = NULL; | ||
400 | size_t old_size = 0, new_size = new_alloc * sizeof(new[0]); | ||
401 | unsigned long flags; | ||
402 | |||
403 | new = pcpu_mem_alloc(new_size); | ||
404 | if (!new) | ||
387 | return -ENOMEM; | 405 | return -ENOMEM; |
388 | } | ||
389 | 406 | ||
390 | /* | 407 | /* acquire pcpu_lock and switch to new area map */ |
391 | * Acquire pcpu_lock and switch to new area map. Only free | 408 | spin_lock_irqsave(&pcpu_lock, flags); |
392 | * could have happened in between, so map_used couldn't have | ||
393 | * grown. | ||
394 | */ | ||
395 | spin_lock_irq(&pcpu_lock); | ||
396 | BUG_ON(new_alloc < chunk->map_used + 2); | ||
397 | 409 | ||
398 | size = chunk->map_alloc * sizeof(chunk->map[0]); | 410 | if (new_alloc <= chunk->map_alloc) |
399 | memcpy(new, chunk->map, size); | 411 | goto out_unlock; |
412 | |||
413 | old_size = chunk->map_alloc * sizeof(chunk->map[0]); | ||
414 | memcpy(new, chunk->map, old_size); | ||
400 | 415 | ||
401 | /* | 416 | /* |
402 | * map_alloc < PCPU_DFL_MAP_ALLOC indicates that the chunk is | 417 | * map_alloc < PCPU_DFL_MAP_ALLOC indicates that the chunk is |
403 | * one of the first chunks and still using static map. | 418 | * one of the first chunks and still using static map. |
404 | */ | 419 | */ |
405 | if (chunk->map_alloc >= PCPU_DFL_MAP_ALLOC) | 420 | if (chunk->map_alloc >= PCPU_DFL_MAP_ALLOC) |
406 | pcpu_mem_free(chunk->map, size); | 421 | old = chunk->map; |
407 | 422 | ||
408 | chunk->map_alloc = new_alloc; | 423 | chunk->map_alloc = new_alloc; |
409 | chunk->map = new; | 424 | chunk->map = new; |
425 | new = NULL; | ||
426 | |||
427 | out_unlock: | ||
428 | spin_unlock_irqrestore(&pcpu_lock, flags); | ||
429 | |||
430 | /* | ||
431 | * pcpu_mem_free() might end up calling vfree() which uses | ||
432 | * IRQ-unsafe lock and thus can't be called under pcpu_lock. | ||
433 | */ | ||
434 | pcpu_mem_free(old, old_size); | ||
435 | pcpu_mem_free(new, new_size); | ||
436 | |||
410 | return 0; | 437 | return 0; |
411 | } | 438 | } |
412 | 439 | ||
@@ -1043,7 +1070,8 @@ static void *pcpu_alloc(size_t size, size_t align, bool reserved) | |||
1043 | static int warn_limit = 10; | 1070 | static int warn_limit = 10; |
1044 | struct pcpu_chunk *chunk; | 1071 | struct pcpu_chunk *chunk; |
1045 | const char *err; | 1072 | const char *err; |
1046 | int slot, off; | 1073 | int slot, off, new_alloc; |
1074 | unsigned long flags; | ||
1047 | 1075 | ||
1048 | if (unlikely(!size || size > PCPU_MIN_UNIT_SIZE || align > PAGE_SIZE)) { | 1076 | if (unlikely(!size || size > PCPU_MIN_UNIT_SIZE || align > PAGE_SIZE)) { |
1049 | WARN(true, "illegal size (%zu) or align (%zu) for " | 1077 | WARN(true, "illegal size (%zu) or align (%zu) for " |
@@ -1052,19 +1080,30 @@ static void *pcpu_alloc(size_t size, size_t align, bool reserved) | |||
1052 | } | 1080 | } |
1053 | 1081 | ||
1054 | mutex_lock(&pcpu_alloc_mutex); | 1082 | mutex_lock(&pcpu_alloc_mutex); |
1055 | spin_lock_irq(&pcpu_lock); | 1083 | spin_lock_irqsave(&pcpu_lock, flags); |
1056 | 1084 | ||
1057 | /* serve reserved allocations from the reserved chunk if available */ | 1085 | /* serve reserved allocations from the reserved chunk if available */ |
1058 | if (reserved && pcpu_reserved_chunk) { | 1086 | if (reserved && pcpu_reserved_chunk) { |
1059 | chunk = pcpu_reserved_chunk; | 1087 | chunk = pcpu_reserved_chunk; |
1060 | if (size > chunk->contig_hint || | 1088 | |
1061 | pcpu_extend_area_map(chunk) < 0) { | 1089 | if (size > chunk->contig_hint) { |
1062 | err = "failed to extend area map of reserved chunk"; | 1090 | err = "alloc from reserved chunk failed"; |
1063 | goto fail_unlock; | 1091 | goto fail_unlock; |
1064 | } | 1092 | } |
1093 | |||
1094 | while ((new_alloc = pcpu_need_to_extend(chunk))) { | ||
1095 | spin_unlock_irqrestore(&pcpu_lock, flags); | ||
1096 | if (pcpu_extend_area_map(chunk, new_alloc) < 0) { | ||
1097 | err = "failed to extend area map of reserved chunk"; | ||
1098 | goto fail_unlock_mutex; | ||
1099 | } | ||
1100 | spin_lock_irqsave(&pcpu_lock, flags); | ||
1101 | } | ||
1102 | |||
1065 | off = pcpu_alloc_area(chunk, size, align); | 1103 | off = pcpu_alloc_area(chunk, size, align); |
1066 | if (off >= 0) | 1104 | if (off >= 0) |
1067 | goto area_found; | 1105 | goto area_found; |
1106 | |||
1068 | err = "alloc from reserved chunk failed"; | 1107 | err = "alloc from reserved chunk failed"; |
1069 | goto fail_unlock; | 1108 | goto fail_unlock; |
1070 | } | 1109 | } |
@@ -1076,14 +1115,20 @@ restart: | |||
1076 | if (size > chunk->contig_hint) | 1115 | if (size > chunk->contig_hint) |
1077 | continue; | 1116 | continue; |
1078 | 1117 | ||
1079 | switch (pcpu_extend_area_map(chunk)) { | 1118 | new_alloc = pcpu_need_to_extend(chunk); |
1080 | case 0: | 1119 | if (new_alloc) { |
1081 | break; | 1120 | spin_unlock_irqrestore(&pcpu_lock, flags); |
1082 | case 1: | 1121 | if (pcpu_extend_area_map(chunk, |
1083 | goto restart; /* pcpu_lock dropped, restart */ | 1122 | new_alloc) < 0) { |
1084 | default: | 1123 | err = "failed to extend area map"; |
1085 | err = "failed to extend area map"; | 1124 | goto fail_unlock_mutex; |
1086 | goto fail_unlock; | 1125 | } |
1126 | spin_lock_irqsave(&pcpu_lock, flags); | ||
1127 | /* | ||
1128 | * pcpu_lock has been dropped, need to | ||
1129 | * restart cpu_slot list walking. | ||
1130 | */ | ||
1131 | goto restart; | ||
1087 | } | 1132 | } |
1088 | 1133 | ||
1089 | off = pcpu_alloc_area(chunk, size, align); | 1134 | off = pcpu_alloc_area(chunk, size, align); |
@@ -1093,7 +1138,7 @@ restart: | |||
1093 | } | 1138 | } |
1094 | 1139 | ||
1095 | /* hmmm... no space left, create a new chunk */ | 1140 | /* hmmm... no space left, create a new chunk */ |
1096 | spin_unlock_irq(&pcpu_lock); | 1141 | spin_unlock_irqrestore(&pcpu_lock, flags); |
1097 | 1142 | ||
1098 | chunk = alloc_pcpu_chunk(); | 1143 | chunk = alloc_pcpu_chunk(); |
1099 | if (!chunk) { | 1144 | if (!chunk) { |
@@ -1101,16 +1146,16 @@ restart: | |||
1101 | goto fail_unlock_mutex; | 1146 | goto fail_unlock_mutex; |
1102 | } | 1147 | } |
1103 | 1148 | ||
1104 | spin_lock_irq(&pcpu_lock); | 1149 | spin_lock_irqsave(&pcpu_lock, flags); |
1105 | pcpu_chunk_relocate(chunk, -1); | 1150 | pcpu_chunk_relocate(chunk, -1); |
1106 | goto restart; | 1151 | goto restart; |
1107 | 1152 | ||
1108 | area_found: | 1153 | area_found: |
1109 | spin_unlock_irq(&pcpu_lock); | 1154 | spin_unlock_irqrestore(&pcpu_lock, flags); |
1110 | 1155 | ||
1111 | /* populate, map and clear the area */ | 1156 | /* populate, map and clear the area */ |
1112 | if (pcpu_populate_chunk(chunk, off, size)) { | 1157 | if (pcpu_populate_chunk(chunk, off, size)) { |
1113 | spin_lock_irq(&pcpu_lock); | 1158 | spin_lock_irqsave(&pcpu_lock, flags); |
1114 | pcpu_free_area(chunk, off); | 1159 | pcpu_free_area(chunk, off); |
1115 | err = "failed to populate"; | 1160 | err = "failed to populate"; |
1116 | goto fail_unlock; | 1161 | goto fail_unlock; |
@@ -1122,7 +1167,7 @@ area_found: | |||
1122 | return __addr_to_pcpu_ptr(chunk->base_addr + off); | 1167 | return __addr_to_pcpu_ptr(chunk->base_addr + off); |
1123 | 1168 | ||
1124 | fail_unlock: | 1169 | fail_unlock: |
1125 | spin_unlock_irq(&pcpu_lock); | 1170 | spin_unlock_irqrestore(&pcpu_lock, flags); |
1126 | fail_unlock_mutex: | 1171 | fail_unlock_mutex: |
1127 | mutex_unlock(&pcpu_alloc_mutex); | 1172 | mutex_unlock(&pcpu_alloc_mutex); |
1128 | if (warn_limit) { | 1173 | if (warn_limit) { |
@@ -1254,6 +1299,27 @@ void free_percpu(void *ptr) | |||
1254 | } | 1299 | } |
1255 | EXPORT_SYMBOL_GPL(free_percpu); | 1300 | EXPORT_SYMBOL_GPL(free_percpu); |
1256 | 1301 | ||
1302 | /** | ||
1303 | * per_cpu_ptr_to_phys - convert translated percpu address to physical address | ||
1304 | * @addr: the address to be converted to physical address | ||
1305 | * | ||
1306 | * Given @addr which is a dereferenceable address obtained via one of | ||
1307 | * the percpu access macros, this function translates it into its physical | ||
1308 | * address. The caller is responsible for ensuring @addr stays valid | ||
1309 | * until this function finishes. | ||
1310 | * | ||
1311 | * RETURNS: | ||
1312 | * The physical address for @addr. | ||
1313 | */ | ||
1314 | phys_addr_t per_cpu_ptr_to_phys(void *addr) | ||
1315 | { | ||
1316 | if ((unsigned long)addr < VMALLOC_START || | ||
1317 | (unsigned long)addr >= VMALLOC_END) | ||
1318 | return __pa(addr); | ||
1319 | else | ||
1320 | return page_to_phys(vmalloc_to_page(addr)); | ||
1321 | } | ||
1322 | |||
1257 | static inline size_t pcpu_calc_fc_sizes(size_t static_size, | 1323 | static inline size_t pcpu_calc_fc_sizes(size_t static_size, |
1258 | size_t reserved_size, | 1324 | size_t reserved_size, |
1259 | ssize_t *dyn_sizep) | 1325 | ssize_t *dyn_sizep) |