diff options
| -rw-r--r-- | mm/percpu.c | 141 |
1 files changed, 44 insertions, 97 deletions
diff --git a/mm/percpu.c b/mm/percpu.c
index 1aa5d8fbca12..c0b2c1a76e81 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
| @@ -23,7 +23,7 @@ | |||
| 23 | * Allocation is done in offset-size areas of single unit space. Ie, | 23 | * Allocation is done in offset-size areas of single unit space. Ie, |
| 24 | * an area of 512 bytes at 6k in c1 occupies 512 bytes at 6k of c1:u0, | 24 | * an area of 512 bytes at 6k in c1 occupies 512 bytes at 6k of c1:u0, |
| 25 | * c1:u1, c1:u2 and c1:u3. Percpu access can be done by configuring | 25 | * c1:u1, c1:u2 and c1:u3. Percpu access can be done by configuring |
| 26 | * percpu base registers UNIT_SIZE apart. | 26 | * percpu base registers pcpu_unit_size apart. |
| 27 | * | 27 | * |
| 28 | * There are usually many small percpu allocations, many of them as | 28 | * There are usually many small percpu allocations, many of them as |
| 29 | * small as 4 bytes. The allocator organizes chunks into lists | 29 | * small as 4 bytes. The allocator organizes chunks into lists |
| @@ -38,8 +38,8 @@ | |||
| 38 | * region and negative allocated. Allocation inside a chunk is done | 38 | * region and negative allocated. Allocation inside a chunk is done |
| 39 | * by scanning this map sequentially and serving the first matching | 39 | * by scanning this map sequentially and serving the first matching |
| 40 | * entry. This is mostly copied from the percpu_modalloc() allocator. | 40 | * entry. This is mostly copied from the percpu_modalloc() allocator. |
| 41 | * Chunks are also linked into a rb tree to ease address to chunk | 41 | * Chunks can be determined from the address using the index field |
| 42 | * mapping during free. | 42 | * in the page struct. The index field contains a pointer to the chunk. |
| 43 | * | 43 | * |
| 44 | * To use this allocator, arch code should do the following. | 44 | * To use this allocator, arch code should do the following. |
| 45 | * | 45 | * |
| @@ -61,7 +61,6 @@ | |||
| 61 | #include <linux/mutex.h> | 61 | #include <linux/mutex.h> |
| 62 | #include <linux/percpu.h> | 62 | #include <linux/percpu.h> |
| 63 | #include <linux/pfn.h> | 63 | #include <linux/pfn.h> |
| 64 | #include <linux/rbtree.h> | ||
| 65 | #include <linux/slab.h> | 64 | #include <linux/slab.h> |
| 66 | #include <linux/spinlock.h> | 65 | #include <linux/spinlock.h> |
| 67 | #include <linux/vmalloc.h> | 66 | #include <linux/vmalloc.h> |
| @@ -88,7 +87,6 @@ | |||
| 88 | 87 | ||
| 89 | struct pcpu_chunk { | 88 | struct pcpu_chunk { |
| 90 | struct list_head list; /* linked to pcpu_slot lists */ | 89 | struct list_head list; /* linked to pcpu_slot lists */ |
| 91 | struct rb_node rb_node; /* key is chunk->vm->addr */ | ||
| 92 | int free_size; /* free bytes in the chunk */ | 90 | int free_size; /* free bytes in the chunk */ |
| 93 | int contig_hint; /* max contiguous size hint */ | 91 | int contig_hint; /* max contiguous size hint */ |
| 94 | struct vm_struct *vm; /* mapped vmalloc region */ | 92 | struct vm_struct *vm; /* mapped vmalloc region */ |
| @@ -110,9 +108,21 @@ static size_t pcpu_chunk_struct_size __read_mostly; | |||
| 110 | void *pcpu_base_addr __read_mostly; | 108 | void *pcpu_base_addr __read_mostly; |
| 111 | EXPORT_SYMBOL_GPL(pcpu_base_addr); | 109 | EXPORT_SYMBOL_GPL(pcpu_base_addr); |
| 112 | 110 | ||
| 113 | /* optional reserved chunk, only accessible for reserved allocations */ | 111 | /* |
| 112 | * The first chunk which always exists. Note that unlike other | ||
| 113 | * chunks, this one can be allocated and mapped in several different | ||
| 114 | * ways and thus often doesn't live in the vmalloc area. | ||
| 115 | */ | ||
| 116 | static struct pcpu_chunk *pcpu_first_chunk; | ||
| 117 | |||
| 118 | /* | ||
| 119 | * Optional reserved chunk. This chunk reserves part of the first | ||
| 120 | * chunk and serves it for reserved allocations. The amount of | ||
| 121 | * reserved offset is in pcpu_reserved_chunk_limit. When reserved | ||
| 122 | * area doesn't exist, the following variables contain NULL and 0 | ||
| 123 | * respectively. | ||
| 124 | */ | ||
| 114 | static struct pcpu_chunk *pcpu_reserved_chunk; | 125 | static struct pcpu_chunk *pcpu_reserved_chunk; |
| 115 | /* offset limit of the reserved chunk */ | ||
| 116 | static int pcpu_reserved_chunk_limit; | 126 | static int pcpu_reserved_chunk_limit; |
| 117 | 127 | ||
| 118 | /* | 128 | /* |
| @@ -121,7 +131,7 @@ static int pcpu_reserved_chunk_limit; | |||
| 121 | * There are two locks - pcpu_alloc_mutex and pcpu_lock. The former | 131 | * There are two locks - pcpu_alloc_mutex and pcpu_lock. The former |
| 122 | * protects allocation/reclaim paths, chunks and chunk->page arrays. | 132 | * protects allocation/reclaim paths, chunks and chunk->page arrays. |
| 123 | * The latter is a spinlock and protects the index data structures - | 133 | * The latter is a spinlock and protects the index data structures - |
| 124 | * chunk slots, rbtree, chunks and area maps in chunks. | 134 | * chunk slots, chunks and area maps in chunks. |
| 125 | * | 135 | * |
| 126 | * During allocation, pcpu_alloc_mutex is kept locked all the time and | 136 | * During allocation, pcpu_alloc_mutex is kept locked all the time and |
| 127 | * pcpu_lock is grabbed and released as necessary. All actual memory | 137 | * pcpu_lock is grabbed and released as necessary. All actual memory |
| @@ -140,7 +150,6 @@ static DEFINE_MUTEX(pcpu_alloc_mutex); /* protects whole alloc and reclaim */ | |||
| 140 | static DEFINE_SPINLOCK(pcpu_lock); /* protects index data structures */ | 150 | static DEFINE_SPINLOCK(pcpu_lock); /* protects index data structures */ |
| 141 | 151 | ||
| 142 | static struct list_head *pcpu_slot __read_mostly; /* chunk list slots */ | 152 | static struct list_head *pcpu_slot __read_mostly; /* chunk list slots */ |
| 143 | static struct rb_root pcpu_addr_root = RB_ROOT; /* chunks by address */ | ||
| 144 | 153 | ||
| 145 | /* reclaim work to release fully free chunks, scheduled from free path */ | 154 | /* reclaim work to release fully free chunks, scheduled from free path */ |
| 146 | static void pcpu_reclaim(struct work_struct *work); | 155 | static void pcpu_reclaim(struct work_struct *work); |
| @@ -191,6 +200,18 @@ static bool pcpu_chunk_page_occupied(struct pcpu_chunk *chunk, | |||
| 191 | return *pcpu_chunk_pagep(chunk, 0, page_idx) != NULL; | 200 | return *pcpu_chunk_pagep(chunk, 0, page_idx) != NULL; |
| 192 | } | 201 | } |
| 193 | 202 | ||
| 203 | /* set the pointer to a chunk in a page struct */ | ||
| 204 | static void pcpu_set_page_chunk(struct page *page, struct pcpu_chunk *pcpu) | ||
| 205 | { | ||
| 206 | page->index = (unsigned long)pcpu; | ||
| 207 | } | ||
| 208 | |||
| 209 | /* obtain pointer to a chunk from a page struct */ | ||
| 210 | static struct pcpu_chunk *pcpu_get_page_chunk(struct page *page) | ||
| 211 | { | ||
| 212 | return (struct pcpu_chunk *)page->index; | ||
| 213 | } | ||
| 214 | |||
| 194 | /** | 215 | /** |
| 195 | * pcpu_mem_alloc - allocate memory | 216 | * pcpu_mem_alloc - allocate memory |
| 196 | * @size: bytes to allocate | 217 | * @size: bytes to allocate |
| @@ -257,93 +278,26 @@ static void pcpu_chunk_relocate(struct pcpu_chunk *chunk, int oslot) | |||
| 257 | } | 278 | } |
| 258 | } | 279 | } |
| 259 | 280 | ||
| 260 | static struct rb_node **pcpu_chunk_rb_search(void *addr, | ||
| 261 | struct rb_node **parentp) | ||
| 262 | { | ||
| 263 | struct rb_node **p = &pcpu_addr_root.rb_node; | ||
| 264 | struct rb_node *parent = NULL; | ||
| 265 | struct pcpu_chunk *chunk; | ||
| 266 | |||
| 267 | while (*p) { | ||
| 268 | parent = *p; | ||
| 269 | chunk = rb_entry(parent, struct pcpu_chunk, rb_node); | ||
| 270 | |||
| 271 | if (addr < chunk->vm->addr) | ||
| 272 | p = &(*p)->rb_left; | ||
| 273 | else if (addr > chunk->vm->addr) | ||
| 274 | p = &(*p)->rb_right; | ||
| 275 | else | ||
| 276 | break; | ||
| 277 | } | ||
| 278 | |||
| 279 | if (parentp) | ||
| 280 | *parentp = parent; | ||
| 281 | return p; | ||
| 282 | } | ||
| 283 | |||
| 284 | /** | 281 | /** |
| 285 | * pcpu_chunk_addr_search - search for chunk containing specified address | 282 | * pcpu_chunk_addr_search - determine chunk containing specified address |
| 286 | * @addr: address to search for | 283 | * @addr: address for which the chunk needs to be determined. |
| 287 | * | ||
| 288 | * Look for chunk which might contain @addr. More specifically, it | ||
| 289 | * searchs for the chunk with the highest start address which isn't | ||
| 290 | * beyond @addr. | ||
| 291 | * | ||
| 292 | * CONTEXT: | ||
| 293 | * pcpu_lock. | ||
| 294 | * | 284 | * |
| 295 | * RETURNS: | 285 | * RETURNS: |
| 296 | * The address of the found chunk. | 286 | * The address of the found chunk. |
| 297 | */ | 287 | */ |
| 298 | static struct pcpu_chunk *pcpu_chunk_addr_search(void *addr) | 288 | static struct pcpu_chunk *pcpu_chunk_addr_search(void *addr) |
| 299 | { | 289 | { |
| 300 | struct rb_node *n, *parent; | 290 | void *first_start = pcpu_first_chunk->vm->addr; |
| 301 | struct pcpu_chunk *chunk; | ||
| 302 | 291 | ||
| 303 | /* is it in the reserved chunk? */ | 292 | /* is it in the first chunk? */ |
| 304 | if (pcpu_reserved_chunk) { | 293 | if (addr >= first_start && addr < first_start + pcpu_chunk_size) { |
| 305 | void *start = pcpu_reserved_chunk->vm->addr; | 294 | /* is it in the reserved area? */ |
| 306 | 295 | if (addr < first_start + pcpu_reserved_chunk_limit) | |
| 307 | if (addr >= start && addr < start + pcpu_reserved_chunk_limit) | ||
| 308 | return pcpu_reserved_chunk; | 296 | return pcpu_reserved_chunk; |
| 297 | return pcpu_first_chunk; | ||
| 309 | } | 298 | } |
| 310 | 299 | ||
| 311 | /* nah... search the regular ones */ | 300 | return pcpu_get_page_chunk(vmalloc_to_page(addr)); |
| 312 | n = *pcpu_chunk_rb_search(addr, &parent); | ||
| 313 | if (!n) { | ||
| 314 | /* no exactly matching chunk, the parent is the closest */ | ||
| 315 | n = parent; | ||
| 316 | BUG_ON(!n); | ||
| 317 | } | ||
| 318 | chunk = rb_entry(n, struct pcpu_chunk, rb_node); | ||
| 319 | |||
| 320 | if (addr < chunk->vm->addr) { | ||
| 321 | /* the parent was the next one, look for the previous one */ | ||
| 322 | n = rb_prev(n); | ||
| 323 | BUG_ON(!n); | ||
| 324 | chunk = rb_entry(n, struct pcpu_chunk, rb_node); | ||
| 325 | } | ||
| 326 | |||
| 327 | return chunk; | ||
| 328 | } | ||
| 329 | |||
| 330 | /** | ||
| 331 | * pcpu_chunk_addr_insert - insert chunk into address rb tree | ||
| 332 | * @new: chunk to insert | ||
| 333 | * | ||
| 334 | * Insert @new into address rb tree. | ||
| 335 | * | ||
| 336 | * CONTEXT: | ||
| 337 | * pcpu_lock. | ||
| 338 | */ | ||
| 339 | static void pcpu_chunk_addr_insert(struct pcpu_chunk *new) | ||
| 340 | { | ||
| 341 | struct rb_node **p, *parent; | ||
| 342 | |||
| 343 | p = pcpu_chunk_rb_search(new->vm->addr, &parent); | ||
| 344 | BUG_ON(*p); | ||
| 345 | rb_link_node(&new->rb_node, parent, p); | ||
| 346 | rb_insert_color(&new->rb_node, &pcpu_addr_root); | ||
| 347 | } | 301 | } |
| 348 | 302 | ||
| 349 | /** | 303 | /** |
| @@ -755,6 +709,7 @@ static int pcpu_populate_chunk(struct pcpu_chunk *chunk, int off, int size) | |||
| 755 | alloc_mask, 0); | 709 | alloc_mask, 0); |
| 756 | if (!*pagep) | 710 | if (!*pagep) |
| 757 | goto err; | 711 | goto err; |
| 712 | pcpu_set_page_chunk(*pagep, chunk); | ||
| 758 | } | 713 | } |
| 759 | } | 714 | } |
| 760 | 715 | ||
| @@ -879,7 +834,6 @@ restart: | |||
| 879 | 834 | ||
| 880 | spin_lock_irq(&pcpu_lock); | 835 | spin_lock_irq(&pcpu_lock); |
| 881 | pcpu_chunk_relocate(chunk, -1); | 836 | pcpu_chunk_relocate(chunk, -1); |
| 882 | pcpu_chunk_addr_insert(chunk); | ||
| 883 | goto restart; | 837 | goto restart; |
| 884 | 838 | ||
| 885 | area_found: | 839 | area_found: |
| @@ -968,7 +922,6 @@ static void pcpu_reclaim(struct work_struct *work) | |||
| 968 | if (chunk == list_first_entry(head, struct pcpu_chunk, list)) | 922 | if (chunk == list_first_entry(head, struct pcpu_chunk, list)) |
| 969 | continue; | 923 | continue; |
| 970 | 924 | ||
| 971 | rb_erase(&chunk->rb_node, &pcpu_addr_root); | ||
| 972 | list_move(&chunk->list, &todo); | 925 | list_move(&chunk->list, &todo); |
| 973 | } | 926 | } |
| 974 | 927 | ||
| @@ -1147,7 +1100,8 @@ size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, | |||
| 1147 | 1100 | ||
| 1148 | if (reserved_size) { | 1101 | if (reserved_size) { |
| 1149 | schunk->free_size = reserved_size; | 1102 | schunk->free_size = reserved_size; |
| 1150 | pcpu_reserved_chunk = schunk; /* not for dynamic alloc */ | 1103 | pcpu_reserved_chunk = schunk; |
| 1104 | pcpu_reserved_chunk_limit = static_size + reserved_size; | ||
| 1151 | } else { | 1105 | } else { |
| 1152 | schunk->free_size = dyn_size; | 1106 | schunk->free_size = dyn_size; |
| 1153 | dyn_size = 0; /* dynamic area covered */ | 1107 | dyn_size = 0; /* dynamic area covered */ |
| @@ -1158,8 +1112,6 @@ size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, | |||
| 1158 | if (schunk->free_size) | 1112 | if (schunk->free_size) |
| 1159 | schunk->map[schunk->map_used++] = schunk->free_size; | 1113 | schunk->map[schunk->map_used++] = schunk->free_size; |
| 1160 | 1114 | ||
| 1161 | pcpu_reserved_chunk_limit = static_size + schunk->free_size; | ||
| 1162 | |||
| 1163 | /* init dynamic chunk if necessary */ | 1115 | /* init dynamic chunk if necessary */ |
| 1164 | if (dyn_size) { | 1116 | if (dyn_size) { |
| 1165 | dchunk = alloc_bootmem(sizeof(struct pcpu_chunk)); | 1117 | dchunk = alloc_bootmem(sizeof(struct pcpu_chunk)); |
| @@ -1226,13 +1178,8 @@ size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, | |||
| 1226 | } | 1178 | } |
| 1227 | 1179 | ||
| 1228 | /* link the first chunk in */ | 1180 | /* link the first chunk in */ |
| 1229 | if (!dchunk) { | 1181 | pcpu_first_chunk = dchunk ?: schunk; |
| 1230 | pcpu_chunk_relocate(schunk, -1); | 1182 | pcpu_chunk_relocate(pcpu_first_chunk, -1); |
| 1231 | pcpu_chunk_addr_insert(schunk); | ||
| 1232 | } else { | ||
| 1233 | pcpu_chunk_relocate(dchunk, -1); | ||
| 1234 | pcpu_chunk_addr_insert(dchunk); | ||
| 1235 | } | ||
| 1236 | 1183 | ||
| 1237 | /* we're done */ | 1184 | /* we're done */ |
| 1238 | pcpu_base_addr = (void *)pcpu_chunk_addr(schunk, 0, 0); | 1185 | pcpu_base_addr = (void *)pcpu_chunk_addr(schunk, 0, 0); |
