 mm/z3fold.c | 479 ++++++++++++++++++++++++++++++++++++++++---------------
 1 file changed, 344 insertions(+), 135 deletions(-)
diff --git a/mm/z3fold.c b/mm/z3fold.c
index 54f63c4a809a..486550df32be 100644
--- a/mm/z3fold.c
+++ b/mm/z3fold.c
@@ -23,10 +23,13 @@
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
 #include <linux/atomic.h>
+#include <linux/sched.h>
 #include <linux/list.h>
 #include <linux/mm.h>
 #include <linux/module.h>
+#include <linux/percpu.h>
 #include <linux/preempt.h>
+#include <linux/workqueue.h>
 #include <linux/slab.h>
 #include <linux/spinlock.h>
 #include <linux/zpool.h>
@@ -48,11 +51,15 @@ enum buddy {
 };
 
 /*
- * struct z3fold_header - z3fold page metadata occupying the first chunk of each
+ * struct z3fold_header - z3fold page metadata occupying first chunks of each
  *			z3fold page, except for HEADLESS pages
- * @buddy:		links the z3fold page into the relevant list in the pool
+ * @buddy:		links the z3fold page into the relevant list in the
+ *			pool
  * @page_lock:		per-page lock
- * @refcount:		reference cound for the z3fold page
+ * @refcount:		reference count for the z3fold page
+ * @work:		work_struct for page layout optimization
+ * @pool:		pointer to the pool which this page belongs to
+ * @cpu:		CPU which this page "belongs" to
  * @first_chunks:	the size of the first buddy in chunks, 0 if free
  * @middle_chunks:	the size of the middle buddy in chunks, 0 if free
  * @last_chunks:	the size of the last buddy in chunks, 0 if free
@@ -62,6 +69,9 @@ struct z3fold_header {
 	struct list_head buddy;
 	spinlock_t page_lock;
 	struct kref refcount;
+	struct work_struct work;
+	struct z3fold_pool *pool;
+	short cpu;
 	unsigned short first_chunks;
 	unsigned short middle_chunks;
 	unsigned short last_chunks;
@@ -92,28 +102,39 @@ struct z3fold_header {
 
 /**
  * struct z3fold_pool - stores metadata for each z3fold pool
- * @lock:	protects all pool fields and first|last_chunk fields of any
- *		z3fold page in the pool
- * @unbuddied:	array of lists tracking z3fold pages that contain 2- buddies;
- *		the lists each z3fold page is added to depends on the size of
- *		its free region.
+ * @name:	pool name
+ * @lock:	protects pool unbuddied/lru lists
+ * @stale_lock:	protects pool stale page list
+ * @unbuddied:	per-cpu array of lists tracking z3fold pages that contain 2-
+ *		buddies; the list each z3fold page is added to depends on
+ *		the size of its free region.
  * @lru:	list tracking the z3fold pages in LRU order by most recently
  *		added buddy.
+ * @stale:	list of pages marked for freeing
  * @pages_nr:	number of z3fold pages in the pool.
  * @ops:	pointer to a structure of user defined operations specified at
  *		pool creation time.
+ * @compact_wq:	workqueue for page layout background optimization
+ * @release_wq:	workqueue for safe page release
+ * @work:	work_struct for safe page release
  *
  * This structure is allocated at pool creation time and maintains metadata
  * pertaining to a particular z3fold pool.
  */
 struct z3fold_pool {
+	const char *name;
 	spinlock_t lock;
-	struct list_head unbuddied[NCHUNKS];
+	spinlock_t stale_lock;
+	struct list_head *unbuddied;
 	struct list_head lru;
+	struct list_head stale;
 	atomic64_t pages_nr;
 	const struct z3fold_ops *ops;
 	struct zpool *zpool;
 	const struct zpool_ops *zpool_ops;
+	struct workqueue_struct *compact_wq;
+	struct workqueue_struct *release_wq;
+	struct work_struct work;
 };
 
 /*
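Note: the key change in this struct is that unbuddied turns from a fixed array of NCHUNKS list heads into a per-CPU pointer, so each CPU keeps its own set of unbuddied lists and most lookups stay CPU-local. A minimal sketch of the same per-CPU list pattern, with illustrative demo_* names that are not part of this patch:

	#include <linux/percpu.h>
	#include <linux/list.h>
	#include <linux/spinlock.h>

	#define NLISTS 16	/* stands in for NCHUNKS */

	static struct list_head __percpu *demo_lists;

	static int demo_init(void)
	{
		int cpu, i;

		/* one NLISTS-sized array of list heads per possible CPU */
		demo_lists = __alloc_percpu(sizeof(struct list_head) * NLISTS,
					    __alignof__(struct list_head));
		if (!demo_lists)
			return -ENOMEM;
		for_each_possible_cpu(cpu) {
			struct list_head *l = per_cpu_ptr(demo_lists, cpu);

			for (i = 0; i < NLISTS; i++)
				INIT_LIST_HEAD(&l[i]);
		}
		return 0;
	}

	static void demo_add(struct list_head *item, int idx, spinlock_t *lock)
	{
		/* get_cpu_ptr() disables preemption and picks this CPU's copy */
		struct list_head *l = get_cpu_ptr(demo_lists);

		spin_lock(lock);	/* list surgery still needs a lock */
		list_add(item, &l[idx]);
		spin_unlock(lock);
		put_cpu_ptr(demo_lists);
	}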
@@ -122,9 +143,10 @@ struct z3fold_pool {
 enum z3fold_page_flags {
 	PAGE_HEADLESS = 0,
 	MIDDLE_CHUNK_MAPPED,
+	NEEDS_COMPACTING,
+	PAGE_STALE
 };
 
-
 /*****************
  * Helpers
 *****************/
@@ -138,14 +160,19 @@ static int size_to_chunks(size_t size)
 #define for_each_unbuddied_list(_iter, _begin) \
 	for ((_iter) = (_begin); (_iter) < NCHUNKS; (_iter)++)
 
+static void compact_page_work(struct work_struct *w);
+
 /* Initializes the z3fold header of a newly allocated z3fold page */
-static struct z3fold_header *init_z3fold_page(struct page *page)
+static struct z3fold_header *init_z3fold_page(struct page *page,
+					struct z3fold_pool *pool)
 {
 	struct z3fold_header *zhdr = page_address(page);
 
 	INIT_LIST_HEAD(&page->lru);
 	clear_bit(PAGE_HEADLESS, &page->private);
 	clear_bit(MIDDLE_CHUNK_MAPPED, &page->private);
+	clear_bit(NEEDS_COMPACTING, &page->private);
+	clear_bit(PAGE_STALE, &page->private);
 
 	spin_lock_init(&zhdr->page_lock);
 	kref_init(&zhdr->refcount);
@@ -154,7 +181,10 @@ static struct z3fold_header *init_z3fold_page(struct page *page)
 	zhdr->last_chunks = 0;
 	zhdr->first_num = 0;
 	zhdr->start_middle = 0;
+	zhdr->cpu = -1;
+	zhdr->pool = pool;
 	INIT_LIST_HEAD(&zhdr->buddy);
+	INIT_WORK(&zhdr->work, compact_page_work);
 	return zhdr;
 }
 
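Note: the INIT_WORK() above embeds a deferred-work handle in every z3fold page header, so compaction can be queued per page and the handler can recover its header via container_of(). A generic sketch of that idiom (demo_* names are illustrative, not from this patch):

	#include <linux/workqueue.h>
	#include <linux/slab.h>

	struct demo_obj {
		int payload;
		struct work_struct work;
	};

	static void demo_work_fn(struct work_struct *w)
	{
		/* map the work_struct back to its enclosing object */
		struct demo_obj *obj = container_of(w, struct demo_obj, work);

		pr_info("deferred processing of %d\n", obj->payload);
	}

	static struct demo_obj *demo_obj_create(gfp_t gfp)
	{
		struct demo_obj *obj = kzalloc(sizeof(*obj), gfp);

		if (obj)
			INIT_WORK(&obj->work, demo_work_fn);
		return obj;
	}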
@@ -164,21 +194,6 @@ static void free_z3fold_page(struct page *page)
 	__free_page(page);
 }
 
-static void release_z3fold_page(struct kref *ref)
-{
-	struct z3fold_header *zhdr;
-	struct page *page;
-
-	zhdr = container_of(ref, struct z3fold_header, refcount);
-	page = virt_to_page(zhdr);
-
-	if (!list_empty(&zhdr->buddy))
-		list_del(&zhdr->buddy);
-	if (!list_empty(&page->lru))
-		list_del(&page->lru);
-	free_z3fold_page(page);
-}
-
 /* Lock a z3fold page */
 static inline void z3fold_page_lock(struct z3fold_header *zhdr)
 {
@@ -228,6 +243,76 @@ static enum buddy handle_to_buddy(unsigned long handle)
 	return (handle - zhdr->first_num) & BUDDY_MASK;
 }
 
+static void __release_z3fold_page(struct z3fold_header *zhdr, bool locked)
+{
+	struct page *page = virt_to_page(zhdr);
+	struct z3fold_pool *pool = zhdr->pool;
+
+	WARN_ON(!list_empty(&zhdr->buddy));
+	set_bit(PAGE_STALE, &page->private);
+	spin_lock(&pool->lock);
+	if (!list_empty(&page->lru))
+		list_del(&page->lru);
+	spin_unlock(&pool->lock);
+	if (locked)
+		z3fold_page_unlock(zhdr);
+	spin_lock(&pool->stale_lock);
+	list_add(&zhdr->buddy, &pool->stale);
+	queue_work(pool->release_wq, &pool->work);
+	spin_unlock(&pool->stale_lock);
+}
+
+static void __attribute__((__unused__))
+			release_z3fold_page(struct kref *ref)
+{
+	struct z3fold_header *zhdr = container_of(ref, struct z3fold_header,
+						refcount);
+	__release_z3fold_page(zhdr, false);
+}
+
+static void release_z3fold_page_locked(struct kref *ref)
+{
+	struct z3fold_header *zhdr = container_of(ref, struct z3fold_header,
+						refcount);
+	WARN_ON(z3fold_page_trylock(zhdr));
+	__release_z3fold_page(zhdr, true);
+}
+
+static void release_z3fold_page_locked_list(struct kref *ref)
+{
+	struct z3fold_header *zhdr = container_of(ref, struct z3fold_header,
+						refcount);
+	spin_lock(&zhdr->pool->lock);
+	list_del_init(&zhdr->buddy);
+	spin_unlock(&zhdr->pool->lock);
+
+	WARN_ON(z3fold_page_trylock(zhdr));
+	__release_z3fold_page(zhdr, true);
+}
+
+static void free_pages_work(struct work_struct *w)
+{
+	struct z3fold_pool *pool = container_of(w, struct z3fold_pool, work);
+
+	spin_lock(&pool->stale_lock);
+	while (!list_empty(&pool->stale)) {
+		struct z3fold_header *zhdr = list_first_entry(&pool->stale,
+						struct z3fold_header, buddy);
+		struct page *page = virt_to_page(zhdr);
+
+		list_del(&zhdr->buddy);
+		if (WARN_ON(!test_bit(PAGE_STALE, &page->private)))
+			continue;
+		clear_bit(NEEDS_COMPACTING, &page->private);
+		spin_unlock(&pool->stale_lock);
+		cancel_work_sync(&zhdr->work);
+		free_z3fold_page(page);
+		cond_resched();
+		spin_lock(&pool->stale_lock);
+	}
+	spin_unlock(&pool->stale_lock);
+}
+
 /*
  * Returns the number of free chunks in a z3fold page.
  * NB: can't be used with HEADLESS pages.
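Note: nothing in this release path frees a page directly. Because the final kref_put() may run in atomic context while cancel_work_sync() can sleep, pages are parked on pool->stale under stale_lock and release_wq drains them later. The drain loop in free_pages_work() also shows a common idiom: drop the spinlock around each sleeping call, then retake it and re-test the list head. Sketched generically below (demo_* names are illustrative, not from this patch):

	#include <linux/list.h>
	#include <linux/spinlock.h>
	#include <linux/workqueue.h>
	#include <linux/slab.h>
	#include <linux/sched.h>

	struct demo_pool {
		spinlock_t stale_lock;
		struct list_head stale;
	};

	struct demo_obj {
		struct list_head node;
		struct work_struct work;
	};

	static void demo_drain(struct demo_pool *pool)
	{
		spin_lock(&pool->stale_lock);
		while (!list_empty(&pool->stale)) {
			struct demo_obj *obj = list_first_entry(&pool->stale,
						struct demo_obj, node);

			list_del(&obj->node);
			/* can't sleep under a spinlock: drop it first */
			spin_unlock(&pool->stale_lock);
			cancel_work_sync(&obj->work);
			kfree(obj);
			cond_resched();
			spin_lock(&pool->stale_lock);
		}
		spin_unlock(&pool->stale_lock);
	}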
@@ -252,46 +337,6 @@ static int num_free_chunks(struct z3fold_header *zhdr)
 	return nfree;
 }
 
-/*****************
- * API Functions
-*****************/
-/**
- * z3fold_create_pool() - create a new z3fold pool
- * @gfp:	gfp flags when allocating the z3fold pool structure
- * @ops:	user-defined operations for the z3fold pool
- *
- * Return: pointer to the new z3fold pool or NULL if the metadata allocation
- * failed.
- */
-static struct z3fold_pool *z3fold_create_pool(gfp_t gfp,
-		const struct z3fold_ops *ops)
-{
-	struct z3fold_pool *pool;
-	int i;
-
-	pool = kzalloc(sizeof(struct z3fold_pool), gfp);
-	if (!pool)
-		return NULL;
-	spin_lock_init(&pool->lock);
-	for_each_unbuddied_list(i, 0)
-		INIT_LIST_HEAD(&pool->unbuddied[i]);
-	INIT_LIST_HEAD(&pool->lru);
-	atomic64_set(&pool->pages_nr, 0);
-	pool->ops = ops;
-	return pool;
-}
-
-/**
- * z3fold_destroy_pool() - destroys an existing z3fold pool
- * @pool:	the z3fold pool to be destroyed
- *
- * The pool should be emptied before this function is called.
- */
-static void z3fold_destroy_pool(struct z3fold_pool *pool)
-{
-	kfree(pool);
-}
-
 static inline void *mchunk_memmove(struct z3fold_header *zhdr,
 				unsigned short dst_chunk)
 {
@@ -347,6 +392,117 @@ static int z3fold_compact_page(struct z3fold_header *zhdr)
 	return 0;
 }
 
+static void do_compact_page(struct z3fold_header *zhdr, bool locked)
+{
+	struct z3fold_pool *pool = zhdr->pool;
+	struct page *page;
+	struct list_head *unbuddied;
+	int fchunks;
+
+	page = virt_to_page(zhdr);
+	if (locked)
+		WARN_ON(z3fold_page_trylock(zhdr));
+	else
+		z3fold_page_lock(zhdr);
+	if (test_bit(PAGE_STALE, &page->private) ||
+	    !test_and_clear_bit(NEEDS_COMPACTING, &page->private)) {
+		z3fold_page_unlock(zhdr);
+		return;
+	}
+	spin_lock(&pool->lock);
+	list_del_init(&zhdr->buddy);
+	spin_unlock(&pool->lock);
+
+	z3fold_compact_page(zhdr);
+	unbuddied = get_cpu_ptr(pool->unbuddied);
+	fchunks = num_free_chunks(zhdr);
+	if (fchunks < NCHUNKS &&
+	    (!zhdr->first_chunks || !zhdr->middle_chunks ||
+			!zhdr->last_chunks)) {
+		/* the page's not completely free and it's unbuddied */
+		spin_lock(&pool->lock);
+		list_add(&zhdr->buddy, &unbuddied[fchunks]);
+		spin_unlock(&pool->lock);
+		zhdr->cpu = smp_processor_id();
+	}
+	put_cpu_ptr(pool->unbuddied);
+	z3fold_page_unlock(zhdr);
+}
+
+static void compact_page_work(struct work_struct *w)
+{
+	struct z3fold_header *zhdr = container_of(w, struct z3fold_header,
+						work);
+
+	do_compact_page(zhdr, false);
+}
+
+
+/*
+ * API Functions
+ */
+
+/**
+ * z3fold_create_pool() - create a new z3fold pool
+ * @name:	pool name
+ * @gfp:	gfp flags when allocating the z3fold pool structure
+ * @ops:	user-defined operations for the z3fold pool
+ *
+ * Return: pointer to the new z3fold pool or NULL if the metadata allocation
+ * failed.
+ */
+static struct z3fold_pool *z3fold_create_pool(const char *name, gfp_t gfp,
+		const struct z3fold_ops *ops)
+{
+	struct z3fold_pool *pool = NULL;
+	int i, cpu;
+
+	pool = kzalloc(sizeof(struct z3fold_pool), gfp);
+	if (!pool)
+		goto out;
+	spin_lock_init(&pool->lock);
+	spin_lock_init(&pool->stale_lock);
+	pool->unbuddied = __alloc_percpu(sizeof(struct list_head)*NCHUNKS, 2);
+	for_each_possible_cpu(cpu) {
+		struct list_head *unbuddied =
+				per_cpu_ptr(pool->unbuddied, cpu);
+		for_each_unbuddied_list(i, 0)
+			INIT_LIST_HEAD(&unbuddied[i]);
+	}
+	INIT_LIST_HEAD(&pool->lru);
+	INIT_LIST_HEAD(&pool->stale);
+	atomic64_set(&pool->pages_nr, 0);
+	pool->name = name;
+	pool->compact_wq = create_singlethread_workqueue(pool->name);
+	if (!pool->compact_wq)
+		goto out;
+	pool->release_wq = create_singlethread_workqueue(pool->name);
+	if (!pool->release_wq)
+		goto out_wq;
+	INIT_WORK(&pool->work, free_pages_work);
+	pool->ops = ops;
+	return pool;
+
+out_wq:
+	destroy_workqueue(pool->compact_wq);
+out:
+	kfree(pool);
+	return NULL;
+}
+
+/**
+ * z3fold_destroy_pool() - destroys an existing z3fold pool
+ * @pool:	the z3fold pool to be destroyed
+ *
+ * The pool should be emptied before this function is called.
+ */
+static void z3fold_destroy_pool(struct z3fold_pool *pool)
+{
+	destroy_workqueue(pool->release_wq);
+	destroy_workqueue(pool->compact_wq);
+	kfree(pool);
+}
+
 /**
  * z3fold_alloc() - allocates a region of a given size
  * @pool:	z3fold pool from which to allocate
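Note: z3fold_create_pool() is static; outside callers reach it through the zpool layer (see z3fold_zpool_create() at the end of this patch), which is how zswap drives z3fold. A hedged usage sketch against the zpool API of this kernel generation, with error handling abbreviated:

	#include <linux/zpool.h>
	#include <linux/string.h>

	static int demo_use_z3fold(void)
	{
		struct zpool *zp;
		unsigned long handle;
		void *buf;

		zp = zpool_create_pool("z3fold", "demo", GFP_KERNEL, NULL);
		if (!zp)
			return -ENOMEM;

		if (zpool_malloc(zp, 1024, GFP_KERNEL, &handle) == 0) {
			buf = zpool_map_handle(zp, handle, ZPOOL_MM_RW);
			memset(buf, 0, 1024);
			zpool_unmap_handle(zp, handle);
			zpool_free(zp, handle);
		}
		zpool_destroy_pool(zp);
		return 0;
	}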
@@ -371,8 +527,9 @@ static int z3fold_alloc(struct z3fold_pool *pool, size_t size, gfp_t gfp,
 {
 	int chunks = 0, i, freechunks;
 	struct z3fold_header *zhdr = NULL;
+	struct page *page = NULL;
 	enum buddy bud;
-	struct page *page;
+	bool can_sleep = (gfp & __GFP_RECLAIM) == __GFP_RECLAIM;
 
 	if (!size || (gfp & __GFP_HIGHMEM))
 		return -EINVAL;
@@ -383,23 +540,57 @@ static int z3fold_alloc(struct z3fold_pool *pool, size_t size, gfp_t gfp,
 	if (size > PAGE_SIZE - ZHDR_SIZE_ALIGNED - CHUNK_SIZE)
 		bud = HEADLESS;
 	else {
+		struct list_head *unbuddied;
 		chunks = size_to_chunks(size);
 
+lookup:
 		/* First, try to find an unbuddied z3fold page. */
-		zhdr = NULL;
+		unbuddied = get_cpu_ptr(pool->unbuddied);
 		for_each_unbuddied_list(i, chunks) {
-			spin_lock(&pool->lock);
-			zhdr = list_first_entry_or_null(&pool->unbuddied[i],
+			struct list_head *l = &unbuddied[i];
+
+			zhdr = list_first_entry_or_null(READ_ONCE(l),
 				struct z3fold_header, buddy);
-			if (!zhdr || !z3fold_page_trylock(zhdr)) {
-				spin_unlock(&pool->lock);
+
+			if (!zhdr)
 				continue;
+
+			/* Re-check under lock. */
+			spin_lock(&pool->lock);
+			l = &unbuddied[i];
+			if (unlikely(zhdr != list_first_entry(READ_ONCE(l),
+					struct z3fold_header, buddy)) ||
+			    !z3fold_page_trylock(zhdr)) {
+				spin_unlock(&pool->lock);
+				put_cpu_ptr(pool->unbuddied);
+				goto lookup;
 			}
-			kref_get(&zhdr->refcount);
 			list_del_init(&zhdr->buddy);
+			zhdr->cpu = -1;
 			spin_unlock(&pool->lock);
 
 			page = virt_to_page(zhdr);
+			if (test_bit(NEEDS_COMPACTING, &page->private)) {
+				z3fold_page_unlock(zhdr);
+				zhdr = NULL;
+				put_cpu_ptr(pool->unbuddied);
+				if (can_sleep)
+					cond_resched();
+				goto lookup;
+			}
+
+			/*
+			 * this page could not be removed from its unbuddied
+			 * list while pool lock was held, and then we've taken
+			 * page lock so kref_put could not be called before
+			 * we got here, so it's safe to just call kref_get()
+			 */
+			kref_get(&zhdr->refcount);
+			break;
+		}
+		put_cpu_ptr(pool->unbuddied);
+
+		if (zhdr) {
 			if (zhdr->first_chunks == 0) {
 				if (zhdr->middle_chunks != 0 &&
 				    chunks >= zhdr->start_middle)
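Note: the lookup above is optimistic: the first list_first_entry_or_null() peeks at a per-CPU list with only preemption disabled, so the result can be stale; it is then re-validated under pool->lock (and the page trylocked) before use, and any mismatch restarts from the lookup label. The core of the pattern, reduced to a sketch (demo_* names are illustrative, not from this patch):

	#include <linux/list.h>
	#include <linux/spinlock.h>
	#include <linux/compiler.h>

	struct demo_pool {
		spinlock_t lock;
	};

	struct demo_obj {
		struct list_head node;
	};

	static struct demo_obj *demo_pick(struct demo_pool *pool,
					  struct list_head *l)
	{
		struct demo_obj *obj;

	retry:
		/* unlocked peek: cheap, but may race with removal */
		obj = list_first_entry_or_null(READ_ONCE(l),
					       struct demo_obj, node);
		if (!obj)
			return NULL;

		spin_lock(&pool->lock);
		/* re-check under the lock: is our candidate still the head? */
		if (obj != list_first_entry(l, struct demo_obj, node)) {
			spin_unlock(&pool->lock);
			goto retry;
		}
		list_del_init(&obj->node);
		spin_unlock(&pool->lock);
		return obj;
	}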
@@ -411,32 +602,49 @@ static int z3fold_alloc(struct z3fold_pool *pool, size_t size, gfp_t gfp,
 			else if (zhdr->middle_chunks == 0)
 				bud = MIDDLE;
 			else {
-				z3fold_page_unlock(zhdr);
-				spin_lock(&pool->lock);
 				if (kref_put(&zhdr->refcount,
-					     release_z3fold_page))
+					release_z3fold_page_locked))
 					atomic64_dec(&pool->pages_nr);
-				spin_unlock(&pool->lock);
+				else
+					z3fold_page_unlock(zhdr);
 				pr_err("No free chunks in unbuddied\n");
 				WARN_ON(1);
-				continue;
+				goto lookup;
 			}
 			goto found;
 		}
 		bud = FIRST;
 	}
 
-	/* Couldn't find unbuddied z3fold page, create new one */
-	page = alloc_page(gfp);
+	spin_lock(&pool->stale_lock);
+	zhdr = list_first_entry_or_null(&pool->stale,
+					struct z3fold_header, buddy);
+	/*
+	 * Before allocating a page, let's see if we can take one from the
+	 * stale pages list. cancel_work_sync() can sleep so we must make
+	 * sure it won't be called in case we're in atomic context.
+	 */
+	if (zhdr && (can_sleep || !work_pending(&zhdr->work) ||
+	    !unlikely(work_busy(&zhdr->work)))) {
+		list_del(&zhdr->buddy);
+		clear_bit(NEEDS_COMPACTING, &page->private);
+		spin_unlock(&pool->stale_lock);
+		if (can_sleep)
+			cancel_work_sync(&zhdr->work);
+		page = virt_to_page(zhdr);
+	} else {
+		spin_unlock(&pool->stale_lock);
+		page = alloc_page(gfp);
+	}
+
 	if (!page)
 		return -ENOMEM;
 
 	atomic64_inc(&pool->pages_nr);
-	zhdr = init_z3fold_page(page);
+	zhdr = init_z3fold_page(page, pool);
 
 	if (bud == HEADLESS) {
 		set_bit(PAGE_HEADLESS, &page->private);
-		spin_lock(&pool->lock);
 		goto headless;
 	}
 	z3fold_page_lock(zhdr);
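Note: the stale-list branch above is gated by can_sleep, computed at the top of this function, because cancel_work_sync() may sleep: an atomic caller only reuses a stale page whose work item is neither pending nor running, and otherwise falls back to alloc_page(). The gfp test itself is worth spelling out; a small illustrative helper mirroring it (not part of the patch):

	#include <linux/gfp.h>

	static inline bool demo_can_sleep(gfp_t gfp)
	{
		/*
		 * True only when both ___GFP_DIRECT_RECLAIM and
		 * ___GFP_KSWAPD_RECLAIM are set: GFP_KERNEL passes,
		 * GFP_ATOMIC and GFP_NOWAIT do not.
		 */
		return (gfp & __GFP_RECLAIM) == __GFP_RECLAIM;
	}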
@@ -451,15 +659,21 @@ found:
 		zhdr->start_middle = zhdr->first_chunks + ZHDR_CHUNKS;
 	}
 
-	spin_lock(&pool->lock);
 	if (zhdr->first_chunks == 0 || zhdr->last_chunks == 0 ||
 			zhdr->middle_chunks == 0) {
+		struct list_head *unbuddied = get_cpu_ptr(pool->unbuddied);
+
 		/* Add to unbuddied list */
 		freechunks = num_free_chunks(zhdr);
-		list_add(&zhdr->buddy, &pool->unbuddied[freechunks]);
+		spin_lock(&pool->lock);
+		list_add(&zhdr->buddy, &unbuddied[freechunks]);
+		spin_unlock(&pool->lock);
+		zhdr->cpu = smp_processor_id();
+		put_cpu_ptr(pool->unbuddied);
 	}
 
 headless:
+	spin_lock(&pool->lock);
 	/* Add/move z3fold page to beginning of LRU */
 	if (!list_empty(&page->lru))
 		list_del(&page->lru);
@@ -487,7 +701,6 @@ headless:
 static void z3fold_free(struct z3fold_pool *pool, unsigned long handle)
 {
 	struct z3fold_header *zhdr;
-	int freechunks;
 	struct page *page;
 	enum buddy bud;
 
@@ -526,25 +739,27 @@ static void z3fold_free(struct z3fold_pool *pool, unsigned long handle)
 		spin_unlock(&pool->lock);
 		free_z3fold_page(page);
 		atomic64_dec(&pool->pages_nr);
-	} else {
-		if (zhdr->first_chunks != 0 || zhdr->middle_chunks != 0 ||
-		    zhdr->last_chunks != 0) {
-			z3fold_compact_page(zhdr);
-			/* Add to the unbuddied list */
-			spin_lock(&pool->lock);
-			if (!list_empty(&zhdr->buddy))
-				list_del(&zhdr->buddy);
-			freechunks = num_free_chunks(zhdr);
-			list_add(&zhdr->buddy, &pool->unbuddied[freechunks]);
-			spin_unlock(&pool->lock);
-		}
+		return;
+	}
+
+	if (kref_put(&zhdr->refcount, release_z3fold_page_locked_list)) {
+		atomic64_dec(&pool->pages_nr);
+		return;
+	}
+	if (test_and_set_bit(NEEDS_COMPACTING, &page->private)) {
 		z3fold_page_unlock(zhdr);
+		return;
+	}
+	if (zhdr->cpu < 0 || !cpu_online(zhdr->cpu)) {
 		spin_lock(&pool->lock);
-		if (kref_put(&zhdr->refcount, release_z3fold_page))
-			atomic64_dec(&pool->pages_nr);
+		list_del_init(&zhdr->buddy);
 		spin_unlock(&pool->lock);
+		zhdr->cpu = -1;
+		do_compact_page(zhdr, true);
+		return;
 	}
-
+	queue_work_on(zhdr->cpu, pool->compact_wq, &zhdr->work);
+	z3fold_page_unlock(zhdr);
 }
 
 /**
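Note: z3fold_free() no longer compacts inline. The atomic test_and_set_bit(NEEDS_COMPACTING, ...) collapses concurrent compaction requests into a single queued work item, which queue_work_on() then runs on the CPU whose unbuddied lists hold the page, falling back to synchronous do_compact_page() when that CPU is gone. The dedup idiom in isolation (demo_* names are illustrative, not from this patch):

	#include <linux/bitops.h>
	#include <linux/workqueue.h>

	#define DEMO_PENDING 0

	struct demo_obj {
		unsigned long flags;
		struct work_struct work;
	};

	static void demo_kick(struct demo_obj *obj)
	{
		/*
		 * test_and_set_bit() is atomic: only the caller that flips
		 * the bit 0->1 queues the work, so concurrent requests
		 * collapse into one queued item; the handler clears the
		 * bit (cf. test_and_clear_bit() in do_compact_page()).
		 */
		if (!test_and_set_bit(DEMO_PENDING, &obj->flags))
			schedule_work(&obj->work);
	}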
@@ -585,9 +800,10 @@ static void z3fold_free(struct z3fold_pool *pool, unsigned long handle)
  */
 static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
 {
-	int i, ret = 0, freechunks;
-	struct z3fold_header *zhdr;
-	struct page *page;
+	int i, ret = 0;
+	struct z3fold_header *zhdr = NULL;
+	struct page *page = NULL;
+	struct list_head *pos;
 	unsigned long first_handle = 0, middle_handle = 0, last_handle = 0;
 
 	spin_lock(&pool->lock);
@@ -600,16 +816,24 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
 			spin_unlock(&pool->lock);
 			return -EINVAL;
 		}
-		page = list_last_entry(&pool->lru, struct page, lru);
+		list_for_each_prev(pos, &pool->lru) {
+			page = list_entry(pos, struct page, lru);
+			if (test_bit(PAGE_HEADLESS, &page->private))
+				/* candidate found */
+				break;
+
+			zhdr = page_address(page);
+			if (!z3fold_page_trylock(zhdr))
+				continue; /* can't evict at this point */
+			kref_get(&zhdr->refcount);
+			list_del_init(&zhdr->buddy);
+			zhdr->cpu = -1;
+		}
+
 		list_del_init(&page->lru);
+		spin_unlock(&pool->lock);
 
-		zhdr = page_address(page);
 		if (!test_bit(PAGE_HEADLESS, &page->private)) {
-			if (!list_empty(&zhdr->buddy))
-				list_del_init(&zhdr->buddy);
-			kref_get(&zhdr->refcount);
-			spin_unlock(&pool->lock);
-			z3fold_page_lock(zhdr);
 			/*
 			 * We need encode the handles before unlocking, since
 			 * we can race with free that will set
@@ -624,11 +848,14 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
 				middle_handle = encode_handle(zhdr, MIDDLE);
 			if (zhdr->last_chunks)
 				last_handle = encode_handle(zhdr, LAST);
+			/*
+			 * it's safe to unlock here because we hold a
+			 * reference to this page
+			 */
 			z3fold_page_unlock(zhdr);
 		} else {
 			first_handle = encode_handle(zhdr, HEADLESS);
 			last_handle = middle_handle = 0;
-			spin_unlock(&pool->lock);
 		}
 
 		/* Issue the eviction callback(s) */
@@ -652,31 +879,12 @@ next:
 		if (ret == 0) {
 			free_z3fold_page(page);
 			return 0;
-		} else {
-			spin_lock(&pool->lock);
-		}
-	} else {
-		z3fold_page_lock(zhdr);
-		if ((zhdr->first_chunks || zhdr->last_chunks ||
-		     zhdr->middle_chunks) &&
-		    !(zhdr->first_chunks && zhdr->last_chunks &&
-		      zhdr->middle_chunks)) {
-			z3fold_compact_page(zhdr);
-			/* add to unbuddied list */
-			spin_lock(&pool->lock);
-			freechunks = num_free_chunks(zhdr);
-			list_add(&zhdr->buddy,
-				 &pool->unbuddied[freechunks]);
-			spin_unlock(&pool->lock);
-		}
-		z3fold_page_unlock(zhdr);
-		spin_lock(&pool->lock);
-		if (kref_put(&zhdr->refcount, release_z3fold_page)) {
-			spin_unlock(&pool->lock);
-			atomic64_dec(&pool->pages_nr);
-			return 0;
 		}
+	} else if (kref_put(&zhdr->refcount, release_z3fold_page)) {
+		atomic64_dec(&pool->pages_nr);
+		return 0;
 	}
+	spin_lock(&pool->lock);
 
 	/*
 	 * Add to the beginning of LRU.
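Note: reclaim now selects its victim with a trylock scan: walking the LRU tail-first under pool->lock, it skips pages whose page lock is contended and pins the chosen page with kref_get() so it survives once pool->lock is dropped. The scan pattern in isolation (demo_* names are illustrative, not from this patch):

	#include <linux/list.h>
	#include <linux/spinlock.h>
	#include <linux/kref.h>

	struct demo_pool {
		spinlock_t lock;
		struct list_head lru;
	};

	struct demo_obj {
		spinlock_t lock;
		struct kref ref;
		struct list_head lru;
	};

	/* returns a locked, referenced victim, or NULL if all were contended */
	static struct demo_obj *demo_pick_victim(struct demo_pool *pool)
	{
		struct demo_obj *obj, *victim = NULL;
		struct list_head *pos;

		spin_lock(&pool->lock);
		list_for_each_prev(pos, &pool->lru) {
			obj = list_entry(pos, struct demo_obj, lru);
			if (!spin_trylock(&obj->lock))
				continue;	/* contended: try the next one */
			kref_get(&obj->ref);	/* pin across the unlock below */
			list_del_init(&obj->lru);
			victim = obj;
			break;
		}
		spin_unlock(&pool->lock);
		return victim;
	}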
@@ -795,7 +1003,8 @@ static void *z3fold_zpool_create(const char *name, gfp_t gfp,
 {
 	struct z3fold_pool *pool;
 
-	pool = z3fold_create_pool(gfp, zpool_ops ? &z3fold_zpool_ops : NULL);
+	pool = z3fold_create_pool(name, gfp,
+				zpool_ops ? &z3fold_zpool_ops : NULL);
 	if (pool) {
 		pool->zpool = zpool;
 		pool->zpool_ops = zpool_ops;