diff options
Diffstat (limited to 'fs/mbcache.c')
-rw-r--r-- | fs/mbcache.c | 540 |
1 files changed, 386 insertions, 154 deletions
diff --git a/fs/mbcache.c b/fs/mbcache.c index e519e45bf673..bf166e388f0d 100644 --- a/fs/mbcache.c +++ b/fs/mbcache.c | |||
@@ -26,6 +26,41 @@ | |||
26 | * back on the lru list. | 26 | * back on the lru list. |
27 | */ | 27 | */ |
28 | 28 | ||
29 | /* | ||
30 | * Lock descriptions and usage: | ||
31 | * | ||
32 | * Each hash chain of both the block and index hash tables now contains | ||
33 | * a built-in lock used to serialize accesses to the hash chain. | ||
34 | * | ||
35 | * Accesses to global data structures mb_cache_list and mb_cache_lru_list | ||
36 | * are serialized via the global spinlock mb_cache_spinlock. | ||
37 | * | ||
38 | * Each mb_cache_entry contains a spinlock, e_entry_lock, to serialize | ||
39 | * accesses to its local data, such as e_used and e_queued. | ||
40 | * | ||
41 | * Lock ordering: | ||
42 | * | ||
43 | * Each block hash chain's lock has the highest lock order, followed by an | ||
44 | * index hash chain's lock, mb_cache_bg_lock (used to implement mb_cache_entry's | ||
45 | * lock), and mb_cache_spinlock, with the lowest order. While holding | ||
46 | * either a block or index hash chain lock, a thread can acquire an | ||
47 | * mb_cache_bg_lock, which in turn can also acquire mb_cache_spinlock. | ||
48 | * | ||
49 | * Synchronization: | ||
50 | * | ||
51 | * Since both mb_cache_entry_get and mb_cache_entry_find scan the block and | ||
52 | * index hash chain, it needs to lock the corresponding hash chain. For each | ||
53 | * mb_cache_entry within the chain, it needs to lock the mb_cache_entry to | ||
54 | * prevent either any simultaneous release or free on the entry and also | ||
55 | * to serialize accesses to either the e_used or e_queued member of the entry. | ||
56 | * | ||
57 | * To avoid having a dangling reference to an already freed | ||
58 | * mb_cache_entry, an mb_cache_entry is only freed when it is not on a | ||
59 | * block hash chain and also no longer being referenced, i.e. both e_used | ||
60 | * and e_queued are 0. When an mb_cache_entry is explicitly freed it is | ||
61 | * first removed from a block hash chain. | ||
62 | */ | ||
63 | |||
29 | #include <linux/kernel.h> | 64 | #include <linux/kernel.h> |
30 | #include <linux/module.h> | 65 | #include <linux/module.h> |
31 | 66 | ||
@@ -34,9 +69,10 @@ | |||
34 | #include <linux/mm.h> | 69 | #include <linux/mm.h> |
35 | #include <linux/slab.h> | 70 | #include <linux/slab.h> |
36 | #include <linux/sched.h> | 71 | #include <linux/sched.h> |
37 | #include <linux/init.h> | 72 | #include <linux/list_bl.h> |
38 | #include <linux/mbcache.h> | 73 | #include <linux/mbcache.h> |
39 | 74 | #include <linux/init.h> | |
75 | #include <linux/blockgroup_lock.h> | ||
40 | 76 | ||
41 | #ifdef MB_CACHE_DEBUG | 77 | #ifdef MB_CACHE_DEBUG |
42 | # define mb_debug(f...) do { \ | 78 | # define mb_debug(f...) do { \ |
@@ -57,8 +93,14 @@ | |||
57 | 93 | ||
58 | #define MB_CACHE_WRITER ((unsigned short)~0U >> 1) | 94 | #define MB_CACHE_WRITER ((unsigned short)~0U >> 1) |
59 | 95 | ||
96 | #define MB_CACHE_ENTRY_LOCK_BITS __builtin_log2(NR_BG_LOCKS) | ||
97 | #define MB_CACHE_ENTRY_LOCK_INDEX(ce) \ | ||
98 | (hash_long((unsigned long)ce, MB_CACHE_ENTRY_LOCK_BITS)) | ||
99 | |||
60 | static DECLARE_WAIT_QUEUE_HEAD(mb_cache_queue); | 100 | static DECLARE_WAIT_QUEUE_HEAD(mb_cache_queue); |
61 | 101 | static struct blockgroup_lock *mb_cache_bg_lock; | |
102 | static struct kmem_cache *mb_cache_kmem_cache; | ||
103 | |||
62 | MODULE_AUTHOR("Andreas Gruenbacher <a.gruenbacher@computer.org>"); | 104 | MODULE_AUTHOR("Andreas Gruenbacher <a.gruenbacher@computer.org>"); |
63 | MODULE_DESCRIPTION("Meta block cache (for extended attributes)"); | 105 | MODULE_DESCRIPTION("Meta block cache (for extended attributes)"); |
64 | MODULE_LICENSE("GPL"); | 106 | MODULE_LICENSE("GPL"); |
@@ -86,58 +128,110 @@ static LIST_HEAD(mb_cache_list); | |||
86 | static LIST_HEAD(mb_cache_lru_list); | 128 | static LIST_HEAD(mb_cache_lru_list); |
87 | static DEFINE_SPINLOCK(mb_cache_spinlock); | 129 | static DEFINE_SPINLOCK(mb_cache_spinlock); |
88 | 130 | ||
131 | static inline void | ||
132 | __spin_lock_mb_cache_entry(struct mb_cache_entry *ce) | ||
133 | { | ||
134 | spin_lock(bgl_lock_ptr(mb_cache_bg_lock, | ||
135 | MB_CACHE_ENTRY_LOCK_INDEX(ce))); | ||
136 | } | ||
137 | |||
138 | static inline void | ||
139 | __spin_unlock_mb_cache_entry(struct mb_cache_entry *ce) | ||
140 | { | ||
141 | spin_unlock(bgl_lock_ptr(mb_cache_bg_lock, | ||
142 | MB_CACHE_ENTRY_LOCK_INDEX(ce))); | ||
143 | } | ||
144 | |||
89 | static inline int | 145 | static inline int |
90 | __mb_cache_entry_is_hashed(struct mb_cache_entry *ce) | 146 | __mb_cache_entry_is_block_hashed(struct mb_cache_entry *ce) |
91 | { | 147 | { |
92 | return !list_empty(&ce->e_block_list); | 148 | return !hlist_bl_unhashed(&ce->e_block_list); |
93 | } | 149 | } |
94 | 150 | ||
95 | 151 | ||
96 | static void | 152 | static inline void |
97 | __mb_cache_entry_unhash(struct mb_cache_entry *ce) | 153 | __mb_cache_entry_unhash_block(struct mb_cache_entry *ce) |
98 | { | 154 | { |
99 | if (__mb_cache_entry_is_hashed(ce)) { | 155 | if (__mb_cache_entry_is_block_hashed(ce)) |
100 | list_del_init(&ce->e_block_list); | 156 | hlist_bl_del_init(&ce->e_block_list); |
101 | list_del(&ce->e_index.o_list); | ||
102 | } | ||
103 | } | 157 | } |
104 | 158 | ||
159 | static inline int | ||
160 | __mb_cache_entry_is_index_hashed(struct mb_cache_entry *ce) | ||
161 | { | ||
162 | return !hlist_bl_unhashed(&ce->e_index.o_list); | ||
163 | } | ||
164 | |||
165 | static inline void | ||
166 | __mb_cache_entry_unhash_index(struct mb_cache_entry *ce) | ||
167 | { | ||
168 | if (__mb_cache_entry_is_index_hashed(ce)) | ||
169 | hlist_bl_del_init(&ce->e_index.o_list); | ||
170 | } | ||
171 | |||
172 | /* | ||
173 | * __mb_cache_entry_unhash_unlock() | ||
174 | * | ||
175 | * This function is called to unhash both the block and index hash | ||
176 | * chain. | ||
177 | * It assumes both the block and index hash chains are locked upon entry. | ||
178 | * It also unlocks both hash chains upon exit. | ||
179 | */ | ||
180 | static inline void | ||
181 | __mb_cache_entry_unhash_unlock(struct mb_cache_entry *ce) | ||
182 | { | ||
183 | __mb_cache_entry_unhash_index(ce); | ||
184 | hlist_bl_unlock(ce->e_index_hash_p); | ||
185 | __mb_cache_entry_unhash_block(ce); | ||
186 | hlist_bl_unlock(ce->e_block_hash_p); | ||
187 | } | ||
105 | 188 | ||
106 | static void | 189 | static void |
107 | __mb_cache_entry_forget(struct mb_cache_entry *ce, gfp_t gfp_mask) | 190 | __mb_cache_entry_forget(struct mb_cache_entry *ce, gfp_t gfp_mask) |
108 | { | 191 | { |
109 | struct mb_cache *cache = ce->e_cache; | 192 | struct mb_cache *cache = ce->e_cache; |
110 | 193 | ||
111 | mb_assert(!(ce->e_used || ce->e_queued)); | 194 | mb_assert(!(ce->e_used || ce->e_queued || atomic_read(&ce->e_refcnt))); |
112 | kmem_cache_free(cache->c_entry_cache, ce); | 195 | kmem_cache_free(cache->c_entry_cache, ce); |
113 | atomic_dec(&cache->c_entry_count); | 196 | atomic_dec(&cache->c_entry_count); |
114 | } | 197 | } |
115 | 198 | ||
116 | |||
117 | static void | 199 | static void |
118 | __mb_cache_entry_release_unlock(struct mb_cache_entry *ce) | 200 | __mb_cache_entry_release(struct mb_cache_entry *ce) |
119 | __releases(mb_cache_spinlock) | ||
120 | { | 201 | { |
202 | /* First lock the entry to serialize access to its local data. */ | ||
203 | __spin_lock_mb_cache_entry(ce); | ||
121 | /* Wake up all processes queuing for this cache entry. */ | 204 | /* Wake up all processes queuing for this cache entry. */ |
122 | if (ce->e_queued) | 205 | if (ce->e_queued) |
123 | wake_up_all(&mb_cache_queue); | 206 | wake_up_all(&mb_cache_queue); |
124 | if (ce->e_used >= MB_CACHE_WRITER) | 207 | if (ce->e_used >= MB_CACHE_WRITER) |
125 | ce->e_used -= MB_CACHE_WRITER; | 208 | ce->e_used -= MB_CACHE_WRITER; |
209 | /* | ||
210 | * Make sure that all cache entries on lru_list have | ||
211 | * both e_used and e_queued of 0s. | ||
212 | */ | ||
126 | ce->e_used--; | 213 | ce->e_used--; |
127 | if (!(ce->e_used || ce->e_queued)) { | 214 | if (!(ce->e_used || ce->e_queued || atomic_read(&ce->e_refcnt))) { |
128 | if (!__mb_cache_entry_is_hashed(ce)) | 215 | if (!__mb_cache_entry_is_block_hashed(ce)) { |
216 | __spin_unlock_mb_cache_entry(ce); | ||
129 | goto forget; | 217 | goto forget; |
130 | mb_assert(list_empty(&ce->e_lru_list)); | 218 | } |
131 | list_add_tail(&ce->e_lru_list, &mb_cache_lru_list); | 219 | /* |
220 | * Need access to lru list, first drop entry lock, | ||
221 | * then reacquire the lock in the proper order. | ||
222 | */ | ||
223 | spin_lock(&mb_cache_spinlock); | ||
224 | if (list_empty(&ce->e_lru_list)) | ||
225 | list_add_tail(&ce->e_lru_list, &mb_cache_lru_list); | ||
226 | spin_unlock(&mb_cache_spinlock); | ||
132 | } | 227 | } |
133 | spin_unlock(&mb_cache_spinlock); | 228 | __spin_unlock_mb_cache_entry(ce); |
134 | return; | 229 | return; |
135 | forget: | 230 | forget: |
136 | spin_unlock(&mb_cache_spinlock); | 231 | mb_assert(list_empty(&ce->e_lru_list)); |
137 | __mb_cache_entry_forget(ce, GFP_KERNEL); | 232 | __mb_cache_entry_forget(ce, GFP_KERNEL); |
138 | } | 233 | } |
139 | 234 | ||
140 | |||
141 | /* | 235 | /* |
142 | * mb_cache_shrink_scan() memory pressure callback | 236 | * mb_cache_shrink_scan() memory pressure callback |
143 | * | 237 | * |
@@ -160,17 +254,34 @@ mb_cache_shrink_scan(struct shrinker *shrink, struct shrink_control *sc) | |||
160 | 254 | ||
161 | mb_debug("trying to free %d entries", nr_to_scan); | 255 | mb_debug("trying to free %d entries", nr_to_scan); |
162 | spin_lock(&mb_cache_spinlock); | 256 | spin_lock(&mb_cache_spinlock); |
163 | while (nr_to_scan-- && !list_empty(&mb_cache_lru_list)) { | 257 | while ((nr_to_scan-- > 0) && !list_empty(&mb_cache_lru_list)) { |
164 | struct mb_cache_entry *ce = | 258 | struct mb_cache_entry *ce = |
165 | list_entry(mb_cache_lru_list.next, | 259 | list_entry(mb_cache_lru_list.next, |
166 | struct mb_cache_entry, e_lru_list); | 260 | struct mb_cache_entry, e_lru_list); |
167 | list_move_tail(&ce->e_lru_list, &free_list); | 261 | list_del_init(&ce->e_lru_list); |
168 | __mb_cache_entry_unhash(ce); | 262 | if (ce->e_used || ce->e_queued || atomic_read(&ce->e_refcnt)) |
169 | freed++; | 263 | continue; |
264 | spin_unlock(&mb_cache_spinlock); | ||
265 | /* Prevent any find or get operation on the entry */ | ||
266 | hlist_bl_lock(ce->e_block_hash_p); | ||
267 | hlist_bl_lock(ce->e_index_hash_p); | ||
268 | /* Ignore if it is touched by a find/get */ | ||
269 | if (ce->e_used || ce->e_queued || atomic_read(&ce->e_refcnt) || | ||
270 | !list_empty(&ce->e_lru_list)) { | ||
271 | hlist_bl_unlock(ce->e_index_hash_p); | ||
272 | hlist_bl_unlock(ce->e_block_hash_p); | ||
273 | spin_lock(&mb_cache_spinlock); | ||
274 | continue; | ||
275 | } | ||
276 | __mb_cache_entry_unhash_unlock(ce); | ||
277 | list_add_tail(&ce->e_lru_list, &free_list); | ||
278 | spin_lock(&mb_cache_spinlock); | ||
170 | } | 279 | } |
171 | spin_unlock(&mb_cache_spinlock); | 280 | spin_unlock(&mb_cache_spinlock); |
281 | |||
172 | list_for_each_entry_safe(entry, tmp, &free_list, e_lru_list) { | 282 | list_for_each_entry_safe(entry, tmp, &free_list, e_lru_list) { |
173 | __mb_cache_entry_forget(entry, gfp_mask); | 283 | __mb_cache_entry_forget(entry, gfp_mask); |
284 | freed++; | ||
174 | } | 285 | } |
175 | return freed; | 286 | return freed; |
176 | } | 287 | } |
@@ -215,29 +326,40 @@ mb_cache_create(const char *name, int bucket_bits) | |||
215 | int n, bucket_count = 1 << bucket_bits; | 326 | int n, bucket_count = 1 << bucket_bits; |
216 | struct mb_cache *cache = NULL; | 327 | struct mb_cache *cache = NULL; |
217 | 328 | ||
329 | if (!mb_cache_bg_lock) { | ||
330 | mb_cache_bg_lock = kmalloc(sizeof(struct blockgroup_lock), | ||
331 | GFP_KERNEL); | ||
332 | if (!mb_cache_bg_lock) | ||
333 | return NULL; | ||
334 | bgl_lock_init(mb_cache_bg_lock); | ||
335 | } | ||
336 | |||
218 | cache = kmalloc(sizeof(struct mb_cache), GFP_KERNEL); | 337 | cache = kmalloc(sizeof(struct mb_cache), GFP_KERNEL); |
219 | if (!cache) | 338 | if (!cache) |
220 | return NULL; | 339 | return NULL; |
221 | cache->c_name = name; | 340 | cache->c_name = name; |
222 | atomic_set(&cache->c_entry_count, 0); | 341 | atomic_set(&cache->c_entry_count, 0); |
223 | cache->c_bucket_bits = bucket_bits; | 342 | cache->c_bucket_bits = bucket_bits; |
224 | cache->c_block_hash = kmalloc(bucket_count * sizeof(struct list_head), | 343 | cache->c_block_hash = kmalloc(bucket_count * |
225 | GFP_KERNEL); | 344 | sizeof(struct hlist_bl_head), GFP_KERNEL); |
226 | if (!cache->c_block_hash) | 345 | if (!cache->c_block_hash) |
227 | goto fail; | 346 | goto fail; |
228 | for (n=0; n<bucket_count; n++) | 347 | for (n=0; n<bucket_count; n++) |
229 | INIT_LIST_HEAD(&cache->c_block_hash[n]); | 348 | INIT_HLIST_BL_HEAD(&cache->c_block_hash[n]); |
230 | cache->c_index_hash = kmalloc(bucket_count * sizeof(struct list_head), | 349 | cache->c_index_hash = kmalloc(bucket_count * |
231 | GFP_KERNEL); | 350 | sizeof(struct hlist_bl_head), GFP_KERNEL); |
232 | if (!cache->c_index_hash) | 351 | if (!cache->c_index_hash) |
233 | goto fail; | 352 | goto fail; |
234 | for (n=0; n<bucket_count; n++) | 353 | for (n=0; n<bucket_count; n++) |
235 | INIT_LIST_HEAD(&cache->c_index_hash[n]); | 354 | INIT_HLIST_BL_HEAD(&cache->c_index_hash[n]); |
236 | cache->c_entry_cache = kmem_cache_create(name, | 355 | if (!mb_cache_kmem_cache) { |
237 | sizeof(struct mb_cache_entry), 0, | 356 | mb_cache_kmem_cache = kmem_cache_create(name, |
238 | SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, NULL); | 357 | sizeof(struct mb_cache_entry), 0, |
239 | if (!cache->c_entry_cache) | 358 | SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, NULL); |
240 | goto fail2; | 359 | if (!mb_cache_kmem_cache) |
360 | goto fail2; | ||
361 | } | ||
362 | cache->c_entry_cache = mb_cache_kmem_cache; | ||
241 | 363 | ||
242 | /* | 364 | /* |
243 | * Set an upper limit on the number of cache entries so that the hash | 365 | * Set an upper limit on the number of cache entries so that the hash |
@@ -273,21 +395,47 @@ void | |||
273 | mb_cache_shrink(struct block_device *bdev) | 395 | mb_cache_shrink(struct block_device *bdev) |
274 | { | 396 | { |
275 | LIST_HEAD(free_list); | 397 | LIST_HEAD(free_list); |
276 | struct list_head *l, *ltmp; | 398 | struct list_head *l; |
399 | struct mb_cache_entry *ce, *tmp; | ||
277 | 400 | ||
401 | l = &mb_cache_lru_list; | ||
278 | spin_lock(&mb_cache_spinlock); | 402 | spin_lock(&mb_cache_spinlock); |
279 | list_for_each_safe(l, ltmp, &mb_cache_lru_list) { | 403 | while (!list_is_last(l, &mb_cache_lru_list)) { |
280 | struct mb_cache_entry *ce = | 404 | l = l->next; |
281 | list_entry(l, struct mb_cache_entry, e_lru_list); | 405 | ce = list_entry(l, struct mb_cache_entry, e_lru_list); |
282 | if (ce->e_bdev == bdev) { | 406 | if (ce->e_bdev == bdev) { |
283 | list_move_tail(&ce->e_lru_list, &free_list); | 407 | list_del_init(&ce->e_lru_list); |
284 | __mb_cache_entry_unhash(ce); | 408 | if (ce->e_used || ce->e_queued || |
409 | atomic_read(&ce->e_refcnt)) | ||
410 | continue; | ||
411 | spin_unlock(&mb_cache_spinlock); | ||
412 | /* | ||
413 | * Prevent any find or get operation on the entry. | ||
414 | */ | ||
415 | hlist_bl_lock(ce->e_block_hash_p); | ||
416 | hlist_bl_lock(ce->e_index_hash_p); | ||
417 | /* Ignore if it is touched by a find/get */ | ||
418 | if (ce->e_used || ce->e_queued || | ||
419 | atomic_read(&ce->e_refcnt) || | ||
420 | !list_empty(&ce->e_lru_list)) { | ||
421 | hlist_bl_unlock(ce->e_index_hash_p); | ||
422 | hlist_bl_unlock(ce->e_block_hash_p); | ||
423 | l = &mb_cache_lru_list; | ||
424 | spin_lock(&mb_cache_spinlock); | ||
425 | continue; | ||
426 | } | ||
427 | __mb_cache_entry_unhash_unlock(ce); | ||
428 | mb_assert(!(ce->e_used || ce->e_queued || | ||
429 | atomic_read(&ce->e_refcnt))); | ||
430 | list_add_tail(&ce->e_lru_list, &free_list); | ||
431 | l = &mb_cache_lru_list; | ||
432 | spin_lock(&mb_cache_spinlock); | ||
285 | } | 433 | } |
286 | } | 434 | } |
287 | spin_unlock(&mb_cache_spinlock); | 435 | spin_unlock(&mb_cache_spinlock); |
288 | list_for_each_safe(l, ltmp, &free_list) { | 436 | |
289 | __mb_cache_entry_forget(list_entry(l, struct mb_cache_entry, | 437 | list_for_each_entry_safe(ce, tmp, &free_list, e_lru_list) { |
290 | e_lru_list), GFP_KERNEL); | 438 | __mb_cache_entry_forget(ce, GFP_KERNEL); |
291 | } | 439 | } |
292 | } | 440 | } |
293 | 441 | ||
@@ -303,23 +451,27 @@ void | |||
303 | mb_cache_destroy(struct mb_cache *cache) | 451 | mb_cache_destroy(struct mb_cache *cache) |
304 | { | 452 | { |
305 | LIST_HEAD(free_list); | 453 | LIST_HEAD(free_list); |
306 | struct list_head *l, *ltmp; | 454 | struct mb_cache_entry *ce, *tmp; |
307 | 455 | ||
308 | spin_lock(&mb_cache_spinlock); | 456 | spin_lock(&mb_cache_spinlock); |
309 | list_for_each_safe(l, ltmp, &mb_cache_lru_list) { | 457 | list_for_each_entry_safe(ce, tmp, &mb_cache_lru_list, e_lru_list) { |
310 | struct mb_cache_entry *ce = | 458 | if (ce->e_cache == cache) |
311 | list_entry(l, struct mb_cache_entry, e_lru_list); | ||
312 | if (ce->e_cache == cache) { | ||
313 | list_move_tail(&ce->e_lru_list, &free_list); | 459 | list_move_tail(&ce->e_lru_list, &free_list); |
314 | __mb_cache_entry_unhash(ce); | ||
315 | } | ||
316 | } | 460 | } |
317 | list_del(&cache->c_cache_list); | 461 | list_del(&cache->c_cache_list); |
318 | spin_unlock(&mb_cache_spinlock); | 462 | spin_unlock(&mb_cache_spinlock); |
319 | 463 | ||
320 | list_for_each_safe(l, ltmp, &free_list) { | 464 | list_for_each_entry_safe(ce, tmp, &free_list, e_lru_list) { |
321 | __mb_cache_entry_forget(list_entry(l, struct mb_cache_entry, | 465 | list_del_init(&ce->e_lru_list); |
322 | e_lru_list), GFP_KERNEL); | 466 | /* |
467 | * Prevent any find or get operation on the entry. | ||
468 | */ | ||
469 | hlist_bl_lock(ce->e_block_hash_p); | ||
470 | hlist_bl_lock(ce->e_index_hash_p); | ||
471 | mb_assert(!(ce->e_used || ce->e_queued || | ||
472 | atomic_read(&ce->e_refcnt))); | ||
473 | __mb_cache_entry_unhash_unlock(ce); | ||
474 | __mb_cache_entry_forget(ce, GFP_KERNEL); | ||
323 | } | 475 | } |
324 | 476 | ||
325 | if (atomic_read(&cache->c_entry_count) > 0) { | 477 | if (atomic_read(&cache->c_entry_count) > 0) { |
@@ -328,8 +480,10 @@ mb_cache_destroy(struct mb_cache *cache) | |||
328 | atomic_read(&cache->c_entry_count)); | 480 | atomic_read(&cache->c_entry_count)); |
329 | } | 481 | } |
330 | 482 | ||
331 | kmem_cache_destroy(cache->c_entry_cache); | 483 | if (list_empty(&mb_cache_list)) { |
332 | 484 | kmem_cache_destroy(mb_cache_kmem_cache); | |
485 | mb_cache_kmem_cache = NULL; | ||
486 | } | ||
333 | kfree(cache->c_index_hash); | 487 | kfree(cache->c_index_hash); |
334 | kfree(cache->c_block_hash); | 488 | kfree(cache->c_block_hash); |
335 | kfree(cache); | 489 | kfree(cache); |
@@ -346,28 +500,61 @@ mb_cache_destroy(struct mb_cache *cache) | |||
346 | struct mb_cache_entry * | 500 | struct mb_cache_entry * |
347 | mb_cache_entry_alloc(struct mb_cache *cache, gfp_t gfp_flags) | 501 | mb_cache_entry_alloc(struct mb_cache *cache, gfp_t gfp_flags) |
348 | { | 502 | { |
349 | struct mb_cache_entry *ce = NULL; | 503 | struct mb_cache_entry *ce; |
350 | 504 | ||
351 | if (atomic_read(&cache->c_entry_count) >= cache->c_max_entries) { | 505 | if (atomic_read(&cache->c_entry_count) >= cache->c_max_entries) { |
506 | struct list_head *l; | ||
507 | |||
508 | l = &mb_cache_lru_list; | ||
352 | spin_lock(&mb_cache_spinlock); | 509 | spin_lock(&mb_cache_spinlock); |
353 | if (!list_empty(&mb_cache_lru_list)) { | 510 | while (!list_is_last(l, &mb_cache_lru_list)) { |
354 | ce = list_entry(mb_cache_lru_list.next, | 511 | l = l->next; |
355 | struct mb_cache_entry, e_lru_list); | 512 | ce = list_entry(l, struct mb_cache_entry, e_lru_list); |
356 | list_del_init(&ce->e_lru_list); | 513 | if (ce->e_cache == cache) { |
357 | __mb_cache_entry_unhash(ce); | 514 | list_del_init(&ce->e_lru_list); |
515 | if (ce->e_used || ce->e_queued || | ||
516 | atomic_read(&ce->e_refcnt)) | ||
517 | continue; | ||
518 | spin_unlock(&mb_cache_spinlock); | ||
519 | /* | ||
520 | * Prevent any find or get operation on the | ||
521 | * entry. | ||
522 | */ | ||
523 | hlist_bl_lock(ce->e_block_hash_p); | ||
524 | hlist_bl_lock(ce->e_index_hash_p); | ||
525 | /* Ignore if it is touched by a find/get */ | ||
526 | if (ce->e_used || ce->e_queued || | ||
527 | atomic_read(&ce->e_refcnt) || | ||
528 | !list_empty(&ce->e_lru_list)) { | ||
529 | hlist_bl_unlock(ce->e_index_hash_p); | ||
530 | hlist_bl_unlock(ce->e_block_hash_p); | ||
531 | l = &mb_cache_lru_list; | ||
532 | spin_lock(&mb_cache_spinlock); | ||
533 | continue; | ||
534 | } | ||
535 | mb_assert(list_empty(&ce->e_lru_list)); | ||
536 | mb_assert(!(ce->e_used || ce->e_queued || | ||
537 | atomic_read(&ce->e_refcnt))); | ||
538 | __mb_cache_entry_unhash_unlock(ce); | ||
539 | goto found; | ||
540 | } | ||
358 | } | 541 | } |
359 | spin_unlock(&mb_cache_spinlock); | 542 | spin_unlock(&mb_cache_spinlock); |
360 | } | 543 | } |
361 | if (!ce) { | 544 | |
362 | ce = kmem_cache_alloc(cache->c_entry_cache, gfp_flags); | 545 | ce = kmem_cache_alloc(cache->c_entry_cache, gfp_flags); |
363 | if (!ce) | 546 | if (!ce) |
364 | return NULL; | 547 | return NULL; |
365 | atomic_inc(&cache->c_entry_count); | 548 | atomic_inc(&cache->c_entry_count); |
366 | INIT_LIST_HEAD(&ce->e_lru_list); | 549 | INIT_LIST_HEAD(&ce->e_lru_list); |
367 | INIT_LIST_HEAD(&ce->e_block_list); | 550 | INIT_HLIST_BL_NODE(&ce->e_block_list); |
368 | ce->e_cache = cache; | 551 | INIT_HLIST_BL_NODE(&ce->e_index.o_list); |
369 | ce->e_queued = 0; | 552 | ce->e_cache = cache; |
370 | } | 553 | ce->e_queued = 0; |
554 | atomic_set(&ce->e_refcnt, 0); | ||
555 | found: | ||
556 | ce->e_block_hash_p = &cache->c_block_hash[0]; | ||
557 | ce->e_index_hash_p = &cache->c_index_hash[0]; | ||
371 | ce->e_used = 1 + MB_CACHE_WRITER; | 558 | ce->e_used = 1 + MB_CACHE_WRITER; |
372 | return ce; | 559 | return ce; |
373 | } | 560 | } |
@@ -393,29 +580,38 @@ mb_cache_entry_insert(struct mb_cache_entry *ce, struct block_device *bdev, | |||
393 | { | 580 | { |
394 | struct mb_cache *cache = ce->e_cache; | 581 | struct mb_cache *cache = ce->e_cache; |
395 | unsigned int bucket; | 582 | unsigned int bucket; |
396 | struct list_head *l; | 583 | struct hlist_bl_node *l; |
397 | int error = -EBUSY; | 584 | struct hlist_bl_head *block_hash_p; |
585 | struct hlist_bl_head *index_hash_p; | ||
586 | struct mb_cache_entry *lce; | ||
398 | 587 | ||
588 | mb_assert(ce); | ||
399 | bucket = hash_long((unsigned long)bdev + (block & 0xffffffff), | 589 | bucket = hash_long((unsigned long)bdev + (block & 0xffffffff), |
400 | cache->c_bucket_bits); | 590 | cache->c_bucket_bits); |
401 | spin_lock(&mb_cache_spinlock); | 591 | block_hash_p = &cache->c_block_hash[bucket]; |
402 | list_for_each_prev(l, &cache->c_block_hash[bucket]) { | 592 | hlist_bl_lock(block_hash_p); |
403 | struct mb_cache_entry *ce = | 593 | hlist_bl_for_each_entry(lce, l, block_hash_p, e_block_list) { |
404 | list_entry(l, struct mb_cache_entry, e_block_list); | 594 | if (lce->e_bdev == bdev && lce->e_block == block) { |
405 | if (ce->e_bdev == bdev && ce->e_block == block) | 595 | hlist_bl_unlock(block_hash_p); |
406 | goto out; | 596 | return -EBUSY; |
597 | } | ||
407 | } | 598 | } |
408 | __mb_cache_entry_unhash(ce); | 599 | mb_assert(!__mb_cache_entry_is_block_hashed(ce)); |
600 | __mb_cache_entry_unhash_block(ce); | ||
601 | __mb_cache_entry_unhash_index(ce); | ||
409 | ce->e_bdev = bdev; | 602 | ce->e_bdev = bdev; |
410 | ce->e_block = block; | 603 | ce->e_block = block; |
411 | list_add(&ce->e_block_list, &cache->c_block_hash[bucket]); | 604 | ce->e_block_hash_p = block_hash_p; |
412 | ce->e_index.o_key = key; | 605 | ce->e_index.o_key = key; |
606 | hlist_bl_add_head(&ce->e_block_list, block_hash_p); | ||
607 | hlist_bl_unlock(block_hash_p); | ||
413 | bucket = hash_long(key, cache->c_bucket_bits); | 608 | bucket = hash_long(key, cache->c_bucket_bits); |
414 | list_add(&ce->e_index.o_list, &cache->c_index_hash[bucket]); | 609 | index_hash_p = &cache->c_index_hash[bucket]; |
415 | error = 0; | 610 | hlist_bl_lock(index_hash_p); |
416 | out: | 611 | ce->e_index_hash_p = index_hash_p; |
417 | spin_unlock(&mb_cache_spinlock); | 612 | hlist_bl_add_head(&ce->e_index.o_list, index_hash_p); |
418 | return error; | 613 | hlist_bl_unlock(index_hash_p); |
614 | return 0; | ||
419 | } | 615 | } |
420 | 616 | ||
421 | 617 | ||
@@ -429,24 +625,26 @@ out: | |||
429 | void | 625 | void |
430 | mb_cache_entry_release(struct mb_cache_entry *ce) | 626 | mb_cache_entry_release(struct mb_cache_entry *ce) |
431 | { | 627 | { |
432 | spin_lock(&mb_cache_spinlock); | 628 | __mb_cache_entry_release(ce); |
433 | __mb_cache_entry_release_unlock(ce); | ||
434 | } | 629 | } |
435 | 630 | ||
436 | 631 | ||
437 | /* | 632 | /* |
438 | * mb_cache_entry_free() | 633 | * mb_cache_entry_free() |
439 | * | 634 | * |
440 | * This is equivalent to the sequence mb_cache_entry_takeout() -- | ||
441 | * mb_cache_entry_release(). | ||
442 | */ | 635 | */ |
443 | void | 636 | void |
444 | mb_cache_entry_free(struct mb_cache_entry *ce) | 637 | mb_cache_entry_free(struct mb_cache_entry *ce) |
445 | { | 638 | { |
446 | spin_lock(&mb_cache_spinlock); | 639 | mb_assert(ce); |
447 | mb_assert(list_empty(&ce->e_lru_list)); | 640 | mb_assert(list_empty(&ce->e_lru_list)); |
448 | __mb_cache_entry_unhash(ce); | 641 | hlist_bl_lock(ce->e_index_hash_p); |
449 | __mb_cache_entry_release_unlock(ce); | 642 | __mb_cache_entry_unhash_index(ce); |
643 | hlist_bl_unlock(ce->e_index_hash_p); | ||
644 | hlist_bl_lock(ce->e_block_hash_p); | ||
645 | __mb_cache_entry_unhash_block(ce); | ||
646 | hlist_bl_unlock(ce->e_block_hash_p); | ||
647 | __mb_cache_entry_release(ce); | ||
450 | } | 648 | } |
451 | 649 | ||
452 | 650 | ||
@@ -463,84 +661,110 @@ mb_cache_entry_get(struct mb_cache *cache, struct block_device *bdev, | |||
463 | sector_t block) | 661 | sector_t block) |
464 | { | 662 | { |
465 | unsigned int bucket; | 663 | unsigned int bucket; |
466 | struct list_head *l; | 664 | struct hlist_bl_node *l; |
467 | struct mb_cache_entry *ce; | 665 | struct mb_cache_entry *ce; |
666 | struct hlist_bl_head *block_hash_p; | ||
468 | 667 | ||
469 | bucket = hash_long((unsigned long)bdev + (block & 0xffffffff), | 668 | bucket = hash_long((unsigned long)bdev + (block & 0xffffffff), |
470 | cache->c_bucket_bits); | 669 | cache->c_bucket_bits); |
471 | spin_lock(&mb_cache_spinlock); | 670 | block_hash_p = &cache->c_block_hash[bucket]; |
472 | list_for_each(l, &cache->c_block_hash[bucket]) { | 671 | /* First serialize access to the block corresponding hash chain. */ |
473 | ce = list_entry(l, struct mb_cache_entry, e_block_list); | 672 | hlist_bl_lock(block_hash_p); |
673 | hlist_bl_for_each_entry(ce, l, block_hash_p, e_block_list) { | ||
674 | mb_assert(ce->e_block_hash_p == block_hash_p); | ||
474 | if (ce->e_bdev == bdev && ce->e_block == block) { | 675 | if (ce->e_bdev == bdev && ce->e_block == block) { |
475 | DEFINE_WAIT(wait); | 676 | /* |
677 | * Prevent a free from removing the entry. | ||
678 | */ | ||
679 | atomic_inc(&ce->e_refcnt); | ||
680 | hlist_bl_unlock(block_hash_p); | ||
681 | __spin_lock_mb_cache_entry(ce); | ||
682 | atomic_dec(&ce->e_refcnt); | ||
683 | if (ce->e_used > 0) { | ||
684 | DEFINE_WAIT(wait); | ||
685 | while (ce->e_used > 0) { | ||
686 | ce->e_queued++; | ||
687 | prepare_to_wait(&mb_cache_queue, &wait, | ||
688 | TASK_UNINTERRUPTIBLE); | ||
689 | __spin_unlock_mb_cache_entry(ce); | ||
690 | schedule(); | ||
691 | __spin_lock_mb_cache_entry(ce); | ||
692 | ce->e_queued--; | ||
693 | } | ||
694 | finish_wait(&mb_cache_queue, &wait); | ||
695 | } | ||
696 | ce->e_used += 1 + MB_CACHE_WRITER; | ||
697 | __spin_unlock_mb_cache_entry(ce); | ||
476 | 698 | ||
477 | if (!list_empty(&ce->e_lru_list)) | 699 | if (!list_empty(&ce->e_lru_list)) { |
700 | spin_lock(&mb_cache_spinlock); | ||
478 | list_del_init(&ce->e_lru_list); | 701 | list_del_init(&ce->e_lru_list); |
479 | |||
480 | while (ce->e_used > 0) { | ||
481 | ce->e_queued++; | ||
482 | prepare_to_wait(&mb_cache_queue, &wait, | ||
483 | TASK_UNINTERRUPTIBLE); | ||
484 | spin_unlock(&mb_cache_spinlock); | 702 | spin_unlock(&mb_cache_spinlock); |
485 | schedule(); | ||
486 | spin_lock(&mb_cache_spinlock); | ||
487 | ce->e_queued--; | ||
488 | } | 703 | } |
489 | finish_wait(&mb_cache_queue, &wait); | 704 | if (!__mb_cache_entry_is_block_hashed(ce)) { |
490 | ce->e_used += 1 + MB_CACHE_WRITER; | 705 | __mb_cache_entry_release(ce); |
491 | |||
492 | if (!__mb_cache_entry_is_hashed(ce)) { | ||
493 | __mb_cache_entry_release_unlock(ce); | ||
494 | return NULL; | 706 | return NULL; |
495 | } | 707 | } |
496 | goto cleanup; | 708 | return ce; |
497 | } | 709 | } |
498 | } | 710 | } |
499 | ce = NULL; | 711 | hlist_bl_unlock(block_hash_p); |
500 | 712 | return NULL; | |
501 | cleanup: | ||
502 | spin_unlock(&mb_cache_spinlock); | ||
503 | return ce; | ||
504 | } | 713 | } |
505 | 714 | ||
506 | #if !defined(MB_CACHE_INDEXES_COUNT) || (MB_CACHE_INDEXES_COUNT > 0) | 715 | #if !defined(MB_CACHE_INDEXES_COUNT) || (MB_CACHE_INDEXES_COUNT > 0) |
507 | 716 | ||
508 | static struct mb_cache_entry * | 717 | static struct mb_cache_entry * |
509 | __mb_cache_entry_find(struct list_head *l, struct list_head *head, | 718 | __mb_cache_entry_find(struct hlist_bl_node *l, struct hlist_bl_head *head, |
510 | struct block_device *bdev, unsigned int key) | 719 | struct block_device *bdev, unsigned int key) |
511 | { | 720 | { |
512 | while (l != head) { | 721 | |
722 | /* The index hash chain is already acquired by the caller. */ | ||
723 | while (l != NULL) { | ||
513 | struct mb_cache_entry *ce = | 724 | struct mb_cache_entry *ce = |
514 | list_entry(l, struct mb_cache_entry, e_index.o_list); | 725 | hlist_bl_entry(l, struct mb_cache_entry, |
726 | e_index.o_list); | ||
727 | mb_assert(ce->e_index_hash_p == head); | ||
515 | if (ce->e_bdev == bdev && ce->e_index.o_key == key) { | 728 | if (ce->e_bdev == bdev && ce->e_index.o_key == key) { |
516 | DEFINE_WAIT(wait); | 729 | /* |
517 | 730 | * Prevent a free from removing the entry. | |
518 | if (!list_empty(&ce->e_lru_list)) | 731 | */ |
519 | list_del_init(&ce->e_lru_list); | 732 | atomic_inc(&ce->e_refcnt); |
520 | 733 | hlist_bl_unlock(head); | |
734 | __spin_lock_mb_cache_entry(ce); | ||
735 | atomic_dec(&ce->e_refcnt); | ||
736 | ce->e_used++; | ||
521 | /* Incrementing before holding the lock gives readers | 737 | /* Incrementing before holding the lock gives readers |
522 | priority over writers. */ | 738 | priority over writers. */ |
523 | ce->e_used++; | 739 | if (ce->e_used >= MB_CACHE_WRITER) { |
524 | while (ce->e_used >= MB_CACHE_WRITER) { | 740 | DEFINE_WAIT(wait); |
525 | ce->e_queued++; | 741 | |
526 | prepare_to_wait(&mb_cache_queue, &wait, | 742 | while (ce->e_used >= MB_CACHE_WRITER) { |
527 | TASK_UNINTERRUPTIBLE); | 743 | ce->e_queued++; |
528 | spin_unlock(&mb_cache_spinlock); | 744 | prepare_to_wait(&mb_cache_queue, &wait, |
529 | schedule(); | 745 | TASK_UNINTERRUPTIBLE); |
530 | spin_lock(&mb_cache_spinlock); | 746 | __spin_unlock_mb_cache_entry(ce); |
531 | ce->e_queued--; | 747 | schedule(); |
748 | __spin_lock_mb_cache_entry(ce); | ||
749 | ce->e_queued--; | ||
750 | } | ||
751 | finish_wait(&mb_cache_queue, &wait); | ||
532 | } | 752 | } |
533 | finish_wait(&mb_cache_queue, &wait); | 753 | __spin_unlock_mb_cache_entry(ce); |
534 | 754 | if (!list_empty(&ce->e_lru_list)) { | |
535 | if (!__mb_cache_entry_is_hashed(ce)) { | ||
536 | __mb_cache_entry_release_unlock(ce); | ||
537 | spin_lock(&mb_cache_spinlock); | 755 | spin_lock(&mb_cache_spinlock); |
756 | list_del_init(&ce->e_lru_list); | ||
757 | spin_unlock(&mb_cache_spinlock); | ||
758 | } | ||
759 | if (!__mb_cache_entry_is_block_hashed(ce)) { | ||
760 | __mb_cache_entry_release(ce); | ||
538 | return ERR_PTR(-EAGAIN); | 761 | return ERR_PTR(-EAGAIN); |
539 | } | 762 | } |
540 | return ce; | 763 | return ce; |
541 | } | 764 | } |
542 | l = l->next; | 765 | l = l->next; |
543 | } | 766 | } |
767 | hlist_bl_unlock(head); | ||
544 | return NULL; | 768 | return NULL; |
545 | } | 769 | } |
546 | 770 | ||
@@ -562,13 +786,17 @@ mb_cache_entry_find_first(struct mb_cache *cache, struct block_device *bdev, | |||
562 | unsigned int key) | 786 | unsigned int key) |
563 | { | 787 | { |
564 | unsigned int bucket = hash_long(key, cache->c_bucket_bits); | 788 | unsigned int bucket = hash_long(key, cache->c_bucket_bits); |
565 | struct list_head *l; | 789 | struct hlist_bl_node *l; |
566 | struct mb_cache_entry *ce; | 790 | struct mb_cache_entry *ce = NULL; |
567 | 791 | struct hlist_bl_head *index_hash_p; | |
568 | spin_lock(&mb_cache_spinlock); | 792 | |
569 | l = cache->c_index_hash[bucket].next; | 793 | index_hash_p = &cache->c_index_hash[bucket]; |
570 | ce = __mb_cache_entry_find(l, &cache->c_index_hash[bucket], bdev, key); | 794 | hlist_bl_lock(index_hash_p); |
571 | spin_unlock(&mb_cache_spinlock); | 795 | if (!hlist_bl_empty(index_hash_p)) { |
796 | l = hlist_bl_first(index_hash_p); | ||
797 | ce = __mb_cache_entry_find(l, index_hash_p, bdev, key); | ||
798 | } else | ||
799 | hlist_bl_unlock(index_hash_p); | ||
572 | return ce; | 800 | return ce; |
573 | } | 801 | } |
574 | 802 | ||
@@ -597,13 +825,17 @@ mb_cache_entry_find_next(struct mb_cache_entry *prev, | |||
597 | { | 825 | { |
598 | struct mb_cache *cache = prev->e_cache; | 826 | struct mb_cache *cache = prev->e_cache; |
599 | unsigned int bucket = hash_long(key, cache->c_bucket_bits); | 827 | unsigned int bucket = hash_long(key, cache->c_bucket_bits); |
600 | struct list_head *l; | 828 | struct hlist_bl_node *l; |
601 | struct mb_cache_entry *ce; | 829 | struct mb_cache_entry *ce; |
830 | struct hlist_bl_head *index_hash_p; | ||
602 | 831 | ||
603 | spin_lock(&mb_cache_spinlock); | 832 | index_hash_p = &cache->c_index_hash[bucket]; |
833 | mb_assert(prev->e_index_hash_p == index_hash_p); | ||
834 | hlist_bl_lock(index_hash_p); | ||
835 | mb_assert(!hlist_bl_empty(index_hash_p)); | ||
604 | l = prev->e_index.o_list.next; | 836 | l = prev->e_index.o_list.next; |
605 | ce = __mb_cache_entry_find(l, &cache->c_index_hash[bucket], bdev, key); | 837 | ce = __mb_cache_entry_find(l, index_hash_p, bdev, key); |
606 | __mb_cache_entry_release_unlock(prev); | 838 | __mb_cache_entry_release(prev); |
607 | return ce; | 839 | return ce; |
608 | } | 840 | } |
609 | 841 | ||