author | T Makphaibulchoke <tmac@hp.com> | 2014-03-18 19:23:20 -0400 |
---|---|---|
committer | Theodore Ts'o <tytso@mit.edu> | 2014-03-18 19:23:20 -0400 |
commit | 1f3e55fe02d12213f87869768aa2b0bad3ba9a7d (patch) | |
tree | 75fc0934a88c67ba00f4c2b30fc00e3c3c281e61 /fs/mbcache.c | |
parent | 3e037e5211252902a188a6a11aecd247409d0229 (diff) |
fs/mbcache.c: decouple the locking of local from global data
The patch increases the parallelism of mbcache by using the built-in
lock in the hlist_bl_node to protect the mb_cache's local block and
index hash chains. The global data mb_cache_lru_list and
mb_cache_list continue to be protected by the global
mb_cache_spinlock.
A new block group spinlock, mb_cache_bg_lock, is also added to serialize
accesses to each mb_cache_entry's local data.
A new member, e_refcnt, is added to the mb_cache_entry structure to help
prevent an mb_cache_entry from being deallocated by a free while it
is still being referenced by either mb_cache_entry_get() or
mb_cache_entry_find().
Signed-off-by: T. Makphaibulchoke <tmac@hp.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Diffstat (limited to 'fs/mbcache.c')
-rw-r--r-- | fs/mbcache.c | 417 |
1 file changed, 301 insertions, 116 deletions
diff --git a/fs/mbcache.c b/fs/mbcache.c
index 55db0daaca74..786ecab81c99 100644
--- a/fs/mbcache.c
+++ b/fs/mbcache.c
@@ -26,6 +26,41 @@ | |||
26 | * back on the lru list. | 26 | * back on the lru list. |
27 | */ | 27 | */ |
28 | 28 | ||
29 | /* | ||
30 | * Lock descriptions and usage: | ||
31 | * | ||
32 | * Each hash chain of both the block and index hash tables now contains | ||
33 | * a built-in lock used to serialize accesses to the hash chain. | ||
34 | * | ||
35 | * Accesses to global data structures mb_cache_list and mb_cache_lru_list | ||
36 | * are serialized via the global spinlock mb_cache_spinlock. | ||
37 | * | ||
38 | * Each mb_cache_entry contains a spinlock, e_entry_lock, to serialize | ||
39 | * accesses to its local data, such as e_used and e_queued. | ||
40 | * | ||
41 | * Lock ordering: | ||
42 | * | ||
43 | * Each block hash chain's lock has the highest lock order, followed by an | ||
44 | * index hash chain's lock, mb_cache_bg_lock (used to implement mb_cache_entry's | ||
45 | * lock), and mb_cache_spinlock, with the lowest order. While holding | ||
46 | * either a block or index hash chain lock, a thread can acquire an | ||
47 | * mb_cache_bg_lock, which in turn can also acquire mb_cache_spinlock. | ||
48 | * | ||
49 | * Synchronization: | ||
50 | * | ||
51 | * Since both mb_cache_entry_get and mb_cache_entry_find scan the block and | ||
52 | * index hash chain, they need to lock the corresponding hash chain. For each | ||
53 | * mb_cache_entry within the chain, they also need to lock the mb_cache_entry to | ||
54 | * prevent any simultaneous release or free of the entry and also | ||
55 | * to serialize accesses to either the e_used or e_queued member of the entry. | ||
56 | * | ||
57 | * To avoid having a dangling reference to an already freed | ||
58 | * mb_cache_entry, an mb_cache_entry is only freed when it is not on a | ||
59 | * block hash chain and also no longer being referenced, i.e. both e_used | ||
60 | * and e_queued are 0. When an mb_cache_entry is explicitly freed, it is | ||
61 | * first removed from a block hash chain. | ||
62 | */ | ||
63 | |||
29 | #include <linux/kernel.h> | 64 | #include <linux/kernel.h> |
30 | #include <linux/module.h> | 65 | #include <linux/module.h> |
31 | 66 | ||
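To make the lock-ordering rules in the comment added above concrete, here is a small, hedged userspace sketch rather than kernel code: pthread spinlocks stand in for the hash-chain bit locks, mb_cache_bg_lock and mb_cache_spinlock, and the function name is made up. The only point it demonstrates is acquiring the locks in the documented order (block chain, then index chain, then the per-entry bg lock, then the global spinlock) and releasing them in reverse.

```c
/* Sketch of the documented lock order; names are illustrative only. */
#include <pthread.h>

static pthread_spinlock_t block_chain_lock;  /* highest order  */
static pthread_spinlock_t index_chain_lock;
static pthread_spinlock_t entry_bg_lock;     /* per-entry lock */
static pthread_spinlock_t global_lru_lock;   /* lowest order   */

/*
 * Any path that needs several of these locks must take them in the
 * order below and release them in reverse; taking block_chain_lock
 * while already holding global_lru_lock would invert the order and
 * can deadlock against another thread doing the opposite.
 */
static void move_entry_to_lru_locked(void)
{
	pthread_spin_lock(&block_chain_lock);
	pthread_spin_lock(&index_chain_lock);
	pthread_spin_lock(&entry_bg_lock);
	pthread_spin_lock(&global_lru_lock);

	/* ... touch the hash chains, the entry and the LRU list here ... */

	pthread_spin_unlock(&global_lru_lock);
	pthread_spin_unlock(&entry_bg_lock);
	pthread_spin_unlock(&index_chain_lock);
	pthread_spin_unlock(&block_chain_lock);
}

int main(void)
{
	pthread_spin_init(&block_chain_lock, PTHREAD_PROCESS_PRIVATE);
	pthread_spin_init(&index_chain_lock, PTHREAD_PROCESS_PRIVATE);
	pthread_spin_init(&entry_bg_lock, PTHREAD_PROCESS_PRIVATE);
	pthread_spin_init(&global_lru_lock, PTHREAD_PROCESS_PRIVATE);
	move_entry_to_lru_locked();
	return 0;
}
```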
@@ -37,6 +72,7 @@ | |||
37 | #include <linux/list_bl.h> | 72 | #include <linux/list_bl.h> |
38 | #include <linux/mbcache.h> | 73 | #include <linux/mbcache.h> |
39 | #include <linux/init.h> | 74 | #include <linux/init.h> |
75 | #include <linux/blockgroup_lock.h> | ||
40 | 76 | ||
41 | #ifdef MB_CACHE_DEBUG | 77 | #ifdef MB_CACHE_DEBUG |
42 | # define mb_debug(f...) do { \ | 78 | # define mb_debug(f...) do { \ |
@@ -57,8 +93,13 @@ | |||
57 | 93 | ||
58 | #define MB_CACHE_WRITER ((unsigned short)~0U >> 1) | 94 | #define MB_CACHE_WRITER ((unsigned short)~0U >> 1) |
59 | 95 | ||
96 | #define MB_CACHE_ENTRY_LOCK_BITS __builtin_log2(NR_BG_LOCKS) | ||
97 | #define MB_CACHE_ENTRY_LOCK_INDEX(ce) \ | ||
98 | (hash_long((unsigned long)ce, MB_CACHE_ENTRY_LOCK_BITS)) | ||
99 | |||
60 | static DECLARE_WAIT_QUEUE_HEAD(mb_cache_queue); | 100 | static DECLARE_WAIT_QUEUE_HEAD(mb_cache_queue); |
61 | 101 | static struct blockgroup_lock *mb_cache_bg_lock; | |
102 | |||
62 | MODULE_AUTHOR("Andreas Gruenbacher <a.gruenbacher@computer.org>"); | 103 | MODULE_AUTHOR("Andreas Gruenbacher <a.gruenbacher@computer.org>"); |
63 | MODULE_DESCRIPTION("Meta block cache (for extended attributes)"); | 104 | MODULE_DESCRIPTION("Meta block cache (for extended attributes)"); |
64 | MODULE_LICENSE("GPL"); | 105 | MODULE_LICENSE("GPL"); |
@@ -86,6 +127,20 @@ static LIST_HEAD(mb_cache_list); | |||
86 | static LIST_HEAD(mb_cache_lru_list); | 127 | static LIST_HEAD(mb_cache_lru_list); |
87 | static DEFINE_SPINLOCK(mb_cache_spinlock); | 128 | static DEFINE_SPINLOCK(mb_cache_spinlock); |
88 | 129 | ||
130 | static inline void | ||
131 | __spin_lock_mb_cache_entry(struct mb_cache_entry *ce) | ||
132 | { | ||
133 | spin_lock(bgl_lock_ptr(mb_cache_bg_lock, | ||
134 | MB_CACHE_ENTRY_LOCK_INDEX(ce))); | ||
135 | } | ||
136 | |||
137 | static inline void | ||
138 | __spin_unlock_mb_cache_entry(struct mb_cache_entry *ce) | ||
139 | { | ||
140 | spin_unlock(bgl_lock_ptr(mb_cache_bg_lock, | ||
141 | MB_CACHE_ENTRY_LOCK_INDEX(ce))); | ||
142 | } | ||
143 | |||
89 | static inline int | 144 | static inline int |
90 | __mb_cache_entry_is_block_hashed(struct mb_cache_entry *ce) | 145 | __mb_cache_entry_is_block_hashed(struct mb_cache_entry *ce) |
91 | { | 146 | { |
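The per-entry lock used by __spin_lock_mb_cache_entry() above is not stored in the entry itself: the entry's address is hashed into a fixed pool of spinlocks (the blockgroup_lock scheme). Below is a minimal userspace analogue of that indexing trick, with hypothetical names and a crude multiplicative hash standing in for hash_long(); it is a sketch of the idea, not the kernel implementation.

```c
/* Hashed-lock-pool sketch: many objects share a small array of locks. */
#include <pthread.h>
#include <stdint.h>
#include <stdio.h>

#define LOCK_POOL_SIZE 128   /* stand-in for NR_BG_LOCKS; power of two */

static pthread_spinlock_t lock_pool[LOCK_POOL_SIZE];

/* Crude pointer hash; the kernel uses hash_long() instead. */
static unsigned int lock_index(const void *obj)
{
	uintptr_t v = (uintptr_t)obj;

	v ^= v >> 7;                   /* fold in some higher bits */
	return (unsigned int)(v * 2654435761u) % LOCK_POOL_SIZE;
}

static void obj_lock(const void *obj)
{
	pthread_spin_lock(&lock_pool[lock_index(obj)]);
}

static void obj_unlock(const void *obj)
{
	pthread_spin_unlock(&lock_pool[lock_index(obj)]);
}

int main(void)
{
	int a, b;

	for (int i = 0; i < LOCK_POOL_SIZE; i++)
		pthread_spin_init(&lock_pool[i], PTHREAD_PROCESS_PRIVATE);

	obj_lock(&a);                  /* two objects may or may not     */
	obj_unlock(&a);                /* share a pool slot; correctness */
	obj_lock(&b);                  /* only needs the mapping to be   */
	obj_unlock(&b);                /* stable for a given object      */
	printf("&a -> slot %u, &b -> slot %u\n", lock_index(&a), lock_index(&b));
	return 0;
}
```

The tradeoff is the usual one for lock striping: the lock memory stays bounded no matter how many cache entries exist, at the cost of occasional false contention when unrelated entries hash to the same slot.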
@@ -113,11 +168,21 @@ __mb_cache_entry_unhash_index(struct mb_cache_entry *ce) | |||
113 | hlist_bl_del_init(&ce->e_index.o_list); | 168 | hlist_bl_del_init(&ce->e_index.o_list); |
114 | } | 169 | } |
115 | 170 | ||
171 | /* | ||
172 | * __mb_cache_entry_unhash_unlock() | ||
173 | * | ||
174 | * This function is called to unhash the entry from both the block and | ||
175 | * index hash chains. | ||
176 | * It assumes both the block and index hash chains are locked upon entry. | ||
177 | * It also unlocks both hash chains upon exit. | ||
178 | */ | ||
116 | static inline void | 179 | static inline void |
117 | __mb_cache_entry_unhash(struct mb_cache_entry *ce) | 180 | __mb_cache_entry_unhash_unlock(struct mb_cache_entry *ce) |
118 | { | 181 | { |
119 | __mb_cache_entry_unhash_index(ce); | 182 | __mb_cache_entry_unhash_index(ce); |
183 | hlist_bl_unlock(ce->e_index_hash_p); | ||
120 | __mb_cache_entry_unhash_block(ce); | 184 | __mb_cache_entry_unhash_block(ce); |
185 | hlist_bl_unlock(ce->e_block_hash_p); | ||
121 | } | 186 | } |
122 | 187 | ||
123 | static void | 188 | static void |
@@ -125,36 +190,47 @@ __mb_cache_entry_forget(struct mb_cache_entry *ce, gfp_t gfp_mask) | |||
125 | { | 190 | { |
126 | struct mb_cache *cache = ce->e_cache; | 191 | struct mb_cache *cache = ce->e_cache; |
127 | 192 | ||
128 | mb_assert(!(ce->e_used || ce->e_queued)); | 193 | mb_assert(!(ce->e_used || ce->e_queued || atomic_read(&ce->e_refcnt))); |
129 | kmem_cache_free(cache->c_entry_cache, ce); | 194 | kmem_cache_free(cache->c_entry_cache, ce); |
130 | atomic_dec(&cache->c_entry_count); | 195 | atomic_dec(&cache->c_entry_count); |
131 | } | 196 | } |
132 | 197 | ||
133 | |||
134 | static void | 198 | static void |
135 | __mb_cache_entry_release_unlock(struct mb_cache_entry *ce) | 199 | __mb_cache_entry_release(struct mb_cache_entry *ce) |
136 | __releases(mb_cache_spinlock) | ||
137 | { | 200 | { |
201 | /* First lock the entry to serialize access to its local data. */ | ||
202 | __spin_lock_mb_cache_entry(ce); | ||
138 | /* Wake up all processes queuing for this cache entry. */ | 203 | /* Wake up all processes queuing for this cache entry. */ |
139 | if (ce->e_queued) | 204 | if (ce->e_queued) |
140 | wake_up_all(&mb_cache_queue); | 205 | wake_up_all(&mb_cache_queue); |
141 | if (ce->e_used >= MB_CACHE_WRITER) | 206 | if (ce->e_used >= MB_CACHE_WRITER) |
142 | ce->e_used -= MB_CACHE_WRITER; | 207 | ce->e_used -= MB_CACHE_WRITER; |
208 | /* | ||
209 | * Make sure that all cache entries on lru_list have | ||
210 | * both e_used and e_queued equal to 0. | ||
211 | */ | ||
143 | ce->e_used--; | 212 | ce->e_used--; |
144 | if (!(ce->e_used || ce->e_queued)) { | 213 | if (!(ce->e_used || ce->e_queued || atomic_read(&ce->e_refcnt))) { |
145 | if (!__mb_cache_entry_is_block_hashed(ce)) | 214 | if (!__mb_cache_entry_is_block_hashed(ce)) { |
215 | __spin_unlock_mb_cache_entry(ce); | ||
146 | goto forget; | 216 | goto forget; |
147 | mb_assert(list_empty(&ce->e_lru_list)); | 217 | } |
148 | list_add_tail(&ce->e_lru_list, &mb_cache_lru_list); | 218 | /* |
219 | * Need access to lru list, first drop entry lock, | ||
220 | * then reacquire the lock in the proper order. | ||
221 | */ | ||
222 | spin_lock(&mb_cache_spinlock); | ||
223 | if (list_empty(&ce->e_lru_list)) | ||
224 | list_add_tail(&ce->e_lru_list, &mb_cache_lru_list); | ||
225 | spin_unlock(&mb_cache_spinlock); | ||
149 | } | 226 | } |
150 | spin_unlock(&mb_cache_spinlock); | 227 | __spin_unlock_mb_cache_entry(ce); |
151 | return; | 228 | return; |
152 | forget: | 229 | forget: |
153 | spin_unlock(&mb_cache_spinlock); | 230 | mb_assert(list_empty(&ce->e_lru_list)); |
154 | __mb_cache_entry_forget(ce, GFP_KERNEL); | 231 | __mb_cache_entry_forget(ce, GFP_KERNEL); |
155 | } | 232 | } |
156 | 233 | ||
157 | |||
158 | /* | 234 | /* |
159 | * mb_cache_shrink_scan() memory pressure callback | 235 | * mb_cache_shrink_scan() memory pressure callback |
160 | * | 236 | * |
@@ -177,17 +253,34 @@ mb_cache_shrink_scan(struct shrinker *shrink, struct shrink_control *sc) | |||
177 | 253 | ||
178 | mb_debug("trying to free %d entries", nr_to_scan); | 254 | mb_debug("trying to free %d entries", nr_to_scan); |
179 | spin_lock(&mb_cache_spinlock); | 255 | spin_lock(&mb_cache_spinlock); |
180 | while (nr_to_scan-- && !list_empty(&mb_cache_lru_list)) { | 256 | while ((nr_to_scan-- > 0) && !list_empty(&mb_cache_lru_list)) { |
181 | struct mb_cache_entry *ce = | 257 | struct mb_cache_entry *ce = |
182 | list_entry(mb_cache_lru_list.next, | 258 | list_entry(mb_cache_lru_list.next, |
183 | struct mb_cache_entry, e_lru_list); | 259 | struct mb_cache_entry, e_lru_list); |
184 | list_move_tail(&ce->e_lru_list, &free_list); | 260 | list_del_init(&ce->e_lru_list); |
185 | __mb_cache_entry_unhash(ce); | 261 | if (ce->e_used || ce->e_queued || atomic_read(&ce->e_refcnt)) |
186 | freed++; | 262 | continue; |
263 | spin_unlock(&mb_cache_spinlock); | ||
264 | /* Prevent any find or get operation on the entry */ | ||
265 | hlist_bl_lock(ce->e_block_hash_p); | ||
266 | hlist_bl_lock(ce->e_index_hash_p); | ||
267 | /* Ignore if it is touched by a find/get */ | ||
268 | if (ce->e_used || ce->e_queued || atomic_read(&ce->e_refcnt) || | ||
269 | !list_empty(&ce->e_lru_list)) { | ||
270 | hlist_bl_unlock(ce->e_index_hash_p); | ||
271 | hlist_bl_unlock(ce->e_block_hash_p); | ||
272 | spin_lock(&mb_cache_spinlock); | ||
273 | continue; | ||
274 | } | ||
275 | __mb_cache_entry_unhash_unlock(ce); | ||
276 | list_add_tail(&ce->e_lru_list, &free_list); | ||
277 | spin_lock(&mb_cache_spinlock); | ||
187 | } | 278 | } |
188 | spin_unlock(&mb_cache_spinlock); | 279 | spin_unlock(&mb_cache_spinlock); |
280 | |||
189 | list_for_each_entry_safe(entry, tmp, &free_list, e_lru_list) { | 281 | list_for_each_entry_safe(entry, tmp, &free_list, e_lru_list) { |
190 | __mb_cache_entry_forget(entry, gfp_mask); | 282 | __mb_cache_entry_forget(entry, gfp_mask); |
283 | freed++; | ||
191 | } | 284 | } |
192 | return freed; | 285 | return freed; |
193 | } | 286 | } |
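The reworked shrinker above follows a pattern that recurs throughout this patch: pull a candidate off the LRU under the global lock, drop the global lock, take the finer-grained hash-chain locks, and re-check that nobody started using the entry in the window in between. The following is a hedged userspace sketch of just that unlock/relock/re-check dance, with hypothetical types and mutexes standing in for the spinlocks and bit locks.

```c
/* Sketch of the "drop global lock, take chain lock, re-check" pattern. */
#include <pthread.h>
#include <stdlib.h>

struct toy_entry {
	int              used;        /* analogue of e_used/e_queued/e_refcnt     */
	int              on_lru;
	int              hashed;      /* still reachable through the hash?        */
	struct toy_entry *lru_next;
	pthread_mutex_t  chain_lock;  /* stand-in for the entry's hash-chain lock */
};

static pthread_mutex_t lru_lock = PTHREAD_MUTEX_INITIALIZER;
static struct toy_entry *lru_head;

/* Try to free up to nr entries from the LRU, skipping busy ones. */
static int toy_shrink(int nr)
{
	int freed = 0;

	pthread_mutex_lock(&lru_lock);
	while (nr-- > 0 && lru_head) {
		struct toy_entry *e = lru_head;

		lru_head = e->lru_next;           /* detach from the LRU  */
		e->on_lru = 0;
		if (e->used)                      /* cheap early check    */
			continue;

		pthread_mutex_unlock(&lru_lock);  /* drop the global lock */
		pthread_mutex_lock(&e->chain_lock);
		/* Re-check: a lookup may have grabbed it in the window above. */
		if (e->used || e->on_lru) {
			pthread_mutex_unlock(&e->chain_lock);
			pthread_mutex_lock(&lru_lock);
			continue;
		}
		e->hashed = 0;                    /* "unhash": no new lookups */
		pthread_mutex_unlock(&e->chain_lock);
		free(e);                          /* unreachable and unused   */
		freed++;
		pthread_mutex_lock(&lru_lock);
	}
	pthread_mutex_unlock(&lru_lock);
	return freed;
}

int main(void)
{
	return toy_shrink(16) >= 0 ? 0 : 1;
}
```

In the real function a busy entry that was pulled off the list is simply re-added to the LRU by its eventual release, which is why the re-check in the patch also tests e_lru_list.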
@@ -232,6 +325,14 @@ mb_cache_create(const char *name, int bucket_bits) | |||
232 | int n, bucket_count = 1 << bucket_bits; | 325 | int n, bucket_count = 1 << bucket_bits; |
233 | struct mb_cache *cache = NULL; | 326 | struct mb_cache *cache = NULL; |
234 | 327 | ||
328 | if (!mb_cache_bg_lock) { | ||
329 | mb_cache_bg_lock = kmalloc(sizeof(struct blockgroup_lock), | ||
330 | GFP_KERNEL); | ||
331 | if (!mb_cache_bg_lock) | ||
332 | return NULL; | ||
333 | bgl_lock_init(mb_cache_bg_lock); | ||
334 | } | ||
335 | |||
235 | cache = kmalloc(sizeof(struct mb_cache), GFP_KERNEL); | 336 | cache = kmalloc(sizeof(struct mb_cache), GFP_KERNEL); |
236 | if (!cache) | 337 | if (!cache) |
237 | return NULL; | 338 | return NULL; |
@@ -290,21 +391,47 @@ void | |||
290 | mb_cache_shrink(struct block_device *bdev) | 391 | mb_cache_shrink(struct block_device *bdev) |
291 | { | 392 | { |
292 | LIST_HEAD(free_list); | 393 | LIST_HEAD(free_list); |
293 | struct list_head *l, *ltmp; | 394 | struct list_head *l; |
395 | struct mb_cache_entry *ce, *tmp; | ||
294 | 396 | ||
397 | l = &mb_cache_lru_list; | ||
295 | spin_lock(&mb_cache_spinlock); | 398 | spin_lock(&mb_cache_spinlock); |
296 | list_for_each_safe(l, ltmp, &mb_cache_lru_list) { | 399 | while (!list_is_last(l, &mb_cache_lru_list)) { |
297 | struct mb_cache_entry *ce = | 400 | l = l->next; |
298 | list_entry(l, struct mb_cache_entry, e_lru_list); | 401 | ce = list_entry(l, struct mb_cache_entry, e_lru_list); |
299 | if (ce->e_bdev == bdev) { | 402 | if (ce->e_bdev == bdev) { |
300 | list_move_tail(&ce->e_lru_list, &free_list); | 403 | list_del_init(&ce->e_lru_list); |
301 | __mb_cache_entry_unhash(ce); | 404 | if (ce->e_used || ce->e_queued || |
405 | atomic_read(&ce->e_refcnt)) | ||
406 | continue; | ||
407 | spin_unlock(&mb_cache_spinlock); | ||
408 | /* | ||
409 | * Prevent any find or get operation on the entry. | ||
410 | */ | ||
411 | hlist_bl_lock(ce->e_block_hash_p); | ||
412 | hlist_bl_lock(ce->e_index_hash_p); | ||
413 | /* Ignore if it is touched by a find/get */ | ||
414 | if (ce->e_used || ce->e_queued || | ||
415 | atomic_read(&ce->e_refcnt) || | ||
416 | !list_empty(&ce->e_lru_list)) { | ||
417 | hlist_bl_unlock(ce->e_index_hash_p); | ||
418 | hlist_bl_unlock(ce->e_block_hash_p); | ||
419 | l = &mb_cache_lru_list; | ||
420 | spin_lock(&mb_cache_spinlock); | ||
421 | continue; | ||
422 | } | ||
423 | __mb_cache_entry_unhash_unlock(ce); | ||
424 | mb_assert(!(ce->e_used || ce->e_queued || | ||
425 | atomic_read(&ce->e_refcnt))); | ||
426 | list_add_tail(&ce->e_lru_list, &free_list); | ||
427 | l = &mb_cache_lru_list; | ||
428 | spin_lock(&mb_cache_spinlock); | ||
302 | } | 429 | } |
303 | } | 430 | } |
304 | spin_unlock(&mb_cache_spinlock); | 431 | spin_unlock(&mb_cache_spinlock); |
305 | list_for_each_safe(l, ltmp, &free_list) { | 432 | |
306 | __mb_cache_entry_forget(list_entry(l, struct mb_cache_entry, | 433 | list_for_each_entry_safe(ce, tmp, &free_list, e_lru_list) { |
307 | e_lru_list), GFP_KERNEL); | 434 | __mb_cache_entry_forget(ce, GFP_KERNEL); |
308 | } | 435 | } |
309 | } | 436 | } |
310 | 437 | ||
@@ -320,23 +447,27 @@ void | |||
320 | mb_cache_destroy(struct mb_cache *cache) | 447 | mb_cache_destroy(struct mb_cache *cache) |
321 | { | 448 | { |
322 | LIST_HEAD(free_list); | 449 | LIST_HEAD(free_list); |
323 | struct list_head *l, *ltmp; | 450 | struct mb_cache_entry *ce, *tmp; |
324 | 451 | ||
325 | spin_lock(&mb_cache_spinlock); | 452 | spin_lock(&mb_cache_spinlock); |
326 | list_for_each_safe(l, ltmp, &mb_cache_lru_list) { | 453 | list_for_each_entry_safe(ce, tmp, &mb_cache_lru_list, e_lru_list) { |
327 | struct mb_cache_entry *ce = | 454 | if (ce->e_cache == cache) |
328 | list_entry(l, struct mb_cache_entry, e_lru_list); | ||
329 | if (ce->e_cache == cache) { | ||
330 | list_move_tail(&ce->e_lru_list, &free_list); | 455 | list_move_tail(&ce->e_lru_list, &free_list); |
331 | __mb_cache_entry_unhash(ce); | ||
332 | } | ||
333 | } | 456 | } |
334 | list_del(&cache->c_cache_list); | 457 | list_del(&cache->c_cache_list); |
335 | spin_unlock(&mb_cache_spinlock); | 458 | spin_unlock(&mb_cache_spinlock); |
336 | 459 | ||
337 | list_for_each_safe(l, ltmp, &free_list) { | 460 | list_for_each_entry_safe(ce, tmp, &free_list, e_lru_list) { |
338 | __mb_cache_entry_forget(list_entry(l, struct mb_cache_entry, | 461 | list_del_init(&ce->e_lru_list); |
339 | e_lru_list), GFP_KERNEL); | 462 | /* |
463 | * Prevent any find or get operation on the entry. | ||
464 | */ | ||
465 | hlist_bl_lock(ce->e_block_hash_p); | ||
466 | hlist_bl_lock(ce->e_index_hash_p); | ||
467 | mb_assert(!(ce->e_used || ce->e_queued || | ||
468 | atomic_read(&ce->e_refcnt))); | ||
469 | __mb_cache_entry_unhash_unlock(ce); | ||
470 | __mb_cache_entry_forget(ce, GFP_KERNEL); | ||
340 | } | 471 | } |
341 | 472 | ||
342 | if (atomic_read(&cache->c_entry_count) > 0) { | 473 | if (atomic_read(&cache->c_entry_count) > 0) { |
@@ -345,8 +476,6 @@ mb_cache_destroy(struct mb_cache *cache) | |||
345 | atomic_read(&cache->c_entry_count)); | 476 | atomic_read(&cache->c_entry_count)); |
346 | } | 477 | } |
347 | 478 | ||
348 | kmem_cache_destroy(cache->c_entry_cache); | ||
349 | |||
350 | kfree(cache->c_index_hash); | 479 | kfree(cache->c_index_hash); |
351 | kfree(cache->c_block_hash); | 480 | kfree(cache->c_block_hash); |
352 | kfree(cache); | 481 | kfree(cache); |
@@ -363,29 +492,59 @@ mb_cache_destroy(struct mb_cache *cache) | |||
363 | struct mb_cache_entry * | 492 | struct mb_cache_entry * |
364 | mb_cache_entry_alloc(struct mb_cache *cache, gfp_t gfp_flags) | 493 | mb_cache_entry_alloc(struct mb_cache *cache, gfp_t gfp_flags) |
365 | { | 494 | { |
366 | struct mb_cache_entry *ce = NULL; | 495 | struct mb_cache_entry *ce; |
367 | 496 | ||
368 | if (atomic_read(&cache->c_entry_count) >= cache->c_max_entries) { | 497 | if (atomic_read(&cache->c_entry_count) >= cache->c_max_entries) { |
498 | struct list_head *l; | ||
499 | |||
500 | l = &mb_cache_lru_list; | ||
369 | spin_lock(&mb_cache_spinlock); | 501 | spin_lock(&mb_cache_spinlock); |
370 | if (!list_empty(&mb_cache_lru_list)) { | 502 | while (!list_is_last(l, &mb_cache_lru_list)) { |
371 | ce = list_entry(mb_cache_lru_list.next, | 503 | l = l->next; |
372 | struct mb_cache_entry, e_lru_list); | 504 | ce = list_entry(l, struct mb_cache_entry, e_lru_list); |
373 | list_del_init(&ce->e_lru_list); | 505 | if (ce->e_cache == cache) { |
374 | __mb_cache_entry_unhash(ce); | 506 | list_del_init(&ce->e_lru_list); |
507 | if (ce->e_used || ce->e_queued || | ||
508 | atomic_read(&ce->e_refcnt)) | ||
509 | continue; | ||
510 | spin_unlock(&mb_cache_spinlock); | ||
511 | /* | ||
512 | * Prevent any find or get operation on the | ||
513 | * entry. | ||
514 | */ | ||
515 | hlist_bl_lock(ce->e_block_hash_p); | ||
516 | hlist_bl_lock(ce->e_index_hash_p); | ||
517 | /* Ignore if it is touched by a find/get */ | ||
518 | if (ce->e_used || ce->e_queued || | ||
519 | atomic_read(&ce->e_refcnt) || | ||
520 | !list_empty(&ce->e_lru_list)) { | ||
521 | hlist_bl_unlock(ce->e_index_hash_p); | ||
522 | hlist_bl_unlock(ce->e_block_hash_p); | ||
523 | l = &mb_cache_lru_list; | ||
524 | spin_lock(&mb_cache_spinlock); | ||
525 | continue; | ||
526 | } | ||
527 | mb_assert(list_empty(&ce->e_lru_list)); | ||
528 | mb_assert(!(ce->e_used || ce->e_queued || | ||
529 | atomic_read(&ce->e_refcnt))); | ||
530 | __mb_cache_entry_unhash_unlock(ce); | ||
531 | goto found; | ||
532 | } | ||
375 | } | 533 | } |
376 | spin_unlock(&mb_cache_spinlock); | 534 | spin_unlock(&mb_cache_spinlock); |
377 | } | 535 | } |
378 | if (!ce) { | 536 | |
379 | ce = kmem_cache_alloc(cache->c_entry_cache, gfp_flags); | 537 | ce = kmem_cache_alloc(cache->c_entry_cache, gfp_flags); |
380 | if (!ce) | 538 | if (!ce) |
381 | return NULL; | 539 | return NULL; |
382 | atomic_inc(&cache->c_entry_count); | 540 | atomic_inc(&cache->c_entry_count); |
383 | INIT_LIST_HEAD(&ce->e_lru_list); | 541 | INIT_LIST_HEAD(&ce->e_lru_list); |
384 | INIT_HLIST_BL_NODE(&ce->e_block_list); | 542 | INIT_HLIST_BL_NODE(&ce->e_block_list); |
385 | INIT_HLIST_BL_NODE(&ce->e_index.o_list); | 543 | INIT_HLIST_BL_NODE(&ce->e_index.o_list); |
386 | ce->e_cache = cache; | 544 | ce->e_cache = cache; |
387 | ce->e_queued = 0; | 545 | ce->e_queued = 0; |
388 | } | 546 | atomic_set(&ce->e_refcnt, 0); |
547 | found: | ||
389 | ce->e_block_hash_p = &cache->c_block_hash[0]; | 548 | ce->e_block_hash_p = &cache->c_block_hash[0]; |
390 | ce->e_index_hash_p = &cache->c_index_hash[0]; | 549 | ce->e_index_hash_p = &cache->c_index_hash[0]; |
391 | ce->e_used = 1 + MB_CACHE_WRITER; | 550 | ce->e_used = 1 + MB_CACHE_WRITER; |
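mb_cache_entry_alloc now tries to recycle an idle entry from the LRU before allocating a fresh one, jumping to the found: label when it succeeds. Below is a compressed, hedged userspace sketch of that reuse-or-allocate shape with hypothetical names; the per-entry re-check under the hash-chain locks is omitted here because it matches the shrinker sketch shown earlier.

```c
/* Sketch of "recycle an idle LRU entry, else allocate a fresh one". */
#include <pthread.h>
#include <stdlib.h>

struct toy_entry {
	int              used;
	struct toy_entry *lru_next;
};

static pthread_mutex_t lru_lock = PTHREAD_MUTEX_INITIALIZER;
static struct toy_entry *lru_head;
static int entry_count, max_entries = 1024;

static struct toy_entry *toy_alloc(void)
{
	struct toy_entry *e = NULL;

	if (entry_count >= max_entries) {
		pthread_mutex_lock(&lru_lock);
		while (lru_head) {
			e = lru_head;
			lru_head = e->lru_next;      /* take it off the LRU */
			if (!e->used)                /* idle: reuse it      */
				break;
			e = NULL;                    /* busy: keep scanning */
		}
		pthread_mutex_unlock(&lru_lock);
		if (e)
			goto found;
	}

	e = calloc(1, sizeof(*e));               /* fall back to a new entry */
	if (!e)
		return NULL;
	entry_count++;
found:
	e->used = 1;                              /* hand it out to the caller */
	return e;
}

int main(void)
{
	struct toy_entry *e = toy_alloc();

	free(e);
	return e ? 0 : 1;
}
```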
@@ -414,7 +573,6 @@ mb_cache_entry_insert(struct mb_cache_entry *ce, struct block_device *bdev, | |||
414 | struct mb_cache *cache = ce->e_cache; | 573 | struct mb_cache *cache = ce->e_cache; |
415 | unsigned int bucket; | 574 | unsigned int bucket; |
416 | struct hlist_bl_node *l; | 575 | struct hlist_bl_node *l; |
417 | int error = -EBUSY; | ||
418 | struct hlist_bl_head *block_hash_p; | 576 | struct hlist_bl_head *block_hash_p; |
419 | struct hlist_bl_head *index_hash_p; | 577 | struct hlist_bl_head *index_hash_p; |
420 | struct mb_cache_entry *lce; | 578 | struct mb_cache_entry *lce; |
@@ -423,26 +581,29 @@ mb_cache_entry_insert(struct mb_cache_entry *ce, struct block_device *bdev, | |||
423 | bucket = hash_long((unsigned long)bdev + (block & 0xffffffff), | 581 | bucket = hash_long((unsigned long)bdev + (block & 0xffffffff), |
424 | cache->c_bucket_bits); | 582 | cache->c_bucket_bits); |
425 | block_hash_p = &cache->c_block_hash[bucket]; | 583 | block_hash_p = &cache->c_block_hash[bucket]; |
426 | spin_lock(&mb_cache_spinlock); | 584 | hlist_bl_lock(block_hash_p); |
427 | hlist_bl_for_each_entry(lce, l, block_hash_p, e_block_list) { | 585 | hlist_bl_for_each_entry(lce, l, block_hash_p, e_block_list) { |
428 | if (lce->e_bdev == bdev && lce->e_block == block) | 586 | if (lce->e_bdev == bdev && lce->e_block == block) { |
429 | goto out; | 587 | hlist_bl_unlock(block_hash_p); |
588 | return -EBUSY; | ||
589 | } | ||
430 | } | 590 | } |
431 | mb_assert(!__mb_cache_entry_is_block_hashed(ce)); | 591 | mb_assert(!__mb_cache_entry_is_block_hashed(ce)); |
432 | __mb_cache_entry_unhash(ce); | 592 | __mb_cache_entry_unhash_block(ce); |
593 | __mb_cache_entry_unhash_index(ce); | ||
433 | ce->e_bdev = bdev; | 594 | ce->e_bdev = bdev; |
434 | ce->e_block = block; | 595 | ce->e_block = block; |
435 | ce->e_block_hash_p = block_hash_p; | 596 | ce->e_block_hash_p = block_hash_p; |
436 | ce->e_index.o_key = key; | 597 | ce->e_index.o_key = key; |
598 | hlist_bl_add_head(&ce->e_block_list, block_hash_p); | ||
599 | hlist_bl_unlock(block_hash_p); | ||
437 | bucket = hash_long(key, cache->c_bucket_bits); | 600 | bucket = hash_long(key, cache->c_bucket_bits); |
438 | index_hash_p = &cache->c_index_hash[bucket]; | 601 | index_hash_p = &cache->c_index_hash[bucket]; |
602 | hlist_bl_lock(index_hash_p); | ||
439 | ce->e_index_hash_p = index_hash_p; | 603 | ce->e_index_hash_p = index_hash_p; |
440 | hlist_bl_add_head(&ce->e_index.o_list, index_hash_p); | 604 | hlist_bl_add_head(&ce->e_index.o_list, index_hash_p); |
441 | hlist_bl_add_head(&ce->e_block_list, block_hash_p); | 605 | hlist_bl_unlock(index_hash_p); |
442 | error = 0; | 606 | return 0; |
443 | out: | ||
444 | spin_unlock(&mb_cache_spinlock); | ||
445 | return error; | ||
446 | } | 607 | } |
447 | 608 | ||
448 | 609 | ||
@@ -456,24 +617,26 @@ out: | |||
456 | void | 617 | void |
457 | mb_cache_entry_release(struct mb_cache_entry *ce) | 618 | mb_cache_entry_release(struct mb_cache_entry *ce) |
458 | { | 619 | { |
459 | spin_lock(&mb_cache_spinlock); | 620 | __mb_cache_entry_release(ce); |
460 | __mb_cache_entry_release_unlock(ce); | ||
461 | } | 621 | } |
462 | 622 | ||
463 | 623 | ||
464 | /* | 624 | /* |
465 | * mb_cache_entry_free() | 625 | * mb_cache_entry_free() |
466 | * | 626 | * |
467 | * This is equivalent to the sequence mb_cache_entry_takeout() -- | ||
468 | * mb_cache_entry_release(). | ||
469 | */ | 627 | */ |
470 | void | 628 | void |
471 | mb_cache_entry_free(struct mb_cache_entry *ce) | 629 | mb_cache_entry_free(struct mb_cache_entry *ce) |
472 | { | 630 | { |
473 | spin_lock(&mb_cache_spinlock); | 631 | mb_assert(ce); |
474 | mb_assert(list_empty(&ce->e_lru_list)); | 632 | mb_assert(list_empty(&ce->e_lru_list)); |
475 | __mb_cache_entry_unhash(ce); | 633 | hlist_bl_lock(ce->e_index_hash_p); |
476 | __mb_cache_entry_release_unlock(ce); | 634 | __mb_cache_entry_unhash_index(ce); |
635 | hlist_bl_unlock(ce->e_index_hash_p); | ||
636 | hlist_bl_lock(ce->e_block_hash_p); | ||
637 | __mb_cache_entry_unhash_block(ce); | ||
638 | hlist_bl_unlock(ce->e_block_hash_p); | ||
639 | __mb_cache_entry_release(ce); | ||
477 | } | 640 | } |
478 | 641 | ||
479 | 642 | ||
@@ -497,39 +660,48 @@ mb_cache_entry_get(struct mb_cache *cache, struct block_device *bdev, | |||
497 | bucket = hash_long((unsigned long)bdev + (block & 0xffffffff), | 660 | bucket = hash_long((unsigned long)bdev + (block & 0xffffffff), |
498 | cache->c_bucket_bits); | 661 | cache->c_bucket_bits); |
499 | block_hash_p = &cache->c_block_hash[bucket]; | 662 | block_hash_p = &cache->c_block_hash[bucket]; |
500 | spin_lock(&mb_cache_spinlock); | 663 | /* First serialize access to the corresponding block hash chain. */ |
664 | hlist_bl_lock(block_hash_p); | ||
501 | hlist_bl_for_each_entry(ce, l, block_hash_p, e_block_list) { | 665 | hlist_bl_for_each_entry(ce, l, block_hash_p, e_block_list) { |
502 | mb_assert(ce->e_block_hash_p == block_hash_p); | 666 | mb_assert(ce->e_block_hash_p == block_hash_p); |
503 | if (ce->e_bdev == bdev && ce->e_block == block) { | 667 | if (ce->e_bdev == bdev && ce->e_block == block) { |
504 | DEFINE_WAIT(wait); | 668 | /* |
669 | * Prevent a free from removing the entry. | ||
670 | */ | ||
671 | atomic_inc(&ce->e_refcnt); | ||
672 | hlist_bl_unlock(block_hash_p); | ||
673 | __spin_lock_mb_cache_entry(ce); | ||
674 | atomic_dec(&ce->e_refcnt); | ||
675 | if (ce->e_used > 0) { | ||
676 | DEFINE_WAIT(wait); | ||
677 | while (ce->e_used > 0) { | ||
678 | ce->e_queued++; | ||
679 | prepare_to_wait(&mb_cache_queue, &wait, | ||
680 | TASK_UNINTERRUPTIBLE); | ||
681 | __spin_unlock_mb_cache_entry(ce); | ||
682 | schedule(); | ||
683 | __spin_lock_mb_cache_entry(ce); | ||
684 | ce->e_queued--; | ||
685 | } | ||
686 | finish_wait(&mb_cache_queue, &wait); | ||
687 | } | ||
688 | ce->e_used += 1 + MB_CACHE_WRITER; | ||
689 | __spin_unlock_mb_cache_entry(ce); | ||
505 | 690 | ||
506 | if (!list_empty(&ce->e_lru_list)) | 691 | if (!list_empty(&ce->e_lru_list)) { |
692 | spin_lock(&mb_cache_spinlock); | ||
507 | list_del_init(&ce->e_lru_list); | 693 | list_del_init(&ce->e_lru_list); |
508 | |||
509 | while (ce->e_used > 0) { | ||
510 | ce->e_queued++; | ||
511 | prepare_to_wait(&mb_cache_queue, &wait, | ||
512 | TASK_UNINTERRUPTIBLE); | ||
513 | spin_unlock(&mb_cache_spinlock); | 694 | spin_unlock(&mb_cache_spinlock); |
514 | schedule(); | ||
515 | spin_lock(&mb_cache_spinlock); | ||
516 | ce->e_queued--; | ||
517 | } | 695 | } |
518 | finish_wait(&mb_cache_queue, &wait); | ||
519 | ce->e_used += 1 + MB_CACHE_WRITER; | ||
520 | |||
521 | if (!__mb_cache_entry_is_block_hashed(ce)) { | 696 | if (!__mb_cache_entry_is_block_hashed(ce)) { |
522 | __mb_cache_entry_release_unlock(ce); | 697 | __mb_cache_entry_release(ce); |
523 | return NULL; | 698 | return NULL; |
524 | } | 699 | } |
525 | goto cleanup; | 700 | return ce; |
526 | } | 701 | } |
527 | } | 702 | } |
528 | ce = NULL; | 703 | hlist_bl_unlock(block_hash_p); |
529 | 704 | return NULL; | |
530 | cleanup: | ||
531 | spin_unlock(&mb_cache_spinlock); | ||
532 | return ce; | ||
533 | } | 705 | } |
534 | 706 | ||
535 | #if !defined(MB_CACHE_INDEXES_COUNT) || (MB_CACHE_INDEXES_COUNT > 0) | 707 | #if !defined(MB_CACHE_INDEXES_COUNT) || (MB_CACHE_INDEXES_COUNT > 0) |
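When mb_cache_entry_get above finds the right block but the entry is already in use, it sleeps on mb_cache_queue until e_used drops, re-taking the per-entry lock around each check. The following is a hedged userspace analogue of that wait loop, using a condition variable in place of the kernel wait queue; all names are hypothetical and the constants are stand-ins.

```c
/* Sketch of "wait until the entry is free, then take it as a writer". */
#include <pthread.h>

#define TOY_WRITER  (1 << 16)   /* stand-in for MB_CACHE_WRITER */

struct toy_entry {
	int             e_used;
	int             e_queued;
	pthread_mutex_t lock;       /* analogue of the per-entry bg lock */
	pthread_cond_t  queue;      /* analogue of the wait queue        */
};

/* Called with a reference already pinning the entry (see e_refcnt). */
static void toy_get_locked(struct toy_entry *ce)
{
	pthread_mutex_lock(&ce->lock);
	while (ce->e_used > 0) {
		ce->e_queued++;
		/* Atomically drops the lock, sleeps, and re-takes it. */
		pthread_cond_wait(&ce->queue, &ce->lock);
		ce->e_queued--;
	}
	ce->e_used += 1 + TOY_WRITER;   /* exclusive ("writer") hold */
	pthread_mutex_unlock(&ce->lock);
}

/* Release path: wake up anyone queued on the entry. */
static void toy_release(struct toy_entry *ce)
{
	pthread_mutex_lock(&ce->lock);
	if (ce->e_used >= TOY_WRITER)
		ce->e_used -= TOY_WRITER;
	ce->e_used--;
	if (ce->e_queued)
		pthread_cond_broadcast(&ce->queue);
	pthread_mutex_unlock(&ce->lock);
}

int main(void)
{
	struct toy_entry ce = {
		.lock  = PTHREAD_MUTEX_INITIALIZER,
		.queue = PTHREAD_COND_INITIALIZER,
	};

	toy_get_locked(&ce);
	toy_release(&ce);
	return ce.e_used;           /* 0 after a balanced get/release */
}
```

The real code uses one global wait queue (mb_cache_queue) shared by all entries rather than a per-entry condition variable, which is why it wakes all waiters and lets each re-check its own entry.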
@@ -538,40 +710,53 @@ static struct mb_cache_entry * | |||
538 | __mb_cache_entry_find(struct hlist_bl_node *l, struct hlist_bl_head *head, | 710 | __mb_cache_entry_find(struct hlist_bl_node *l, struct hlist_bl_head *head, |
539 | struct block_device *bdev, unsigned int key) | 711 | struct block_device *bdev, unsigned int key) |
540 | { | 712 | { |
713 | |||
714 | /* The index hash chain is already locked by the caller. */ | ||
541 | while (l != NULL) { | 715 | while (l != NULL) { |
542 | struct mb_cache_entry *ce = | 716 | struct mb_cache_entry *ce = |
543 | hlist_bl_entry(l, struct mb_cache_entry, | 717 | hlist_bl_entry(l, struct mb_cache_entry, |
544 | e_index.o_list); | 718 | e_index.o_list); |
545 | mb_assert(ce->e_index_hash_p == head); | 719 | mb_assert(ce->e_index_hash_p == head); |
546 | if (ce->e_bdev == bdev && ce->e_index.o_key == key) { | 720 | if (ce->e_bdev == bdev && ce->e_index.o_key == key) { |
547 | DEFINE_WAIT(wait); | 721 | /* |
548 | 722 | * Prevent a free from removing the entry. | |
549 | if (!list_empty(&ce->e_lru_list)) | 723 | */ |
550 | list_del_init(&ce->e_lru_list); | 724 | atomic_inc(&ce->e_refcnt); |
551 | 725 | hlist_bl_unlock(head); | |
726 | __spin_lock_mb_cache_entry(ce); | ||
727 | atomic_dec(&ce->e_refcnt); | ||
728 | ce->e_used++; | ||
552 | /* Incrementing before holding the lock gives readers | 729 | /* Incrementing before holding the lock gives readers |
553 | priority over writers. */ | 730 | priority over writers. */ |
554 | ce->e_used++; | 731 | if (ce->e_used >= MB_CACHE_WRITER) { |
555 | while (ce->e_used >= MB_CACHE_WRITER) { | 732 | DEFINE_WAIT(wait); |
556 | ce->e_queued++; | 733 | |
557 | prepare_to_wait(&mb_cache_queue, &wait, | 734 | while (ce->e_used >= MB_CACHE_WRITER) { |
558 | TASK_UNINTERRUPTIBLE); | 735 | ce->e_queued++; |
559 | spin_unlock(&mb_cache_spinlock); | 736 | prepare_to_wait(&mb_cache_queue, &wait, |
560 | schedule(); | 737 | TASK_UNINTERRUPTIBLE); |
738 | __spin_unlock_mb_cache_entry(ce); | ||
739 | schedule(); | ||
740 | __spin_lock_mb_cache_entry(ce); | ||
741 | ce->e_queued--; | ||
742 | } | ||
743 | finish_wait(&mb_cache_queue, &wait); | ||
744 | } | ||
745 | __spin_unlock_mb_cache_entry(ce); | ||
746 | if (!list_empty(&ce->e_lru_list)) { | ||
561 | spin_lock(&mb_cache_spinlock); | 747 | spin_lock(&mb_cache_spinlock); |
562 | ce->e_queued--; | 748 | list_del_init(&ce->e_lru_list); |
749 | spin_unlock(&mb_cache_spinlock); | ||
563 | } | 750 | } |
564 | finish_wait(&mb_cache_queue, &wait); | ||
565 | |||
566 | if (!__mb_cache_entry_is_block_hashed(ce)) { | 751 | if (!__mb_cache_entry_is_block_hashed(ce)) { |
567 | __mb_cache_entry_release_unlock(ce); | 752 | __mb_cache_entry_release(ce); |
568 | spin_lock(&mb_cache_spinlock); | ||
569 | return ERR_PTR(-EAGAIN); | 753 | return ERR_PTR(-EAGAIN); |
570 | } | 754 | } |
571 | return ce; | 755 | return ce; |
572 | } | 756 | } |
573 | l = l->next; | 757 | l = l->next; |
574 | } | 758 | } |
759 | hlist_bl_unlock(head); | ||
575 | return NULL; | 760 | return NULL; |
576 | } | 761 | } |
577 | 762 | ||
@@ -598,12 +783,12 @@ mb_cache_entry_find_first(struct mb_cache *cache, struct block_device *bdev, | |||
598 | struct hlist_bl_head *index_hash_p; | 783 | struct hlist_bl_head *index_hash_p; |
599 | 784 | ||
600 | index_hash_p = &cache->c_index_hash[bucket]; | 785 | index_hash_p = &cache->c_index_hash[bucket]; |
601 | spin_lock(&mb_cache_spinlock); | 786 | hlist_bl_lock(index_hash_p); |
602 | if (!hlist_bl_empty(index_hash_p)) { | 787 | if (!hlist_bl_empty(index_hash_p)) { |
603 | l = hlist_bl_first(index_hash_p); | 788 | l = hlist_bl_first(index_hash_p); |
604 | ce = __mb_cache_entry_find(l, index_hash_p, bdev, key); | 789 | ce = __mb_cache_entry_find(l, index_hash_p, bdev, key); |
605 | } | 790 | } else |
606 | spin_unlock(&mb_cache_spinlock); | 791 | hlist_bl_unlock(index_hash_p); |
607 | return ce; | 792 | return ce; |
608 | } | 793 | } |
609 | 794 | ||
@@ -638,11 +823,11 @@ mb_cache_entry_find_next(struct mb_cache_entry *prev, | |||
638 | 823 | ||
639 | index_hash_p = &cache->c_index_hash[bucket]; | 824 | index_hash_p = &cache->c_index_hash[bucket]; |
640 | mb_assert(prev->e_index_hash_p == index_hash_p); | 825 | mb_assert(prev->e_index_hash_p == index_hash_p); |
641 | spin_lock(&mb_cache_spinlock); | 826 | hlist_bl_lock(index_hash_p); |
642 | mb_assert(!hlist_bl_empty(index_hash_p)); | 827 | mb_assert(!hlist_bl_empty(index_hash_p)); |
643 | l = prev->e_index.o_list.next; | 828 | l = prev->e_index.o_list.next; |
644 | ce = __mb_cache_entry_find(l, index_hash_p, bdev, key); | 829 | ce = __mb_cache_entry_find(l, index_hash_p, bdev, key); |
645 | __mb_cache_entry_release_unlock(prev); | 830 | __mb_cache_entry_release(prev); |
646 | return ce; | 831 | return ce; |
647 | } | 832 | } |
648 | 833 | ||