path: root/fs/mbcache.c
author      T Makphaibulchoke <tmac@hp.com>    2014-03-18 19:23:20 -0400
committer   Theodore Ts'o <tytso@mit.edu>      2014-03-18 19:23:20 -0400
commit      1f3e55fe02d12213f87869768aa2b0bad3ba9a7d (patch)
tree        75fc0934a88c67ba00f4c2b30fc00e3c3c281e61 /fs/mbcache.c
parent      3e037e5211252902a188a6a11aecd247409d0229 (diff)
fs/mbcache.c: decouple the locking of local from global data
This patch increases the parallelism of mbcache by using the built-in lock in the hlist_bl_node to protect the mb_cache's local block and index hash chains. The global data mb_cache_lru_list and mb_cache_list continue to be protected by the global mb_cache_spinlock.

A new block group spinlock, mb_cache_bg_lock, is also added to serialize accesses to an mb_cache_entry's local data.

A new member, e_refcnt, is added to the mb_cache_entry structure to help prevent an mb_cache_entry from being deallocated by a free while it is being referenced by either mb_cache_entry_get() or mb_cache_entry_find().

Signed-off-by: T. Makphaibulchoke <tmac@hp.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
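For orientation before the diff: the per-entry lock introduced here is not embedded in struct mb_cache_entry itself; it is borrowed from a shared blockgroup_lock array (mb_cache_bg_lock), selected by hashing the entry's address via the MB_CACHE_ENTRY_LOCK_INDEX() helper added below. The following is a minimal userspace sketch of that hashed-lock-array idea; the lock count, type names, and helper names are illustrative, not the kernel's.

#include <pthread.h>
#include <stdint.h>

/*
 * Simplified userspace illustration, not the kernel code: a fixed array of
 * spinlocks stands in for the kernel's struct blockgroup_lock, and the lock
 * guarding an entry's local fields (e_used/e_queued in the patch) is chosen
 * by hashing the entry's address.
 */
#define ENTRY_LOCK_COUNT 128	/* assumed power of two, like NR_BG_LOCKS */

static pthread_spinlock_t entry_locks[ENTRY_LOCK_COUNT];

struct cache_entry {
	unsigned int used;	/* stands in for e_used */
	unsigned int queued;	/* stands in for e_queued */
};

static void entry_locks_init(void)
{
	for (int i = 0; i < ENTRY_LOCK_COUNT; i++)
		pthread_spin_init(&entry_locks[i], PTHREAD_PROCESS_PRIVATE);
}

/* Hash the entry pointer down to a lock index (cf. MB_CACHE_ENTRY_LOCK_INDEX). */
static unsigned int entry_lock_index(const struct cache_entry *ce)
{
	return (unsigned int)(((uintptr_t)ce >> 4) & (ENTRY_LOCK_COUNT - 1));
}

static void entry_lock(struct cache_entry *ce)
{
	pthread_spin_lock(&entry_locks[entry_lock_index(ce)]);
}

static void entry_unlock(struct cache_entry *ce)
{
	pthread_spin_unlock(&entry_locks[entry_lock_index(ce)]);
}

Entries that hash to different slots can be locked concurrently, while the memory cost stays fixed at ENTRY_LOCK_COUNT locks instead of one lock per entry.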
Diffstat (limited to 'fs/mbcache.c')
-rw-r--r--  fs/mbcache.c  417
1 file changed, 301 insertions(+), 116 deletions(-)
diff --git a/fs/mbcache.c b/fs/mbcache.c
index 55db0daaca74..786ecab81c99 100644
--- a/fs/mbcache.c
+++ b/fs/mbcache.c
@@ -26,6 +26,41 @@
26 * back on the lru list. 26 * back on the lru list.
27 */ 27 */
28 28
29/*
30 * Lock descriptions and usage:
31 *
32 * Each hash chain of both the block and index hash tables now contains
33 * a built-in lock used to serialize accesses to the hash chain.
34 *
35 * Accesses to global data structures mb_cache_list and mb_cache_lru_list
36 * are serialized via the global spinlock mb_cache_spinlock.
37 *
38 * Each mb_cache_entry contains a spinlock, e_entry_lock, to serialize
39 * accesses to its local data, such as e_used and e_queued.
40 *
41 * Lock ordering:
42 *
43 * Each block hash chain's lock has the highest lock order, followed by an
44 * index hash chain's lock, mb_cache_bg_lock (used to implement mb_cache_entry's
45 * lock), and mb_cache_spinlock, with the lowest order. While holding
46 * either a block or index hash chain lock, a thread can acquire an
47 * mb_cache_bg_lock, which in turn can also acquire mb_cache_spinlock.
48 *
49 * Synchronization:
50 *
51 * Since both mb_cache_entry_get and mb_cache_entry_find scan the block and
52 * index hash chain, they need to lock the corresponding hash chain. For each
53 * mb_cache_entry within the chain, they need to lock the mb_cache_entry to
54 * prevent any simultaneous release or free on the entry and also
55 * to serialize accesses to either the e_used or e_queued member of the entry.
56 *
57 * To avoid having a dangling reference to an already freed
58 * mb_cache_entry, an mb_cache_entry is only freed when it is not on a
59 * block hash chain and also no longer being referenced, i.e. both e_used
60 * and e_queued are 0. When an mb_cache_entry is explicitly freed, it is
61 * first removed from a block hash chain.
62 */
63
29#include <linux/kernel.h> 64#include <linux/kernel.h>
30#include <linux/module.h> 65#include <linux/module.h>
31 66
@@ -37,6 +72,7 @@
37#include <linux/list_bl.h> 72#include <linux/list_bl.h>
38#include <linux/mbcache.h> 73#include <linux/mbcache.h>
39#include <linux/init.h> 74#include <linux/init.h>
75#include <linux/blockgroup_lock.h>
40 76
41#ifdef MB_CACHE_DEBUG 77#ifdef MB_CACHE_DEBUG
42# define mb_debug(f...) do { \ 78# define mb_debug(f...) do { \
@@ -57,8 +93,13 @@
57 93
58#define MB_CACHE_WRITER ((unsigned short)~0U >> 1) 94#define MB_CACHE_WRITER ((unsigned short)~0U >> 1)
59 95
96#define MB_CACHE_ENTRY_LOCK_BITS __builtin_log2(NR_BG_LOCKS)
97#define MB_CACHE_ENTRY_LOCK_INDEX(ce) \
98 (hash_long((unsigned long)ce, MB_CACHE_ENTRY_LOCK_BITS))
99
60static DECLARE_WAIT_QUEUE_HEAD(mb_cache_queue); 100static DECLARE_WAIT_QUEUE_HEAD(mb_cache_queue);
61 101static struct blockgroup_lock *mb_cache_bg_lock;
102
62MODULE_AUTHOR("Andreas Gruenbacher <a.gruenbacher@computer.org>"); 103MODULE_AUTHOR("Andreas Gruenbacher <a.gruenbacher@computer.org>");
63MODULE_DESCRIPTION("Meta block cache (for extended attributes)"); 104MODULE_DESCRIPTION("Meta block cache (for extended attributes)");
64MODULE_LICENSE("GPL"); 105MODULE_LICENSE("GPL");
@@ -86,6 +127,20 @@ static LIST_HEAD(mb_cache_list);
86static LIST_HEAD(mb_cache_lru_list); 127static LIST_HEAD(mb_cache_lru_list);
87static DEFINE_SPINLOCK(mb_cache_spinlock); 128static DEFINE_SPINLOCK(mb_cache_spinlock);
88 129
130static inline void
131__spin_lock_mb_cache_entry(struct mb_cache_entry *ce)
132{
133 spin_lock(bgl_lock_ptr(mb_cache_bg_lock,
134 MB_CACHE_ENTRY_LOCK_INDEX(ce)));
135}
136
137static inline void
138__spin_unlock_mb_cache_entry(struct mb_cache_entry *ce)
139{
140 spin_unlock(bgl_lock_ptr(mb_cache_bg_lock,
141 MB_CACHE_ENTRY_LOCK_INDEX(ce)));
142}
143
89static inline int 144static inline int
90__mb_cache_entry_is_block_hashed(struct mb_cache_entry *ce) 145__mb_cache_entry_is_block_hashed(struct mb_cache_entry *ce)
91{ 146{
@@ -113,11 +168,21 @@ __mb_cache_entry_unhash_index(struct mb_cache_entry *ce)
113 hlist_bl_del_init(&ce->e_index.o_list); 168 hlist_bl_del_init(&ce->e_index.o_list);
114} 169}
115 170
171/*
172 * __mb_cache_entry_unhash_unlock()
173 *
174 * This function is called to unhash the entry from both the block and
175 * index hash chains.
176 * It assumes both the block and index hash chain locks are held upon entry.
177 * It also unlocks both hash chains upon exit.
178 */
116static inline void 179static inline void
117__mb_cache_entry_unhash(struct mb_cache_entry *ce) 180__mb_cache_entry_unhash_unlock(struct mb_cache_entry *ce)
118{ 181{
119 __mb_cache_entry_unhash_index(ce); 182 __mb_cache_entry_unhash_index(ce);
183 hlist_bl_unlock(ce->e_index_hash_p);
120 __mb_cache_entry_unhash_block(ce); 184 __mb_cache_entry_unhash_block(ce);
185 hlist_bl_unlock(ce->e_block_hash_p);
121} 186}
122 187
123static void 188static void
@@ -125,36 +190,47 @@ __mb_cache_entry_forget(struct mb_cache_entry *ce, gfp_t gfp_mask)
125{ 190{
126 struct mb_cache *cache = ce->e_cache; 191 struct mb_cache *cache = ce->e_cache;
127 192
128 mb_assert(!(ce->e_used || ce->e_queued)); 193 mb_assert(!(ce->e_used || ce->e_queued || atomic_read(&ce->e_refcnt)));
129 kmem_cache_free(cache->c_entry_cache, ce); 194 kmem_cache_free(cache->c_entry_cache, ce);
130 atomic_dec(&cache->c_entry_count); 195 atomic_dec(&cache->c_entry_count);
131} 196}
132 197
133
134static void 198static void
135__mb_cache_entry_release_unlock(struct mb_cache_entry *ce) 199__mb_cache_entry_release(struct mb_cache_entry *ce)
136 __releases(mb_cache_spinlock)
137{ 200{
201 /* First lock the entry to serialize access to its local data. */
202 __spin_lock_mb_cache_entry(ce);
138 /* Wake up all processes queuing for this cache entry. */ 203 /* Wake up all processes queuing for this cache entry. */
139 if (ce->e_queued) 204 if (ce->e_queued)
140 wake_up_all(&mb_cache_queue); 205 wake_up_all(&mb_cache_queue);
141 if (ce->e_used >= MB_CACHE_WRITER) 206 if (ce->e_used >= MB_CACHE_WRITER)
142 ce->e_used -= MB_CACHE_WRITER; 207 ce->e_used -= MB_CACHE_WRITER;
208 /*
209 * Make sure that all cache entries on lru_list have
210 * both e_used and e_queued equal to 0.
211 */
143 ce->e_used--; 212 ce->e_used--;
144 if (!(ce->e_used || ce->e_queued)) { 213 if (!(ce->e_used || ce->e_queued || atomic_read(&ce->e_refcnt))) {
145 if (!__mb_cache_entry_is_block_hashed(ce)) 214 if (!__mb_cache_entry_is_block_hashed(ce)) {
215 __spin_unlock_mb_cache_entry(ce);
146 goto forget; 216 goto forget;
147 mb_assert(list_empty(&ce->e_lru_list)); 217 }
148 list_add_tail(&ce->e_lru_list, &mb_cache_lru_list); 218 /*
219 * Need access to lru list, first drop entry lock,
220 * then reacquire the lock in the proper order.
221 */
222 spin_lock(&mb_cache_spinlock);
223 if (list_empty(&ce->e_lru_list))
224 list_add_tail(&ce->e_lru_list, &mb_cache_lru_list);
225 spin_unlock(&mb_cache_spinlock);
149 } 226 }
150 spin_unlock(&mb_cache_spinlock); 227 __spin_unlock_mb_cache_entry(ce);
151 return; 228 return;
152forget: 229forget:
153 spin_unlock(&mb_cache_spinlock); 230 mb_assert(list_empty(&ce->e_lru_list));
154 __mb_cache_entry_forget(ce, GFP_KERNEL); 231 __mb_cache_entry_forget(ce, GFP_KERNEL);
155} 232}
156 233
157
158/* 234/*
159 * mb_cache_shrink_scan() memory pressure callback 235 * mb_cache_shrink_scan() memory pressure callback
160 * 236 *
@@ -177,17 +253,34 @@ mb_cache_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
177 253
178 mb_debug("trying to free %d entries", nr_to_scan); 254 mb_debug("trying to free %d entries", nr_to_scan);
179 spin_lock(&mb_cache_spinlock); 255 spin_lock(&mb_cache_spinlock);
180 while (nr_to_scan-- && !list_empty(&mb_cache_lru_list)) { 256 while ((nr_to_scan-- > 0) && !list_empty(&mb_cache_lru_list)) {
181 struct mb_cache_entry *ce = 257 struct mb_cache_entry *ce =
182 list_entry(mb_cache_lru_list.next, 258 list_entry(mb_cache_lru_list.next,
183 struct mb_cache_entry, e_lru_list); 259 struct mb_cache_entry, e_lru_list);
184 list_move_tail(&ce->e_lru_list, &free_list); 260 list_del_init(&ce->e_lru_list);
185 __mb_cache_entry_unhash(ce); 261 if (ce->e_used || ce->e_queued || atomic_read(&ce->e_refcnt))
186 freed++; 262 continue;
263 spin_unlock(&mb_cache_spinlock);
264 /* Prevent any find or get operation on the entry */
265 hlist_bl_lock(ce->e_block_hash_p);
266 hlist_bl_lock(ce->e_index_hash_p);
267 /* Ignore if it is touched by a find/get */
268 if (ce->e_used || ce->e_queued || atomic_read(&ce->e_refcnt) ||
269 !list_empty(&ce->e_lru_list)) {
270 hlist_bl_unlock(ce->e_index_hash_p);
271 hlist_bl_unlock(ce->e_block_hash_p);
272 spin_lock(&mb_cache_spinlock);
273 continue;
274 }
275 __mb_cache_entry_unhash_unlock(ce);
276 list_add_tail(&ce->e_lru_list, &free_list);
277 spin_lock(&mb_cache_spinlock);
187 } 278 }
188 spin_unlock(&mb_cache_spinlock); 279 spin_unlock(&mb_cache_spinlock);
280
189 list_for_each_entry_safe(entry, tmp, &free_list, e_lru_list) { 281 list_for_each_entry_safe(entry, tmp, &free_list, e_lru_list) {
190 __mb_cache_entry_forget(entry, gfp_mask); 282 __mb_cache_entry_forget(entry, gfp_mask);
283 freed++;
191 } 284 }
192 return freed; 285 return freed;
193} 286}
@@ -232,6 +325,14 @@ mb_cache_create(const char *name, int bucket_bits)
232 int n, bucket_count = 1 << bucket_bits; 325 int n, bucket_count = 1 << bucket_bits;
233 struct mb_cache *cache = NULL; 326 struct mb_cache *cache = NULL;
234 327
328 if (!mb_cache_bg_lock) {
329 mb_cache_bg_lock = kmalloc(sizeof(struct blockgroup_lock),
330 GFP_KERNEL);
331 if (!mb_cache_bg_lock)
332 return NULL;
333 bgl_lock_init(mb_cache_bg_lock);
334 }
335
235 cache = kmalloc(sizeof(struct mb_cache), GFP_KERNEL); 336 cache = kmalloc(sizeof(struct mb_cache), GFP_KERNEL);
236 if (!cache) 337 if (!cache)
237 return NULL; 338 return NULL;
@@ -290,21 +391,47 @@ void
290mb_cache_shrink(struct block_device *bdev) 391mb_cache_shrink(struct block_device *bdev)
291{ 392{
292 LIST_HEAD(free_list); 393 LIST_HEAD(free_list);
293 struct list_head *l, *ltmp; 394 struct list_head *l;
395 struct mb_cache_entry *ce, *tmp;
294 396
397 l = &mb_cache_lru_list;
295 spin_lock(&mb_cache_spinlock); 398 spin_lock(&mb_cache_spinlock);
296 list_for_each_safe(l, ltmp, &mb_cache_lru_list) { 399 while (!list_is_last(l, &mb_cache_lru_list)) {
297 struct mb_cache_entry *ce = 400 l = l->next;
298 list_entry(l, struct mb_cache_entry, e_lru_list); 401 ce = list_entry(l, struct mb_cache_entry, e_lru_list);
299 if (ce->e_bdev == bdev) { 402 if (ce->e_bdev == bdev) {
300 list_move_tail(&ce->e_lru_list, &free_list); 403 list_del_init(&ce->e_lru_list);
301 __mb_cache_entry_unhash(ce); 404 if (ce->e_used || ce->e_queued ||
405 atomic_read(&ce->e_refcnt))
406 continue;
407 spin_unlock(&mb_cache_spinlock);
408 /*
409 * Prevent any find or get operation on the entry.
410 */
411 hlist_bl_lock(ce->e_block_hash_p);
412 hlist_bl_lock(ce->e_index_hash_p);
413 /* Ignore if it is touched by a find/get */
414 if (ce->e_used || ce->e_queued ||
415 atomic_read(&ce->e_refcnt) ||
416 !list_empty(&ce->e_lru_list)) {
417 hlist_bl_unlock(ce->e_index_hash_p);
418 hlist_bl_unlock(ce->e_block_hash_p);
419 l = &mb_cache_lru_list;
420 spin_lock(&mb_cache_spinlock);
421 continue;
422 }
423 __mb_cache_entry_unhash_unlock(ce);
424 mb_assert(!(ce->e_used || ce->e_queued ||
425 atomic_read(&ce->e_refcnt)));
426 list_add_tail(&ce->e_lru_list, &free_list);
427 l = &mb_cache_lru_list;
428 spin_lock(&mb_cache_spinlock);
302 } 429 }
303 } 430 }
304 spin_unlock(&mb_cache_spinlock); 431 spin_unlock(&mb_cache_spinlock);
305 list_for_each_safe(l, ltmp, &free_list) { 432
306 __mb_cache_entry_forget(list_entry(l, struct mb_cache_entry, 433 list_for_each_entry_safe(ce, tmp, &free_list, e_lru_list) {
307 e_lru_list), GFP_KERNEL); 434 __mb_cache_entry_forget(ce, GFP_KERNEL);
308 } 435 }
309} 436}
310 437
@@ -320,23 +447,27 @@ void
320mb_cache_destroy(struct mb_cache *cache) 447mb_cache_destroy(struct mb_cache *cache)
321{ 448{
322 LIST_HEAD(free_list); 449 LIST_HEAD(free_list);
323 struct list_head *l, *ltmp; 450 struct mb_cache_entry *ce, *tmp;
324 451
325 spin_lock(&mb_cache_spinlock); 452 spin_lock(&mb_cache_spinlock);
326 list_for_each_safe(l, ltmp, &mb_cache_lru_list) { 453 list_for_each_entry_safe(ce, tmp, &mb_cache_lru_list, e_lru_list) {
327 struct mb_cache_entry *ce = 454 if (ce->e_cache == cache)
328 list_entry(l, struct mb_cache_entry, e_lru_list);
329 if (ce->e_cache == cache) {
330 list_move_tail(&ce->e_lru_list, &free_list); 455 list_move_tail(&ce->e_lru_list, &free_list);
331 __mb_cache_entry_unhash(ce);
332 }
333 } 456 }
334 list_del(&cache->c_cache_list); 457 list_del(&cache->c_cache_list);
335 spin_unlock(&mb_cache_spinlock); 458 spin_unlock(&mb_cache_spinlock);
336 459
337 list_for_each_safe(l, ltmp, &free_list) { 460 list_for_each_entry_safe(ce, tmp, &free_list, e_lru_list) {
338 __mb_cache_entry_forget(list_entry(l, struct mb_cache_entry, 461 list_del_init(&ce->e_lru_list);
339 e_lru_list), GFP_KERNEL); 462 /*
463 * Prevent any find or get operation on the entry.
464 */
465 hlist_bl_lock(ce->e_block_hash_p);
466 hlist_bl_lock(ce->e_index_hash_p);
467 mb_assert(!(ce->e_used || ce->e_queued ||
468 atomic_read(&ce->e_refcnt)));
469 __mb_cache_entry_unhash_unlock(ce);
470 __mb_cache_entry_forget(ce, GFP_KERNEL);
340 } 471 }
341 472
342 if (atomic_read(&cache->c_entry_count) > 0) { 473 if (atomic_read(&cache->c_entry_count) > 0) {
@@ -345,8 +476,6 @@ mb_cache_destroy(struct mb_cache *cache)
345 atomic_read(&cache->c_entry_count)); 476 atomic_read(&cache->c_entry_count));
346 } 477 }
347 478
348 kmem_cache_destroy(cache->c_entry_cache);
349
350 kfree(cache->c_index_hash); 479 kfree(cache->c_index_hash);
351 kfree(cache->c_block_hash); 480 kfree(cache->c_block_hash);
352 kfree(cache); 481 kfree(cache);
@@ -363,29 +492,59 @@ mb_cache_destroy(struct mb_cache *cache)
363struct mb_cache_entry * 492struct mb_cache_entry *
364mb_cache_entry_alloc(struct mb_cache *cache, gfp_t gfp_flags) 493mb_cache_entry_alloc(struct mb_cache *cache, gfp_t gfp_flags)
365{ 494{
366 struct mb_cache_entry *ce = NULL; 495 struct mb_cache_entry *ce;
367 496
368 if (atomic_read(&cache->c_entry_count) >= cache->c_max_entries) { 497 if (atomic_read(&cache->c_entry_count) >= cache->c_max_entries) {
498 struct list_head *l;
499
500 l = &mb_cache_lru_list;
369 spin_lock(&mb_cache_spinlock); 501 spin_lock(&mb_cache_spinlock);
370 if (!list_empty(&mb_cache_lru_list)) { 502 while (!list_is_last(l, &mb_cache_lru_list)) {
371 ce = list_entry(mb_cache_lru_list.next, 503 l = l->next;
372 struct mb_cache_entry, e_lru_list); 504 ce = list_entry(l, struct mb_cache_entry, e_lru_list);
373 list_del_init(&ce->e_lru_list); 505 if (ce->e_cache == cache) {
374 __mb_cache_entry_unhash(ce); 506 list_del_init(&ce->e_lru_list);
507 if (ce->e_used || ce->e_queued ||
508 atomic_read(&ce->e_refcnt))
509 continue;
510 spin_unlock(&mb_cache_spinlock);
511 /*
512 * Prevent any find or get operation on the
513 * entry.
514 */
515 hlist_bl_lock(ce->e_block_hash_p);
516 hlist_bl_lock(ce->e_index_hash_p);
517 /* Ignore if it is touched by a find/get */
518 if (ce->e_used || ce->e_queued ||
519 atomic_read(&ce->e_refcnt) ||
520 !list_empty(&ce->e_lru_list)) {
521 hlist_bl_unlock(ce->e_index_hash_p);
522 hlist_bl_unlock(ce->e_block_hash_p);
523 l = &mb_cache_lru_list;
524 spin_lock(&mb_cache_spinlock);
525 continue;
526 }
527 mb_assert(list_empty(&ce->e_lru_list));
528 mb_assert(!(ce->e_used || ce->e_queued ||
529 atomic_read(&ce->e_refcnt)));
530 __mb_cache_entry_unhash_unlock(ce);
531 goto found;
532 }
375 } 533 }
376 spin_unlock(&mb_cache_spinlock); 534 spin_unlock(&mb_cache_spinlock);
377 } 535 }
378 if (!ce) { 536
379 ce = kmem_cache_alloc(cache->c_entry_cache, gfp_flags); 537 ce = kmem_cache_alloc(cache->c_entry_cache, gfp_flags);
380 if (!ce) 538 if (!ce)
381 return NULL; 539 return NULL;
382 atomic_inc(&cache->c_entry_count); 540 atomic_inc(&cache->c_entry_count);
383 INIT_LIST_HEAD(&ce->e_lru_list); 541 INIT_LIST_HEAD(&ce->e_lru_list);
384 INIT_HLIST_BL_NODE(&ce->e_block_list); 542 INIT_HLIST_BL_NODE(&ce->e_block_list);
385 INIT_HLIST_BL_NODE(&ce->e_index.o_list); 543 INIT_HLIST_BL_NODE(&ce->e_index.o_list);
386 ce->e_cache = cache; 544 ce->e_cache = cache;
387 ce->e_queued = 0; 545 ce->e_queued = 0;
388 } 546 atomic_set(&ce->e_refcnt, 0);
547found:
389 ce->e_block_hash_p = &cache->c_block_hash[0]; 548 ce->e_block_hash_p = &cache->c_block_hash[0];
390 ce->e_index_hash_p = &cache->c_index_hash[0]; 549 ce->e_index_hash_p = &cache->c_index_hash[0];
391 ce->e_used = 1 + MB_CACHE_WRITER; 550 ce->e_used = 1 + MB_CACHE_WRITER;
@@ -414,7 +573,6 @@ mb_cache_entry_insert(struct mb_cache_entry *ce, struct block_device *bdev,
414 struct mb_cache *cache = ce->e_cache; 573 struct mb_cache *cache = ce->e_cache;
415 unsigned int bucket; 574 unsigned int bucket;
416 struct hlist_bl_node *l; 575 struct hlist_bl_node *l;
417 int error = -EBUSY;
418 struct hlist_bl_head *block_hash_p; 576 struct hlist_bl_head *block_hash_p;
419 struct hlist_bl_head *index_hash_p; 577 struct hlist_bl_head *index_hash_p;
420 struct mb_cache_entry *lce; 578 struct mb_cache_entry *lce;
@@ -423,26 +581,29 @@ mb_cache_entry_insert(struct mb_cache_entry *ce, struct block_device *bdev,
423 bucket = hash_long((unsigned long)bdev + (block & 0xffffffff), 581 bucket = hash_long((unsigned long)bdev + (block & 0xffffffff),
424 cache->c_bucket_bits); 582 cache->c_bucket_bits);
425 block_hash_p = &cache->c_block_hash[bucket]; 583 block_hash_p = &cache->c_block_hash[bucket];
426 spin_lock(&mb_cache_spinlock); 584 hlist_bl_lock(block_hash_p);
427 hlist_bl_for_each_entry(lce, l, block_hash_p, e_block_list) { 585 hlist_bl_for_each_entry(lce, l, block_hash_p, e_block_list) {
428 if (lce->e_bdev == bdev && lce->e_block == block) 586 if (lce->e_bdev == bdev && lce->e_block == block) {
429 goto out; 587 hlist_bl_unlock(block_hash_p);
588 return -EBUSY;
589 }
430 } 590 }
431 mb_assert(!__mb_cache_entry_is_block_hashed(ce)); 591 mb_assert(!__mb_cache_entry_is_block_hashed(ce));
432 __mb_cache_entry_unhash(ce); 592 __mb_cache_entry_unhash_block(ce);
593 __mb_cache_entry_unhash_index(ce);
433 ce->e_bdev = bdev; 594 ce->e_bdev = bdev;
434 ce->e_block = block; 595 ce->e_block = block;
435 ce->e_block_hash_p = block_hash_p; 596 ce->e_block_hash_p = block_hash_p;
436 ce->e_index.o_key = key; 597 ce->e_index.o_key = key;
598 hlist_bl_add_head(&ce->e_block_list, block_hash_p);
599 hlist_bl_unlock(block_hash_p);
437 bucket = hash_long(key, cache->c_bucket_bits); 600 bucket = hash_long(key, cache->c_bucket_bits);
438 index_hash_p = &cache->c_index_hash[bucket]; 601 index_hash_p = &cache->c_index_hash[bucket];
602 hlist_bl_lock(index_hash_p);
439 ce->e_index_hash_p = index_hash_p; 603 ce->e_index_hash_p = index_hash_p;
440 hlist_bl_add_head(&ce->e_index.o_list, index_hash_p); 604 hlist_bl_add_head(&ce->e_index.o_list, index_hash_p);
441 hlist_bl_add_head(&ce->e_block_list, block_hash_p); 605 hlist_bl_unlock(index_hash_p);
442 error = 0; 606 return 0;
443out:
444 spin_unlock(&mb_cache_spinlock);
445 return error;
446} 607}
447 608
448 609
@@ -456,24 +617,26 @@ out:
456void 617void
457mb_cache_entry_release(struct mb_cache_entry *ce) 618mb_cache_entry_release(struct mb_cache_entry *ce)
458{ 619{
459 spin_lock(&mb_cache_spinlock); 620 __mb_cache_entry_release(ce);
460 __mb_cache_entry_release_unlock(ce);
461} 621}
462 622
463 623
464/* 624/*
465 * mb_cache_entry_free() 625 * mb_cache_entry_free()
466 * 626 *
467 * This is equivalent to the sequence mb_cache_entry_takeout() --
468 * mb_cache_entry_release().
469 */ 627 */
470void 628void
471mb_cache_entry_free(struct mb_cache_entry *ce) 629mb_cache_entry_free(struct mb_cache_entry *ce)
472{ 630{
473 spin_lock(&mb_cache_spinlock); 631 mb_assert(ce);
474 mb_assert(list_empty(&ce->e_lru_list)); 632 mb_assert(list_empty(&ce->e_lru_list));
475 __mb_cache_entry_unhash(ce); 633 hlist_bl_lock(ce->e_index_hash_p);
476 __mb_cache_entry_release_unlock(ce); 634 __mb_cache_entry_unhash_index(ce);
635 hlist_bl_unlock(ce->e_index_hash_p);
636 hlist_bl_lock(ce->e_block_hash_p);
637 __mb_cache_entry_unhash_block(ce);
638 hlist_bl_unlock(ce->e_block_hash_p);
639 __mb_cache_entry_release(ce);
477} 640}
478 641
479 642
@@ -497,39 +660,48 @@ mb_cache_entry_get(struct mb_cache *cache, struct block_device *bdev,
497 bucket = hash_long((unsigned long)bdev + (block & 0xffffffff), 660 bucket = hash_long((unsigned long)bdev + (block & 0xffffffff),
498 cache->c_bucket_bits); 661 cache->c_bucket_bits);
499 block_hash_p = &cache->c_block_hash[bucket]; 662 block_hash_p = &cache->c_block_hash[bucket];
500 spin_lock(&mb_cache_spinlock); 663 /* First serialize access to the block corresponding hash chain. */
664 hlist_bl_lock(block_hash_p);
501 hlist_bl_for_each_entry(ce, l, block_hash_p, e_block_list) { 665 hlist_bl_for_each_entry(ce, l, block_hash_p, e_block_list) {
502 mb_assert(ce->e_block_hash_p == block_hash_p); 666 mb_assert(ce->e_block_hash_p == block_hash_p);
503 if (ce->e_bdev == bdev && ce->e_block == block) { 667 if (ce->e_bdev == bdev && ce->e_block == block) {
504 DEFINE_WAIT(wait); 668 /*
669 * Prevent a free from removing the entry.
670 */
671 atomic_inc(&ce->e_refcnt);
672 hlist_bl_unlock(block_hash_p);
673 __spin_lock_mb_cache_entry(ce);
674 atomic_dec(&ce->e_refcnt);
675 if (ce->e_used > 0) {
676 DEFINE_WAIT(wait);
677 while (ce->e_used > 0) {
678 ce->e_queued++;
679 prepare_to_wait(&mb_cache_queue, &wait,
680 TASK_UNINTERRUPTIBLE);
681 __spin_unlock_mb_cache_entry(ce);
682 schedule();
683 __spin_lock_mb_cache_entry(ce);
684 ce->e_queued--;
685 }
686 finish_wait(&mb_cache_queue, &wait);
687 }
688 ce->e_used += 1 + MB_CACHE_WRITER;
689 __spin_unlock_mb_cache_entry(ce);
505 690
506 if (!list_empty(&ce->e_lru_list)) 691 if (!list_empty(&ce->e_lru_list)) {
692 spin_lock(&mb_cache_spinlock);
507 list_del_init(&ce->e_lru_list); 693 list_del_init(&ce->e_lru_list);
508
509 while (ce->e_used > 0) {
510 ce->e_queued++;
511 prepare_to_wait(&mb_cache_queue, &wait,
512 TASK_UNINTERRUPTIBLE);
513 spin_unlock(&mb_cache_spinlock); 694 spin_unlock(&mb_cache_spinlock);
514 schedule();
515 spin_lock(&mb_cache_spinlock);
516 ce->e_queued--;
517 } 695 }
518 finish_wait(&mb_cache_queue, &wait);
519 ce->e_used += 1 + MB_CACHE_WRITER;
520
521 if (!__mb_cache_entry_is_block_hashed(ce)) { 696 if (!__mb_cache_entry_is_block_hashed(ce)) {
522 __mb_cache_entry_release_unlock(ce); 697 __mb_cache_entry_release(ce);
523 return NULL; 698 return NULL;
524 } 699 }
525 goto cleanup; 700 return ce;
526 } 701 }
527 } 702 }
528 ce = NULL; 703 hlist_bl_unlock(block_hash_p);
529 704 return NULL;
530cleanup:
531 spin_unlock(&mb_cache_spinlock);
532 return ce;
533} 705}
534 706
535#if !defined(MB_CACHE_INDEXES_COUNT) || (MB_CACHE_INDEXES_COUNT > 0) 707#if !defined(MB_CACHE_INDEXES_COUNT) || (MB_CACHE_INDEXES_COUNT > 0)
@@ -538,40 +710,53 @@ static struct mb_cache_entry *
538__mb_cache_entry_find(struct hlist_bl_node *l, struct hlist_bl_head *head, 710__mb_cache_entry_find(struct hlist_bl_node *l, struct hlist_bl_head *head,
539 struct block_device *bdev, unsigned int key) 711 struct block_device *bdev, unsigned int key)
540{ 712{
713
714 /* The index hash chain is already locked by the caller. */
541 while (l != NULL) { 715 while (l != NULL) {
542 struct mb_cache_entry *ce = 716 struct mb_cache_entry *ce =
543 hlist_bl_entry(l, struct mb_cache_entry, 717 hlist_bl_entry(l, struct mb_cache_entry,
544 e_index.o_list); 718 e_index.o_list);
545 mb_assert(ce->e_index_hash_p == head); 719 mb_assert(ce->e_index_hash_p == head);
546 if (ce->e_bdev == bdev && ce->e_index.o_key == key) { 720 if (ce->e_bdev == bdev && ce->e_index.o_key == key) {
547 DEFINE_WAIT(wait); 721 /*
548 722 * Prevent a free from removing the entry.
549 if (!list_empty(&ce->e_lru_list)) 723 */
550 list_del_init(&ce->e_lru_list); 724 atomic_inc(&ce->e_refcnt);
551 725 hlist_bl_unlock(head);
726 __spin_lock_mb_cache_entry(ce);
727 atomic_dec(&ce->e_refcnt);
728 ce->e_used++;
552 /* Incrementing before holding the lock gives readers 729 /* Incrementing before holding the lock gives readers
553 priority over writers. */ 730 priority over writers. */
554 ce->e_used++; 731 if (ce->e_used >= MB_CACHE_WRITER) {
555 while (ce->e_used >= MB_CACHE_WRITER) { 732 DEFINE_WAIT(wait);
556 ce->e_queued++; 733
557 prepare_to_wait(&mb_cache_queue, &wait, 734 while (ce->e_used >= MB_CACHE_WRITER) {
558 TASK_UNINTERRUPTIBLE); 735 ce->e_queued++;
559 spin_unlock(&mb_cache_spinlock); 736 prepare_to_wait(&mb_cache_queue, &wait,
560 schedule(); 737 TASK_UNINTERRUPTIBLE);
738 __spin_unlock_mb_cache_entry(ce);
739 schedule();
740 __spin_lock_mb_cache_entry(ce);
741 ce->e_queued--;
742 }
743 finish_wait(&mb_cache_queue, &wait);
744 }
745 __spin_unlock_mb_cache_entry(ce);
746 if (!list_empty(&ce->e_lru_list)) {
561 spin_lock(&mb_cache_spinlock); 747 spin_lock(&mb_cache_spinlock);
562 ce->e_queued--; 748 list_del_init(&ce->e_lru_list);
749 spin_unlock(&mb_cache_spinlock);
563 } 750 }
564 finish_wait(&mb_cache_queue, &wait);
565
566 if (!__mb_cache_entry_is_block_hashed(ce)) { 751 if (!__mb_cache_entry_is_block_hashed(ce)) {
567 __mb_cache_entry_release_unlock(ce); 752 __mb_cache_entry_release(ce);
568 spin_lock(&mb_cache_spinlock);
569 return ERR_PTR(-EAGAIN); 753 return ERR_PTR(-EAGAIN);
570 } 754 }
571 return ce; 755 return ce;
572 } 756 }
573 l = l->next; 757 l = l->next;
574 } 758 }
759 hlist_bl_unlock(head);
575 return NULL; 760 return NULL;
576} 761}
577 762
@@ -598,12 +783,12 @@ mb_cache_entry_find_first(struct mb_cache *cache, struct block_device *bdev,
598 struct hlist_bl_head *index_hash_p; 783 struct hlist_bl_head *index_hash_p;
599 784
600 index_hash_p = &cache->c_index_hash[bucket]; 785 index_hash_p = &cache->c_index_hash[bucket];
601 spin_lock(&mb_cache_spinlock); 786 hlist_bl_lock(index_hash_p);
602 if (!hlist_bl_empty(index_hash_p)) { 787 if (!hlist_bl_empty(index_hash_p)) {
603 l = hlist_bl_first(index_hash_p); 788 l = hlist_bl_first(index_hash_p);
604 ce = __mb_cache_entry_find(l, index_hash_p, bdev, key); 789 ce = __mb_cache_entry_find(l, index_hash_p, bdev, key);
605 } 790 } else
606 spin_unlock(&mb_cache_spinlock); 791 hlist_bl_unlock(index_hash_p);
607 return ce; 792 return ce;
608} 793}
609 794
@@ -638,11 +823,11 @@ mb_cache_entry_find_next(struct mb_cache_entry *prev,
638 823
639 index_hash_p = &cache->c_index_hash[bucket]; 824 index_hash_p = &cache->c_index_hash[bucket];
640 mb_assert(prev->e_index_hash_p == index_hash_p); 825 mb_assert(prev->e_index_hash_p == index_hash_p);
641 spin_lock(&mb_cache_spinlock); 826 hlist_bl_lock(index_hash_p);
642 mb_assert(!hlist_bl_empty(index_hash_p)); 827 mb_assert(!hlist_bl_empty(index_hash_p));
643 l = prev->e_index.o_list.next; 828 l = prev->e_index.o_list.next;
644 ce = __mb_cache_entry_find(l, index_hash_p, bdev, key); 829 ce = __mb_cache_entry_find(l, index_hash_p, bdev, key);
645 __mb_cache_entry_release_unlock(prev); 830 __mb_cache_entry_release(prev);
646 return ce; 831 return ce;
647} 832}
648 833
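Taken together, the reclaim paths above (mb_cache_shrink_scan(), mb_cache_shrink(), and the reuse path in mb_cache_entry_alloc()) share one pattern now that the global LRU lock can no longer be held across the per-chain locks: pull the entry off the LRU list under mb_cache_spinlock, drop that lock, take the entry's block and index hash-chain locks, then re-check e_used, e_queued, e_refcnt, and LRU membership before unhashing and freeing, because a racing find/get may have touched the entry in the window. Below is a condensed, hedged sketch of that revalidation pattern using userspace mutexes and simplified fields rather than the kernel types; the struct and the try_reclaim() name are illustrative.

#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>

/* Simplified stand-ins for the kernel structures; comments map the fields to the patch. */
struct entry {
	int used;			/* e_used   */
	int queued;			/* e_queued */
	atomic_int refcnt;		/* e_refcnt */
	bool on_lru;			/* !list_empty(&ce->e_lru_list) */
	pthread_mutex_t *block_hash_lock;	/* hlist_bl lock of the block chain */
	pthread_mutex_t *index_hash_lock;	/* hlist_bl lock of the index chain */
};

static pthread_mutex_t lru_lock = PTHREAD_MUTEX_INITIALIZER;	/* mb_cache_spinlock */

/*
 * Called with lru_lock held, after the entry has been removed from the LRU
 * list; returns with lru_lock held again.  Returns true if the caller may
 * free the entry.
 */
static bool try_reclaim(struct entry *ce)
{
	/* Busy right now?  Leave it alone. */
	if (ce->used || ce->queued || atomic_load(&ce->refcnt))
		return false;

	/* The hash-chain locks rank above lru_lock, so drop lru_lock first. */
	pthread_mutex_unlock(&lru_lock);
	pthread_mutex_lock(ce->block_hash_lock);	/* blocks a new get() on this block */
	pthread_mutex_lock(ce->index_hash_lock);	/* blocks a new find() on this key  */

	/* Re-check: a racing find/get may have grabbed or re-listed the entry. */
	if (ce->used || ce->queued || atomic_load(&ce->refcnt) || ce->on_lru) {
		pthread_mutex_unlock(ce->index_hash_lock);
		pthread_mutex_unlock(ce->block_hash_lock);
		pthread_mutex_lock(&lru_lock);
		return false;
	}

	/* ...unhash from both chains here (the kernel's __mb_cache_entry_unhash_unlock)... */
	pthread_mutex_unlock(ce->index_hash_lock);
	pthread_mutex_unlock(ce->block_hash_lock);
	pthread_mutex_lock(&lru_lock);
	return true;
}

The e_refcnt check in the sketch mirrors why the new field exists: mb_cache_entry_get() and __mb_cache_entry_find() bump it before dropping the hash-chain lock, so a reclaimer that wins the chain locks still sees the entry as referenced and backs off instead of freeing it.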