author    Linus Torvalds <torvalds@linux-foundation.org>  2014-04-04 18:39:39 -0400
committer Linus Torvalds <torvalds@linux-foundation.org>  2014-04-04 18:39:39 -0400
commit    24e7ea3bea94fe05eae5019f5f12bcdc98fc5157 (patch)
tree      6e527053ad73b737b5450c52d14ddf53ad4ba9a2 /fs/mbcache.c
parent    8e343c8b5c2e3c93d9eebea7702c89d81753c495 (diff)
parent    ad6599ab3ac98a4474544086e048ce86ec15a4d1 (diff)

Merge tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4

Pull ext4 updates from Ted Ts'o:
 "Major changes for 3.14 include support for the newly added ZERO_RANGE
  and COLLAPSE_RANGE fallocate operations, and scalability improvements
  in the jbd2 layer and in xattr handling when the extended attributes
  spill over into an external block.

  Other than that, the usual clean ups and minor bug fixes"

* tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (42 commits)
  ext4: fix premature freeing of partial clusters split across leaf blocks
  ext4: remove unneeded test of ret variable
  ext4: fix comment typo
  ext4: make ext4_block_zero_page_range static
  ext4: atomically set inode->i_flags in ext4_set_inode_flags()
  ext4: optimize Hurd tests when reading/writing inodes
  ext4: kill i_version support for Hurd-castrated file systems
  ext4: each filesystem creates and uses its own mb_cache
  fs/mbcache.c: doucple the locking of local from global data
  fs/mbcache.c: change block and index hash chain to hlist_bl_node
  ext4: Introduce FALLOC_FL_ZERO_RANGE flag for fallocate
  ext4: refactor ext4_fallocate code
  ext4: Update inode i_size after the preallocation
  ext4: fix partial cluster handling for bigalloc file systems
  ext4: delete path dealloc code in ext4_ext_handle_uninitialized_extents
  ext4: only call sync_filesystm() when remounting read-only
  fs: push sync_filesystem() down to the file system's remount_fs()
  jbd2: improve error messages for inconsistent journal heads
  jbd2: minimize region locked by j_list_lock in jbd2_journal_forget()
  jbd2: minimize region locked by j_list_lock in journal_get_create_access()
  ...
Diffstat (limited to 'fs/mbcache.c')
-rw-r--r--  fs/mbcache.c | 540
1 file changed, 386 insertions(+), 154 deletions(-)
diff --git a/fs/mbcache.c b/fs/mbcache.c
index e519e45bf673..bf166e388f0d 100644
--- a/fs/mbcache.c
+++ b/fs/mbcache.c
@@ -26,6 +26,41 @@
  * back on the lru list.
  */
 
+/*
+ * Lock descriptions and usage:
+ *
+ * Each hash chain of both the block and index hash tables now contains
+ * a built-in lock used to serialize accesses to the hash chain.
+ *
+ * Accesses to global data structures mb_cache_list and mb_cache_lru_list
+ * are serialized via the global spinlock mb_cache_spinlock.
+ *
+ * Each mb_cache_entry contains a spinlock, e_entry_lock, to serialize
+ * accesses to its local data, such as e_used and e_queued.
+ *
+ * Lock ordering:
+ *
+ * Each block hash chain's lock has the highest lock order, followed by an
+ * index hash chain's lock, mb_cache_bg_lock (used to implement mb_cache_entry's
+ * lock), and mb_cache_spinlock, with the lowest order.  While holding
+ * either a block or index hash chain lock, a thread can acquire an
+ * mb_cache_bg_lock, which in turn can also acquire mb_cache_spinlock.
+ *
+ * Synchronization:
+ *
+ * Since both mb_cache_entry_get and mb_cache_entry_find scan the block and
+ * index hash chains, they need to lock the corresponding hash chain.  For each
+ * mb_cache_entry within the chain, they need to lock the mb_cache_entry to
+ * prevent either any simultaneous release or free on the entry and also
+ * to serialize accesses to either the e_used or e_queued member of the entry.
+ *
+ * To avoid having a dangling reference to an already freed
+ * mb_cache_entry, an mb_cache_entry is only freed when it is not on a
+ * block hash chain and also no longer being referenced, i.e. both e_used
+ * and e_queued are 0.  When an mb_cache_entry is explicitly freed it is
+ * first removed from a block hash chain.
+ */
+
 #include <linux/kernel.h>
 #include <linux/module.h>
 
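Annotation, not part of the patch: the comment above defines a strict nesting order for the four kinds of locks. A minimal sketch of the permitted nesting, written as if it lived inside fs/mbcache.c where the static helpers introduced later in this diff are visible; as far as this diff shows, no single code path actually takes all four locks at once, this only illustrates the allowed order.

/* Illustrative only: permitted lock nesting, outermost first. */
static void mb_cache_lock_nesting_sketch(struct mb_cache_entry *ce)
{
	hlist_bl_lock(ce->e_block_hash_p);	/* 1: block hash chain lock */
	hlist_bl_lock(ce->e_index_hash_p);	/* 2: index hash chain lock */
	__spin_lock_mb_cache_entry(ce);		/* 3: per-entry lock (mb_cache_bg_lock) */
	spin_lock(&mb_cache_spinlock);		/* 4: global list/LRU lock */

	/* ... touch the entry and the global lists here ... */

	spin_unlock(&mb_cache_spinlock);
	__spin_unlock_mb_cache_entry(ce);
	hlist_bl_unlock(ce->e_index_hash_p);
	hlist_bl_unlock(ce->e_block_hash_p);
}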
@@ -34,9 +69,10 @@
 #include <linux/mm.h>
 #include <linux/slab.h>
 #include <linux/sched.h>
-#include <linux/init.h>
+#include <linux/list_bl.h>
 #include <linux/mbcache.h>
-
+#include <linux/init.h>
+#include <linux/blockgroup_lock.h>
 
 #ifdef MB_CACHE_DEBUG
 # define mb_debug(f...) do { \
@@ -57,8 +93,14 @@
 
 #define MB_CACHE_WRITER ((unsigned short)~0U >> 1)
 
+#define MB_CACHE_ENTRY_LOCK_BITS	__builtin_log2(NR_BG_LOCKS)
+#define	MB_CACHE_ENTRY_LOCK_INDEX(ce)			\
+	(hash_long((unsigned long)ce, MB_CACHE_ENTRY_LOCK_BITS))
+
 static DECLARE_WAIT_QUEUE_HEAD(mb_cache_queue);
-
+static struct blockgroup_lock *mb_cache_bg_lock;
+static struct kmem_cache *mb_cache_kmem_cache;
+
 MODULE_AUTHOR("Andreas Gruenbacher <a.gruenbacher@computer.org>");
 MODULE_DESCRIPTION("Meta block cache (for extended attributes)");
 MODULE_LICENSE("GPL");
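Annotation: the definitions in this hunk back the per-entry lock. Rather than embedding a spinlock in every mb_cache_entry, the entry's address is hashed onto one of the NR_BG_LOCKS spinlocks of a shared struct blockgroup_lock. A small sketch of that mapping; the helper name is invented, while hash_long() and bgl_lock_ptr() are the existing kernel primitives used by the patch.

/* Illustrative only: which spinlock protects a given entry. */
static spinlock_t *mb_cache_entry_lock_ptr_sketch(struct mb_cache_entry *ce)
{
	/* Hash the entry's address down to log2(NR_BG_LOCKS) bits... */
	unsigned int idx = hash_long((unsigned long)ce, MB_CACHE_ENTRY_LOCK_BITS);

	/* ...and pick that slot out of the shared blockgroup_lock. */
	return bgl_lock_ptr(mb_cache_bg_lock, idx);
}

Entries that hash to the same slot share a spinlock; since the per-entry critical sections are short, the occasional collision is a reasonable trade for not growing every cache entry by a spinlock.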
@@ -86,58 +128,110 @@ static LIST_HEAD(mb_cache_list);
 static LIST_HEAD(mb_cache_lru_list);
 static DEFINE_SPINLOCK(mb_cache_spinlock);
 
+static inline void
+__spin_lock_mb_cache_entry(struct mb_cache_entry *ce)
+{
+	spin_lock(bgl_lock_ptr(mb_cache_bg_lock,
+		MB_CACHE_ENTRY_LOCK_INDEX(ce)));
+}
+
+static inline void
+__spin_unlock_mb_cache_entry(struct mb_cache_entry *ce)
+{
+	spin_unlock(bgl_lock_ptr(mb_cache_bg_lock,
+		MB_CACHE_ENTRY_LOCK_INDEX(ce)));
+}
+
 static inline int
-__mb_cache_entry_is_hashed(struct mb_cache_entry *ce)
+__mb_cache_entry_is_block_hashed(struct mb_cache_entry *ce)
 {
-	return !list_empty(&ce->e_block_list);
+	return !hlist_bl_unhashed(&ce->e_block_list);
 }
 
 
-static void
-__mb_cache_entry_unhash(struct mb_cache_entry *ce)
+static inline void
+__mb_cache_entry_unhash_block(struct mb_cache_entry *ce)
 {
-	if (__mb_cache_entry_is_hashed(ce)) {
-		list_del_init(&ce->e_block_list);
-		list_del(&ce->e_index.o_list);
-	}
+	if (__mb_cache_entry_is_block_hashed(ce))
+		hlist_bl_del_init(&ce->e_block_list);
 }
 
+static inline int
+__mb_cache_entry_is_index_hashed(struct mb_cache_entry *ce)
+{
+	return !hlist_bl_unhashed(&ce->e_index.o_list);
+}
+
+static inline void
+__mb_cache_entry_unhash_index(struct mb_cache_entry *ce)
+{
+	if (__mb_cache_entry_is_index_hashed(ce))
+		hlist_bl_del_init(&ce->e_index.o_list);
+}
+
+/*
+ * __mb_cache_entry_unhash_unlock()
+ *
+ * This function is called to unhash both the block and index hash
+ * chains.
+ * It assumes both the block and index hash chains are locked upon
+ * entry.  It also unlocks both hash chains upon exit.
+ */
+static inline void
+__mb_cache_entry_unhash_unlock(struct mb_cache_entry *ce)
+{
+	__mb_cache_entry_unhash_index(ce);
+	hlist_bl_unlock(ce->e_index_hash_p);
+	__mb_cache_entry_unhash_block(ce);
+	hlist_bl_unlock(ce->e_block_hash_p);
+}
 
 static void
 __mb_cache_entry_forget(struct mb_cache_entry *ce, gfp_t gfp_mask)
 {
 	struct mb_cache *cache = ce->e_cache;
 
-	mb_assert(!(ce->e_used || ce->e_queued));
+	mb_assert(!(ce->e_used || ce->e_queued || atomic_read(&ce->e_refcnt)));
 	kmem_cache_free(cache->c_entry_cache, ce);
 	atomic_dec(&cache->c_entry_count);
 }
 
-
 static void
-__mb_cache_entry_release_unlock(struct mb_cache_entry *ce)
-	__releases(mb_cache_spinlock)
+__mb_cache_entry_release(struct mb_cache_entry *ce)
 {
+	/* First lock the entry to serialize access to its local data. */
+	__spin_lock_mb_cache_entry(ce);
 	/* Wake up all processes queuing for this cache entry. */
 	if (ce->e_queued)
 		wake_up_all(&mb_cache_queue);
 	if (ce->e_used >= MB_CACHE_WRITER)
 		ce->e_used -= MB_CACHE_WRITER;
+	/*
+	 * Make sure that all cache entries on lru_list have
+	 * both e_used and e_queued equal to 0.
+	 */
 	ce->e_used--;
-	if (!(ce->e_used || ce->e_queued)) {
-		if (!__mb_cache_entry_is_hashed(ce))
+	if (!(ce->e_used || ce->e_queued || atomic_read(&ce->e_refcnt))) {
+		if (!__mb_cache_entry_is_block_hashed(ce)) {
+			__spin_unlock_mb_cache_entry(ce);
 			goto forget;
-		mb_assert(list_empty(&ce->e_lru_list));
-		list_add_tail(&ce->e_lru_list, &mb_cache_lru_list);
+		}
+		/*
+		 * Need access to lru list, first drop entry lock,
+		 * then reacquire the lock in the proper order.
+		 */
+		spin_lock(&mb_cache_spinlock);
+		if (list_empty(&ce->e_lru_list))
+			list_add_tail(&ce->e_lru_list, &mb_cache_lru_list);
+		spin_unlock(&mb_cache_spinlock);
 	}
-	spin_unlock(&mb_cache_spinlock);
+	__spin_unlock_mb_cache_entry(ce);
 	return;
 forget:
-	spin_unlock(&mb_cache_spinlock);
+	mb_assert(list_empty(&ce->e_lru_list));
 	__mb_cache_entry_forget(ce, GFP_KERNEL);
 }
 
-
 /*
  * mb_cache_shrink_scan()  memory pressure callback
  *
@@ -160,17 +254,34 @@ mb_cache_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
 
 	mb_debug("trying to free %d entries", nr_to_scan);
 	spin_lock(&mb_cache_spinlock);
-	while (nr_to_scan-- && !list_empty(&mb_cache_lru_list)) {
+	while ((nr_to_scan-- > 0) && !list_empty(&mb_cache_lru_list)) {
 		struct mb_cache_entry *ce =
 			list_entry(mb_cache_lru_list.next,
 				   struct mb_cache_entry, e_lru_list);
-		list_move_tail(&ce->e_lru_list, &free_list);
-		__mb_cache_entry_unhash(ce);
-		freed++;
+		list_del_init(&ce->e_lru_list);
+		if (ce->e_used || ce->e_queued || atomic_read(&ce->e_refcnt))
+			continue;
+		spin_unlock(&mb_cache_spinlock);
+		/* Prevent any find or get operation on the entry */
+		hlist_bl_lock(ce->e_block_hash_p);
+		hlist_bl_lock(ce->e_index_hash_p);
+		/* Ignore if it is touched by a find/get */
+		if (ce->e_used || ce->e_queued || atomic_read(&ce->e_refcnt) ||
+			!list_empty(&ce->e_lru_list)) {
+			hlist_bl_unlock(ce->e_index_hash_p);
+			hlist_bl_unlock(ce->e_block_hash_p);
+			spin_lock(&mb_cache_spinlock);
+			continue;
+		}
+		__mb_cache_entry_unhash_unlock(ce);
+		list_add_tail(&ce->e_lru_list, &free_list);
+		spin_lock(&mb_cache_spinlock);
 	}
 	spin_unlock(&mb_cache_spinlock);
+
 	list_for_each_entry_safe(entry, tmp, &free_list, e_lru_list) {
 		__mb_cache_entry_forget(entry, gfp_mask);
+		freed++;
 	}
 	return freed;
 }
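Annotation: the shrinker loop above follows a pattern that mb_cache_shrink() and mb_cache_entry_alloc() repeat further down. Because the hash-chain locks rank above mb_cache_spinlock, the global lock has to be dropped before the chain locks are taken, and the entry must then be revalidated. A condensed sketch of that pattern; the helper name is invented, the checks are the ones the patch performs.

/* Illustrative only: isolate an LRU entry for freeing, or bail out.
 * Called with mb_cache_spinlock held and ce already off the LRU list. */
static bool mb_cache_try_isolate_sketch(struct mb_cache_entry *ce)
{
	if (ce->e_used || ce->e_queued || atomic_read(&ce->e_refcnt))
		return false;				/* still in use */

	spin_unlock(&mb_cache_spinlock);		/* chain locks rank higher */
	hlist_bl_lock(ce->e_block_hash_p);
	hlist_bl_lock(ce->e_index_hash_p);

	/* Recheck: a find/get may have touched the entry meanwhile. */
	if (ce->e_used || ce->e_queued || atomic_read(&ce->e_refcnt) ||
	    !list_empty(&ce->e_lru_list)) {
		hlist_bl_unlock(ce->e_index_hash_p);
		hlist_bl_unlock(ce->e_block_hash_p);
		spin_lock(&mb_cache_spinlock);
		return false;
	}

	__mb_cache_entry_unhash_unlock(ce);		/* also drops both chain locks */
	spin_lock(&mb_cache_spinlock);
	return true;		/* now safe to __mb_cache_entry_forget() it */
}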
@@ -215,29 +326,40 @@ mb_cache_create(const char *name, int bucket_bits)
 	int n, bucket_count = 1 << bucket_bits;
 	struct mb_cache *cache = NULL;
 
+	if (!mb_cache_bg_lock) {
+		mb_cache_bg_lock = kmalloc(sizeof(struct blockgroup_lock),
+			GFP_KERNEL);
+		if (!mb_cache_bg_lock)
+			return NULL;
+		bgl_lock_init(mb_cache_bg_lock);
+	}
+
 	cache = kmalloc(sizeof(struct mb_cache), GFP_KERNEL);
 	if (!cache)
 		return NULL;
 	cache->c_name = name;
 	atomic_set(&cache->c_entry_count, 0);
 	cache->c_bucket_bits = bucket_bits;
-	cache->c_block_hash = kmalloc(bucket_count * sizeof(struct list_head),
-				      GFP_KERNEL);
+	cache->c_block_hash = kmalloc(bucket_count *
+		sizeof(struct hlist_bl_head), GFP_KERNEL);
 	if (!cache->c_block_hash)
 		goto fail;
 	for (n=0; n<bucket_count; n++)
-		INIT_LIST_HEAD(&cache->c_block_hash[n]);
-	cache->c_index_hash = kmalloc(bucket_count * sizeof(struct list_head),
-				      GFP_KERNEL);
+		INIT_HLIST_BL_HEAD(&cache->c_block_hash[n]);
+	cache->c_index_hash = kmalloc(bucket_count *
+		sizeof(struct hlist_bl_head), GFP_KERNEL);
 	if (!cache->c_index_hash)
 		goto fail;
 	for (n=0; n<bucket_count; n++)
-		INIT_LIST_HEAD(&cache->c_index_hash[n]);
-	cache->c_entry_cache = kmem_cache_create(name,
-				sizeof(struct mb_cache_entry), 0,
-				SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, NULL);
-	if (!cache->c_entry_cache)
-		goto fail2;
+		INIT_HLIST_BL_HEAD(&cache->c_index_hash[n]);
+	if (!mb_cache_kmem_cache) {
+		mb_cache_kmem_cache = kmem_cache_create(name,
+			sizeof(struct mb_cache_entry), 0,
+			SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, NULL);
+		if (!mb_cache_kmem_cache)
+			goto fail2;
+	}
+	cache->c_entry_cache = mb_cache_kmem_cache;
 
 	/*
 	 * Set an upper limit on the number of cache entries so that the hash
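Annotation: mb_cache_create() now shares a single kmem_cache and a single mb_cache_bg_lock across all caches, which is what makes the "each filesystem creates and uses its own mb_cache" change in this merge cheap. A hedged sketch of how a hypothetical filesystem might use it at mount/unmount time; the myfs_* names and the bucket-bits value are made up, only mb_cache_create() and mb_cache_destroy() come from this file.

/* Illustrative only: per-filesystem xattr cache setup. */
#define MYFS_XATTR_HASH_BITS	10	/* 1024 hash buckets, example value */

static struct mb_cache *myfs_xattr_cache;

static int myfs_create_xattr_cache(void)
{
	myfs_xattr_cache = mb_cache_create("myfs_xattr", MYFS_XATTR_HASH_BITS);
	return myfs_xattr_cache ? 0 : -ENOMEM;
}

static void myfs_destroy_xattr_cache(void)
{
	if (myfs_xattr_cache) {
		mb_cache_destroy(myfs_xattr_cache);
		myfs_xattr_cache = NULL;
	}
}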
@@ -273,21 +395,47 @@ void
 mb_cache_shrink(struct block_device *bdev)
 {
 	LIST_HEAD(free_list);
-	struct list_head *l, *ltmp;
+	struct list_head *l;
+	struct mb_cache_entry *ce, *tmp;
 
+	l = &mb_cache_lru_list;
 	spin_lock(&mb_cache_spinlock);
-	list_for_each_safe(l, ltmp, &mb_cache_lru_list) {
-		struct mb_cache_entry *ce =
-			list_entry(l, struct mb_cache_entry, e_lru_list);
+	while (!list_is_last(l, &mb_cache_lru_list)) {
+		l = l->next;
+		ce = list_entry(l, struct mb_cache_entry, e_lru_list);
 		if (ce->e_bdev == bdev) {
-			list_move_tail(&ce->e_lru_list, &free_list);
-			__mb_cache_entry_unhash(ce);
+			list_del_init(&ce->e_lru_list);
+			if (ce->e_used || ce->e_queued ||
+				atomic_read(&ce->e_refcnt))
+				continue;
+			spin_unlock(&mb_cache_spinlock);
+			/*
+			 * Prevent any find or get operation on the entry.
+			 */
+			hlist_bl_lock(ce->e_block_hash_p);
+			hlist_bl_lock(ce->e_index_hash_p);
+			/* Ignore if it is touched by a find/get */
+			if (ce->e_used || ce->e_queued ||
+				atomic_read(&ce->e_refcnt) ||
+				!list_empty(&ce->e_lru_list)) {
+				hlist_bl_unlock(ce->e_index_hash_p);
+				hlist_bl_unlock(ce->e_block_hash_p);
+				l = &mb_cache_lru_list;
+				spin_lock(&mb_cache_spinlock);
+				continue;
+			}
+			__mb_cache_entry_unhash_unlock(ce);
+			mb_assert(!(ce->e_used || ce->e_queued ||
+				    atomic_read(&ce->e_refcnt)));
+			list_add_tail(&ce->e_lru_list, &free_list);
+			l = &mb_cache_lru_list;
+			spin_lock(&mb_cache_spinlock);
 		}
 	}
 	spin_unlock(&mb_cache_spinlock);
-	list_for_each_safe(l, ltmp, &free_list) {
-		__mb_cache_entry_forget(list_entry(l, struct mb_cache_entry,
-						   e_lru_list), GFP_KERNEL);
+
+	list_for_each_entry_safe(ce, tmp, &free_list, e_lru_list) {
+		__mb_cache_entry_forget(ce, GFP_KERNEL);
 	}
 }
 
@@ -303,23 +451,27 @@ void
 mb_cache_destroy(struct mb_cache *cache)
 {
 	LIST_HEAD(free_list);
-	struct list_head *l, *ltmp;
+	struct mb_cache_entry *ce, *tmp;
 
 	spin_lock(&mb_cache_spinlock);
-	list_for_each_safe(l, ltmp, &mb_cache_lru_list) {
-		struct mb_cache_entry *ce =
-			list_entry(l, struct mb_cache_entry, e_lru_list);
-		if (ce->e_cache == cache) {
+	list_for_each_entry_safe(ce, tmp, &mb_cache_lru_list, e_lru_list) {
+		if (ce->e_cache == cache)
 			list_move_tail(&ce->e_lru_list, &free_list);
-			__mb_cache_entry_unhash(ce);
-		}
 	}
 	list_del(&cache->c_cache_list);
 	spin_unlock(&mb_cache_spinlock);
 
-	list_for_each_safe(l, ltmp, &free_list) {
-		__mb_cache_entry_forget(list_entry(l, struct mb_cache_entry,
-						   e_lru_list), GFP_KERNEL);
+	list_for_each_entry_safe(ce, tmp, &free_list, e_lru_list) {
+		list_del_init(&ce->e_lru_list);
+		/*
+		 * Prevent any find or get operation on the entry.
+		 */
+		hlist_bl_lock(ce->e_block_hash_p);
+		hlist_bl_lock(ce->e_index_hash_p);
+		mb_assert(!(ce->e_used || ce->e_queued ||
+			atomic_read(&ce->e_refcnt)));
+		__mb_cache_entry_unhash_unlock(ce);
+		__mb_cache_entry_forget(ce, GFP_KERNEL);
 	}
 
 	if (atomic_read(&cache->c_entry_count) > 0) {
@@ -328,8 +480,10 @@ mb_cache_destroy(struct mb_cache *cache)
 			  atomic_read(&cache->c_entry_count));
 	}
 
-	kmem_cache_destroy(cache->c_entry_cache);
-
+	if (list_empty(&mb_cache_list)) {
+		kmem_cache_destroy(mb_cache_kmem_cache);
+		mb_cache_kmem_cache = NULL;
+	}
 	kfree(cache->c_index_hash);
 	kfree(cache->c_block_hash);
 	kfree(cache);
@@ -346,28 +500,61 @@ mb_cache_destroy(struct mb_cache *cache)
 struct mb_cache_entry *
 mb_cache_entry_alloc(struct mb_cache *cache, gfp_t gfp_flags)
 {
-	struct mb_cache_entry *ce = NULL;
+	struct mb_cache_entry *ce;
 
 	if (atomic_read(&cache->c_entry_count) >= cache->c_max_entries) {
+		struct list_head *l;
+
+		l = &mb_cache_lru_list;
 		spin_lock(&mb_cache_spinlock);
-		if (!list_empty(&mb_cache_lru_list)) {
-			ce = list_entry(mb_cache_lru_list.next,
-					struct mb_cache_entry, e_lru_list);
-			list_del_init(&ce->e_lru_list);
-			__mb_cache_entry_unhash(ce);
+		while (!list_is_last(l, &mb_cache_lru_list)) {
+			l = l->next;
+			ce = list_entry(l, struct mb_cache_entry, e_lru_list);
+			if (ce->e_cache == cache) {
+				list_del_init(&ce->e_lru_list);
+				if (ce->e_used || ce->e_queued ||
+					atomic_read(&ce->e_refcnt))
+					continue;
+				spin_unlock(&mb_cache_spinlock);
+				/*
+				 * Prevent any find or get operation on the
+				 * entry.
+				 */
+				hlist_bl_lock(ce->e_block_hash_p);
+				hlist_bl_lock(ce->e_index_hash_p);
+				/* Ignore if it is touched by a find/get */
+				if (ce->e_used || ce->e_queued ||
+					atomic_read(&ce->e_refcnt) ||
+					!list_empty(&ce->e_lru_list)) {
+					hlist_bl_unlock(ce->e_index_hash_p);
+					hlist_bl_unlock(ce->e_block_hash_p);
+					l = &mb_cache_lru_list;
+					spin_lock(&mb_cache_spinlock);
+					continue;
+				}
+				mb_assert(list_empty(&ce->e_lru_list));
+				mb_assert(!(ce->e_used || ce->e_queued ||
+					atomic_read(&ce->e_refcnt)));
+				__mb_cache_entry_unhash_unlock(ce);
+				goto found;
+			}
 		}
 		spin_unlock(&mb_cache_spinlock);
 	}
-	if (!ce) {
-		ce = kmem_cache_alloc(cache->c_entry_cache, gfp_flags);
-		if (!ce)
-			return NULL;
-		atomic_inc(&cache->c_entry_count);
-		INIT_LIST_HEAD(&ce->e_lru_list);
-		INIT_LIST_HEAD(&ce->e_block_list);
-		ce->e_cache = cache;
-		ce->e_queued = 0;
-	}
+
+	ce = kmem_cache_alloc(cache->c_entry_cache, gfp_flags);
+	if (!ce)
+		return NULL;
+	atomic_inc(&cache->c_entry_count);
+	INIT_LIST_HEAD(&ce->e_lru_list);
+	INIT_HLIST_BL_NODE(&ce->e_block_list);
+	INIT_HLIST_BL_NODE(&ce->e_index.o_list);
+	ce->e_cache = cache;
+	ce->e_queued = 0;
+	atomic_set(&ce->e_refcnt, 0);
+found:
+	ce->e_block_hash_p = &cache->c_block_hash[0];
+	ce->e_index_hash_p = &cache->c_index_hash[0];
 	ce->e_used = 1 + MB_CACHE_WRITER;
 	return ce;
 }
@@ -393,29 +580,38 @@ mb_cache_entry_insert(struct mb_cache_entry *ce, struct block_device *bdev,
 {
 	struct mb_cache *cache = ce->e_cache;
 	unsigned int bucket;
-	struct list_head *l;
-	int error = -EBUSY;
+	struct hlist_bl_node *l;
+	struct hlist_bl_head *block_hash_p;
+	struct hlist_bl_head *index_hash_p;
+	struct mb_cache_entry *lce;
 
+	mb_assert(ce);
 	bucket = hash_long((unsigned long)bdev + (block & 0xffffffff),
 			   cache->c_bucket_bits);
-	spin_lock(&mb_cache_spinlock);
-	list_for_each_prev(l, &cache->c_block_hash[bucket]) {
-		struct mb_cache_entry *ce =
-			list_entry(l, struct mb_cache_entry, e_block_list);
-		if (ce->e_bdev == bdev && ce->e_block == block)
-			goto out;
+	block_hash_p = &cache->c_block_hash[bucket];
+	hlist_bl_lock(block_hash_p);
+	hlist_bl_for_each_entry(lce, l, block_hash_p, e_block_list) {
+		if (lce->e_bdev == bdev && lce->e_block == block) {
+			hlist_bl_unlock(block_hash_p);
+			return -EBUSY;
+		}
 	}
-	__mb_cache_entry_unhash(ce);
+	mb_assert(!__mb_cache_entry_is_block_hashed(ce));
+	__mb_cache_entry_unhash_block(ce);
+	__mb_cache_entry_unhash_index(ce);
 	ce->e_bdev = bdev;
 	ce->e_block = block;
-	list_add(&ce->e_block_list, &cache->c_block_hash[bucket]);
+	ce->e_block_hash_p = block_hash_p;
 	ce->e_index.o_key = key;
+	hlist_bl_add_head(&ce->e_block_list, block_hash_p);
+	hlist_bl_unlock(block_hash_p);
 	bucket = hash_long(key, cache->c_bucket_bits);
-	list_add(&ce->e_index.o_list, &cache->c_index_hash[bucket]);
-	error = 0;
-out:
-	spin_unlock(&mb_cache_spinlock);
-	return error;
+	index_hash_p = &cache->c_index_hash[bucket];
+	hlist_bl_lock(index_hash_p);
+	ce->e_index_hash_p = index_hash_p;
+	hlist_bl_add_head(&ce->e_index.o_list, index_hash_p);
+	hlist_bl_unlock(index_hash_p);
+	return 0;
 }
 
 
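Annotation: taken together with mb_cache_entry_alloc() above, the insert path is used by callers roughly as follows. A hedged sketch; the surrounding function and variable names are invented, while the mb_cache_* calls and their signatures are the ones visible in this diff.

/* Illustrative only: cache a disk block under an index key. */
static int myfs_cache_block_sketch(struct mb_cache *cache,
				   struct block_device *bdev,
				   sector_t block, unsigned int hash)
{
	struct mb_cache_entry *ce;
	int error;

	ce = mb_cache_entry_alloc(cache, GFP_NOFS);
	if (!ce)
		return -ENOMEM;
	error = mb_cache_entry_insert(ce, bdev, block, hash);
	/*
	 * Drop our exclusive reference.  If the insert succeeded the entry
	 * stays hashed and is parked on the LRU list; if it failed (-EBUSY,
	 * someone else already cached this block) it was never hashed and
	 * the release frees it again.
	 */
	mb_cache_entry_release(ce);
	return error == -EBUSY ? 0 : error;
}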
@@ -429,24 +625,26 @@ out:
 void
 mb_cache_entry_release(struct mb_cache_entry *ce)
 {
-	spin_lock(&mb_cache_spinlock);
-	__mb_cache_entry_release_unlock(ce);
+	__mb_cache_entry_release(ce);
 }
 
 
 /*
  * mb_cache_entry_free()
  *
- * This is equivalent to the sequence mb_cache_entry_takeout() --
- * mb_cache_entry_release().
 */
 void
 mb_cache_entry_free(struct mb_cache_entry *ce)
 {
-	spin_lock(&mb_cache_spinlock);
+	mb_assert(ce);
 	mb_assert(list_empty(&ce->e_lru_list));
-	__mb_cache_entry_unhash(ce);
-	__mb_cache_entry_release_unlock(ce);
+	hlist_bl_lock(ce->e_index_hash_p);
+	__mb_cache_entry_unhash_index(ce);
+	hlist_bl_unlock(ce->e_index_hash_p);
+	hlist_bl_lock(ce->e_block_hash_p);
+	__mb_cache_entry_unhash_block(ce);
+	hlist_bl_unlock(ce->e_block_hash_p);
+	__mb_cache_entry_release(ce);
 }
 
 
@@ -463,84 +661,110 @@ mb_cache_entry_get(struct mb_cache *cache, struct block_device *bdev,
 		   sector_t block)
 {
 	unsigned int bucket;
-	struct list_head *l;
+	struct hlist_bl_node *l;
 	struct mb_cache_entry *ce;
+	struct hlist_bl_head *block_hash_p;
 
 	bucket = hash_long((unsigned long)bdev + (block & 0xffffffff),
 			   cache->c_bucket_bits);
-	spin_lock(&mb_cache_spinlock);
-	list_for_each(l, &cache->c_block_hash[bucket]) {
-		ce = list_entry(l, struct mb_cache_entry, e_block_list);
+	block_hash_p = &cache->c_block_hash[bucket];
+	/* First serialize access to the block corresponding hash chain. */
+	hlist_bl_lock(block_hash_p);
+	hlist_bl_for_each_entry(ce, l, block_hash_p, e_block_list) {
+		mb_assert(ce->e_block_hash_p == block_hash_p);
 		if (ce->e_bdev == bdev && ce->e_block == block) {
-			DEFINE_WAIT(wait);
+			/*
+			 * Prevent a free from removing the entry.
+			 */
+			atomic_inc(&ce->e_refcnt);
+			hlist_bl_unlock(block_hash_p);
+			__spin_lock_mb_cache_entry(ce);
+			atomic_dec(&ce->e_refcnt);
+			if (ce->e_used > 0) {
+				DEFINE_WAIT(wait);
+				while (ce->e_used > 0) {
+					ce->e_queued++;
+					prepare_to_wait(&mb_cache_queue, &wait,
+							TASK_UNINTERRUPTIBLE);
+					__spin_unlock_mb_cache_entry(ce);
+					schedule();
+					__spin_lock_mb_cache_entry(ce);
+					ce->e_queued--;
+				}
+				finish_wait(&mb_cache_queue, &wait);
+			}
+			ce->e_used += 1 + MB_CACHE_WRITER;
+			__spin_unlock_mb_cache_entry(ce);
 
-			if (!list_empty(&ce->e_lru_list))
+			if (!list_empty(&ce->e_lru_list)) {
+				spin_lock(&mb_cache_spinlock);
 				list_del_init(&ce->e_lru_list);
-
-			while (ce->e_used > 0) {
-				ce->e_queued++;
-				prepare_to_wait(&mb_cache_queue, &wait,
-						TASK_UNINTERRUPTIBLE);
 				spin_unlock(&mb_cache_spinlock);
-				schedule();
-				spin_lock(&mb_cache_spinlock);
-				ce->e_queued--;
 			}
-			finish_wait(&mb_cache_queue, &wait);
-			ce->e_used += 1 + MB_CACHE_WRITER;
-
-			if (!__mb_cache_entry_is_hashed(ce)) {
-				__mb_cache_entry_release_unlock(ce);
+			if (!__mb_cache_entry_is_block_hashed(ce)) {
+				__mb_cache_entry_release(ce);
 				return NULL;
 			}
-			goto cleanup;
+			return ce;
 		}
 	}
-	ce = NULL;
-
-cleanup:
-	spin_unlock(&mb_cache_spinlock);
-	return ce;
+	hlist_bl_unlock(block_hash_p);
+	return NULL;
 }
 
 #if !defined(MB_CACHE_INDEXES_COUNT) || (MB_CACHE_INDEXES_COUNT > 0)
 
 static struct mb_cache_entry *
-__mb_cache_entry_find(struct list_head *l, struct list_head *head,
+__mb_cache_entry_find(struct hlist_bl_node *l, struct hlist_bl_head *head,
 		      struct block_device *bdev, unsigned int key)
 {
-	while (l != head) {
+
+	/* The index hash chain is already locked by the caller. */
+	while (l != NULL) {
 		struct mb_cache_entry *ce =
-			list_entry(l, struct mb_cache_entry, e_index.o_list);
+			hlist_bl_entry(l, struct mb_cache_entry,
+				e_index.o_list);
+		mb_assert(ce->e_index_hash_p == head);
 		if (ce->e_bdev == bdev && ce->e_index.o_key == key) {
-			DEFINE_WAIT(wait);
-
-			if (!list_empty(&ce->e_lru_list))
-				list_del_init(&ce->e_lru_list);
-
+			/*
+			 * Prevent a free from removing the entry.
+			 */
+			atomic_inc(&ce->e_refcnt);
+			hlist_bl_unlock(head);
+			__spin_lock_mb_cache_entry(ce);
+			atomic_dec(&ce->e_refcnt);
+			ce->e_used++;
 			/* Incrementing before holding the lock gives readers
 			   priority over writers. */
-			ce->e_used++;
-			while (ce->e_used >= MB_CACHE_WRITER) {
-				ce->e_queued++;
-				prepare_to_wait(&mb_cache_queue, &wait,
-						TASK_UNINTERRUPTIBLE);
-				spin_unlock(&mb_cache_spinlock);
-				schedule();
-				spin_lock(&mb_cache_spinlock);
-				ce->e_queued--;
+			if (ce->e_used >= MB_CACHE_WRITER) {
+				DEFINE_WAIT(wait);
+
+				while (ce->e_used >= MB_CACHE_WRITER) {
+					ce->e_queued++;
+					prepare_to_wait(&mb_cache_queue, &wait,
+							TASK_UNINTERRUPTIBLE);
+					__spin_unlock_mb_cache_entry(ce);
+					schedule();
+					__spin_lock_mb_cache_entry(ce);
+					ce->e_queued--;
+				}
+				finish_wait(&mb_cache_queue, &wait);
 			}
-			finish_wait(&mb_cache_queue, &wait);
-
-			if (!__mb_cache_entry_is_hashed(ce)) {
-				__mb_cache_entry_release_unlock(ce);
+			__spin_unlock_mb_cache_entry(ce);
+			if (!list_empty(&ce->e_lru_list)) {
 				spin_lock(&mb_cache_spinlock);
+				list_del_init(&ce->e_lru_list);
+				spin_unlock(&mb_cache_spinlock);
+			}
+			if (!__mb_cache_entry_is_block_hashed(ce)) {
+				__mb_cache_entry_release(ce);
 				return ERR_PTR(-EAGAIN);
 			}
 			return ce;
 		}
 		l = l->next;
 	}
+	hlist_bl_unlock(head);
 	return NULL;
 }
 
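Annotation: the reader/writer split between the two paths above is carried entirely in e_used. __mb_cache_entry_find() adds 1 per reader and waits only while a writer is present, whereas mb_cache_entry_get() adds 1 + MB_CACHE_WRITER and waits for any holder. A one-function sketch of that convention; this helper does not exist in the file.

/* Illustrative only: e_used >= MB_CACHE_WRITER means the entry is held
 * exclusively (mb_cache_entry_get() added 1 + MB_CACHE_WRITER); smaller
 * non-zero values count shared holders from __mb_cache_entry_find(). */
static inline int mb_cache_entry_is_write_held_sketch(struct mb_cache_entry *ce)
{
	return ce->e_used >= MB_CACHE_WRITER;
}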
@@ -562,13 +786,17 @@ mb_cache_entry_find_first(struct mb_cache *cache, struct block_device *bdev,
 			  unsigned int key)
 {
 	unsigned int bucket = hash_long(key, cache->c_bucket_bits);
-	struct list_head *l;
-	struct mb_cache_entry *ce;
-
-	spin_lock(&mb_cache_spinlock);
-	l = cache->c_index_hash[bucket].next;
-	ce = __mb_cache_entry_find(l, &cache->c_index_hash[bucket], bdev, key);
-	spin_unlock(&mb_cache_spinlock);
+	struct hlist_bl_node *l;
+	struct mb_cache_entry *ce = NULL;
+	struct hlist_bl_head *index_hash_p;
+
+	index_hash_p = &cache->c_index_hash[bucket];
+	hlist_bl_lock(index_hash_p);
+	if (!hlist_bl_empty(index_hash_p)) {
+		l = hlist_bl_first(index_hash_p);
+		ce = __mb_cache_entry_find(l, index_hash_p, bdev, key);
+	} else
+		hlist_bl_unlock(index_hash_p);
 	return ce;
 }
 
@@ -597,13 +825,17 @@ mb_cache_entry_find_next(struct mb_cache_entry *prev,
 {
 	struct mb_cache *cache = prev->e_cache;
 	unsigned int bucket = hash_long(key, cache->c_bucket_bits);
-	struct list_head *l;
+	struct hlist_bl_node *l;
 	struct mb_cache_entry *ce;
+	struct hlist_bl_head *index_hash_p;
 
-	spin_lock(&mb_cache_spinlock);
+	index_hash_p = &cache->c_index_hash[bucket];
+	mb_assert(prev->e_index_hash_p == index_hash_p);
+	hlist_bl_lock(index_hash_p);
+	mb_assert(!hlist_bl_empty(index_hash_p));
 	l = prev->e_index.o_list.next;
-	ce = __mb_cache_entry_find(l, &cache->c_index_hash[bucket], bdev, key);
-	__mb_cache_entry_release_unlock(prev);
+	ce = __mb_cache_entry_find(l, index_hash_p, bdev, key);
+	__mb_cache_entry_release(prev);
 	return ce;
 }
 
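Annotation: a final hedged sketch of how the two find routines are meant to be chained by a caller searching an index key. The loop body and names are invented; the iteration protocol (find_first, then find_next on the previous entry, which also releases it, stopping on NULL or ERR_PTR) is the one implemented above.

/* Illustrative only: walk all cache entries that share an index key. */
static struct mb_cache_entry *
myfs_find_block_by_hash_sketch(struct mb_cache *cache,
			       struct block_device *bdev, unsigned int hash)
{
	struct mb_cache_entry *ce;

	ce = mb_cache_entry_find_first(cache, bdev, hash);
	while (!IS_ERR_OR_NULL(ce)) {
		if (myfs_entry_matches(ce))	/* hypothetical predicate */
			return ce;		/* returned with e_used held */
		/* find_next() releases 'ce' internally before moving on. */
		ce = mb_cache_entry_find_next(ce, bdev, hash);
	}
	return NULL;	/* not found, or raced with a free (ERR_PTR(-EAGAIN)) */
}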