diff options
Diffstat (limited to 'fs/mbcache.c')
| -rw-r--r-- | fs/mbcache.c | 540 |
1 files changed, 386 insertions, 154 deletions
diff --git a/fs/mbcache.c b/fs/mbcache.c index e519e45bf673..bf166e388f0d 100644 --- a/fs/mbcache.c +++ b/fs/mbcache.c | |||
| @@ -26,6 +26,41 @@ | |||
| 26 | * back on the lru list. | 26 | * back on the lru list. |
| 27 | */ | 27 | */ |
| 28 | 28 | ||
| 29 | /* | ||
| 30 | * Lock descriptions and usage: | ||
| 31 | * | ||
| 32 | * Each hash chain of both the block and index hash tables now contains | ||
| 33 | * a built-in lock used to serialize accesses to the hash chain. | ||
| 34 | * | ||
| 35 | * Accesses to global data structures mb_cache_list and mb_cache_lru_list | ||
| 36 | * are serialized via the global spinlock mb_cache_spinlock. | ||
| 37 | * | ||
| 38 | * Each mb_cache_entry contains a spinlock, e_entry_lock, to serialize | ||
| 39 | * accesses to its local data, such as e_used and e_queued. | ||
| 40 | * | ||
| 41 | * Lock ordering: | ||
| 42 | * | ||
| 43 | * Each block hash chain's lock has the highest lock order, followed by an | ||
| 44 | * index hash chain's lock, mb_cache_bg_lock (used to implement mb_cache_entry's | ||
| 45 | * lock), and mb_cache_spinlock, with the lowest order. While holding | ||
| 46 | * either a block or index hash chain lock, a thread can acquire an | ||
| 47 | * mb_cache_bg_lock, which in turn can also acquire mb_cache_spinlock. | ||
| 48 | * | ||
| 49 | * Synchronization: | ||
| 50 | * | ||
| 51 | * Since both mb_cache_entry_get and mb_cache_entry_find scan the block and | ||
| 52 | * index hash chain, it needs to lock the corresponding hash chain. For each | ||
| 53 | * mb_cache_entry within the chain, it needs to lock the mb_cache_entry to | ||
| 54 | * prevent either any simultaneous release or free on the entry and also | ||
| 55 | * to serialize accesses to either the e_used or e_queued member of the entry. | ||
| 56 | * | ||
| 57 | * To avoid having a dangling reference to an already freed | ||
| 58 | * mb_cache_entry, an mb_cache_entry is only freed when it is not on a | ||
| 59 | * block hash chain and also no longer being referenced, i.e. both e_used | ||
| 60 | * and e_queued are 0. When an mb_cache_entry is explicitly freed it is | ||
| 61 | * first removed from a block hash chain. | ||
| 62 | */ | ||
| 63 | |||
| 29 | #include <linux/kernel.h> | 64 | #include <linux/kernel.h> |
| 30 | #include <linux/module.h> | 65 | #include <linux/module.h> |
| 31 | 66 | ||
| @@ -34,9 +69,10 @@ | |||
| 34 | #include <linux/mm.h> | 69 | #include <linux/mm.h> |
| 35 | #include <linux/slab.h> | 70 | #include <linux/slab.h> |
| 36 | #include <linux/sched.h> | 71 | #include <linux/sched.h> |
| 37 | #include <linux/init.h> | 72 | #include <linux/list_bl.h> |
| 38 | #include <linux/mbcache.h> | 73 | #include <linux/mbcache.h> |
| 39 | 74 | #include <linux/init.h> | |
| 75 | #include <linux/blockgroup_lock.h> | ||
| 40 | 76 | ||
| 41 | #ifdef MB_CACHE_DEBUG | 77 | #ifdef MB_CACHE_DEBUG |
| 42 | # define mb_debug(f...) do { \ | 78 | # define mb_debug(f...) do { \ |
| @@ -57,8 +93,14 @@ | |||
| 57 | 93 | ||
| 58 | #define MB_CACHE_WRITER ((unsigned short)~0U >> 1) | 94 | #define MB_CACHE_WRITER ((unsigned short)~0U >> 1) |
| 59 | 95 | ||
| 96 | #define MB_CACHE_ENTRY_LOCK_BITS __builtin_log2(NR_BG_LOCKS) | ||
| 97 | #define MB_CACHE_ENTRY_LOCK_INDEX(ce) \ | ||
| 98 | (hash_long((unsigned long)ce, MB_CACHE_ENTRY_LOCK_BITS)) | ||
| 99 | |||
| 60 | static DECLARE_WAIT_QUEUE_HEAD(mb_cache_queue); | 100 | static DECLARE_WAIT_QUEUE_HEAD(mb_cache_queue); |
| 61 | 101 | static struct blockgroup_lock *mb_cache_bg_lock; | |
| 102 | static struct kmem_cache *mb_cache_kmem_cache; | ||
| 103 | |||
| 62 | MODULE_AUTHOR("Andreas Gruenbacher <a.gruenbacher@computer.org>"); | 104 | MODULE_AUTHOR("Andreas Gruenbacher <a.gruenbacher@computer.org>"); |
| 63 | MODULE_DESCRIPTION("Meta block cache (for extended attributes)"); | 105 | MODULE_DESCRIPTION("Meta block cache (for extended attributes)"); |
| 64 | MODULE_LICENSE("GPL"); | 106 | MODULE_LICENSE("GPL"); |
| @@ -86,58 +128,110 @@ static LIST_HEAD(mb_cache_list); | |||
| 86 | static LIST_HEAD(mb_cache_lru_list); | 128 | static LIST_HEAD(mb_cache_lru_list); |
| 87 | static DEFINE_SPINLOCK(mb_cache_spinlock); | 129 | static DEFINE_SPINLOCK(mb_cache_spinlock); |
| 88 | 130 | ||
| 131 | static inline void | ||
| 132 | __spin_lock_mb_cache_entry(struct mb_cache_entry *ce) | ||
| 133 | { | ||
| 134 | spin_lock(bgl_lock_ptr(mb_cache_bg_lock, | ||
| 135 | MB_CACHE_ENTRY_LOCK_INDEX(ce))); | ||
| 136 | } | ||
| 137 | |||
| 138 | static inline void | ||
| 139 | __spin_unlock_mb_cache_entry(struct mb_cache_entry *ce) | ||
| 140 | { | ||
| 141 | spin_unlock(bgl_lock_ptr(mb_cache_bg_lock, | ||
| 142 | MB_CACHE_ENTRY_LOCK_INDEX(ce))); | ||
| 143 | } | ||
| 144 | |||
| 89 | static inline int | 145 | static inline int |
| 90 | __mb_cache_entry_is_hashed(struct mb_cache_entry *ce) | 146 | __mb_cache_entry_is_block_hashed(struct mb_cache_entry *ce) |
| 91 | { | 147 | { |
| 92 | return !list_empty(&ce->e_block_list); | 148 | return !hlist_bl_unhashed(&ce->e_block_list); |
| 93 | } | 149 | } |
| 94 | 150 | ||
| 95 | 151 | ||
| 96 | static void | 152 | static inline void |
| 97 | __mb_cache_entry_unhash(struct mb_cache_entry *ce) | 153 | __mb_cache_entry_unhash_block(struct mb_cache_entry *ce) |
| 98 | { | 154 | { |
| 99 | if (__mb_cache_entry_is_hashed(ce)) { | 155 | if (__mb_cache_entry_is_block_hashed(ce)) |
| 100 | list_del_init(&ce->e_block_list); | 156 | hlist_bl_del_init(&ce->e_block_list); |
| 101 | list_del(&ce->e_index.o_list); | ||
| 102 | } | ||
| 103 | } | 157 | } |
| 104 | 158 | ||
| 159 | static inline int | ||
| 160 | __mb_cache_entry_is_index_hashed(struct mb_cache_entry *ce) | ||
| 161 | { | ||
| 162 | return !hlist_bl_unhashed(&ce->e_index.o_list); | ||
| 163 | } | ||
| 164 | |||
| 165 | static inline void | ||
| 166 | __mb_cache_entry_unhash_index(struct mb_cache_entry *ce) | ||
| 167 | { | ||
| 168 | if (__mb_cache_entry_is_index_hashed(ce)) | ||
| 169 | hlist_bl_del_init(&ce->e_index.o_list); | ||
| 170 | } | ||
| 171 | |||
| 172 | /* | ||
| 173 | * __mb_cache_entry_unhash_unlock() | ||
| 174 | * | ||
| 175 | * This function is called to unhash both the block and index hash | ||
| 176 | * chain. | ||
| 177 | * It assumes both the block and index hash chains are locked upon entry. | ||
| 178 | * It also unlocks both hash chains upon exit. | ||
| 179 | */ | ||
| 180 | static inline void | ||
| 181 | __mb_cache_entry_unhash_unlock(struct mb_cache_entry *ce) | ||
| 182 | { | ||
| 183 | __mb_cache_entry_unhash_index(ce); | ||
| 184 | hlist_bl_unlock(ce->e_index_hash_p); | ||
| 185 | __mb_cache_entry_unhash_block(ce); | ||
| 186 | hlist_bl_unlock(ce->e_block_hash_p); | ||
| 187 | } | ||
| 105 | 188 | ||
| 106 | static void | 189 | static void |
| 107 | __mb_cache_entry_forget(struct mb_cache_entry *ce, gfp_t gfp_mask) | 190 | __mb_cache_entry_forget(struct mb_cache_entry *ce, gfp_t gfp_mask) |
| 108 | { | 191 | { |
| 109 | struct mb_cache *cache = ce->e_cache; | 192 | struct mb_cache *cache = ce->e_cache; |
| 110 | 193 | ||
| 111 | mb_assert(!(ce->e_used || ce->e_queued)); | 194 | mb_assert(!(ce->e_used || ce->e_queued || atomic_read(&ce->e_refcnt))); |
| 112 | kmem_cache_free(cache->c_entry_cache, ce); | 195 | kmem_cache_free(cache->c_entry_cache, ce); |
| 113 | atomic_dec(&cache->c_entry_count); | 196 | atomic_dec(&cache->c_entry_count); |
| 114 | } | 197 | } |
| 115 | 198 | ||
| 116 | |||
| 117 | static void | 199 | static void |
| 118 | __mb_cache_entry_release_unlock(struct mb_cache_entry *ce) | 200 | __mb_cache_entry_release(struct mb_cache_entry *ce) |
| 119 | __releases(mb_cache_spinlock) | ||
| 120 | { | 201 | { |
| 202 | /* First lock the entry to serialize access to its local data. */ | ||
| 203 | __spin_lock_mb_cache_entry(ce); | ||
| 121 | /* Wake up all processes queuing for this cache entry. */ | 204 | /* Wake up all processes queuing for this cache entry. */ |
| 122 | if (ce->e_queued) | 205 | if (ce->e_queued) |
| 123 | wake_up_all(&mb_cache_queue); | 206 | wake_up_all(&mb_cache_queue); |
| 124 | if (ce->e_used >= MB_CACHE_WRITER) | 207 | if (ce->e_used >= MB_CACHE_WRITER) |
| 125 | ce->e_used -= MB_CACHE_WRITER; | 208 | ce->e_used -= MB_CACHE_WRITER; |
| 209 | /* | ||
| 210 | * Make sure that all cache entries on lru_list have | ||
| 211 | * both e_used and e_queued of 0s. | ||
| 212 | */ | ||
| 126 | ce->e_used--; | 213 | ce->e_used--; |
| 127 | if (!(ce->e_used || ce->e_queued)) { | 214 | if (!(ce->e_used || ce->e_queued || atomic_read(&ce->e_refcnt))) { |
| 128 | if (!__mb_cache_entry_is_hashed(ce)) | 215 | if (!__mb_cache_entry_is_block_hashed(ce)) { |
| 216 | __spin_unlock_mb_cache_entry(ce); | ||
| 129 | goto forget; | 217 | goto forget; |
| 130 | mb_assert(list_empty(&ce->e_lru_list)); | 218 | } |
| 131 | list_add_tail(&ce->e_lru_list, &mb_cache_lru_list); | 219 | /* |
| 220 | * Need access to lru list, first drop entry lock, | ||
| 221 | * then reacquire the lock in the proper order. | ||
| 222 | */ | ||
| 223 | spin_lock(&mb_cache_spinlock); | ||
| 224 | if (list_empty(&ce->e_lru_list)) | ||
| 225 | list_add_tail(&ce->e_lru_list, &mb_cache_lru_list); | ||
| 226 | spin_unlock(&mb_cache_spinlock); | ||
| 132 | } | 227 | } |
| 133 | spin_unlock(&mb_cache_spinlock); | 228 | __spin_unlock_mb_cache_entry(ce); |
| 134 | return; | 229 | return; |
| 135 | forget: | 230 | forget: |
| 136 | spin_unlock(&mb_cache_spinlock); | 231 | mb_assert(list_empty(&ce->e_lru_list)); |
| 137 | __mb_cache_entry_forget(ce, GFP_KERNEL); | 232 | __mb_cache_entry_forget(ce, GFP_KERNEL); |
| 138 | } | 233 | } |
| 139 | 234 | ||
| 140 | |||
| 141 | /* | 235 | /* |
| 142 | * mb_cache_shrink_scan() memory pressure callback | 236 | * mb_cache_shrink_scan() memory pressure callback |
| 143 | * | 237 | * |
| @@ -160,17 +254,34 @@ mb_cache_shrink_scan(struct shrinker *shrink, struct shrink_control *sc) | |||
| 160 | 254 | ||
| 161 | mb_debug("trying to free %d entries", nr_to_scan); | 255 | mb_debug("trying to free %d entries", nr_to_scan); |
| 162 | spin_lock(&mb_cache_spinlock); | 256 | spin_lock(&mb_cache_spinlock); |
| 163 | while (nr_to_scan-- && !list_empty(&mb_cache_lru_list)) { | 257 | while ((nr_to_scan-- > 0) && !list_empty(&mb_cache_lru_list)) { |
| 164 | struct mb_cache_entry *ce = | 258 | struct mb_cache_entry *ce = |
| 165 | list_entry(mb_cache_lru_list.next, | 259 | list_entry(mb_cache_lru_list.next, |
| 166 | struct mb_cache_entry, e_lru_list); | 260 | struct mb_cache_entry, e_lru_list); |
| 167 | list_move_tail(&ce->e_lru_list, &free_list); | 261 | list_del_init(&ce->e_lru_list); |
| 168 | __mb_cache_entry_unhash(ce); | 262 | if (ce->e_used || ce->e_queued || atomic_read(&ce->e_refcnt)) |
| 169 | freed++; | 263 | continue; |
| 264 | spin_unlock(&mb_cache_spinlock); | ||
| 265 | /* Prevent any find or get operation on the entry */ | ||
| 266 | hlist_bl_lock(ce->e_block_hash_p); | ||
| 267 | hlist_bl_lock(ce->e_index_hash_p); | ||
| 268 | /* Ignore if it is touched by a find/get */ | ||
| 269 | if (ce->e_used || ce->e_queued || atomic_read(&ce->e_refcnt) || | ||
| 270 | !list_empty(&ce->e_lru_list)) { | ||
| 271 | hlist_bl_unlock(ce->e_index_hash_p); | ||
| 272 | hlist_bl_unlock(ce->e_block_hash_p); | ||
| 273 | spin_lock(&mb_cache_spinlock); | ||
| 274 | continue; | ||
| 275 | } | ||
| 276 | __mb_cache_entry_unhash_unlock(ce); | ||
| 277 | list_add_tail(&ce->e_lru_list, &free_list); | ||
| 278 | spin_lock(&mb_cache_spinlock); | ||
| 170 | } | 279 | } |
| 171 | spin_unlock(&mb_cache_spinlock); | 280 | spin_unlock(&mb_cache_spinlock); |
| 281 | |||
| 172 | list_for_each_entry_safe(entry, tmp, &free_list, e_lru_list) { | 282 | list_for_each_entry_safe(entry, tmp, &free_list, e_lru_list) { |
| 173 | __mb_cache_entry_forget(entry, gfp_mask); | 283 | __mb_cache_entry_forget(entry, gfp_mask); |
| 284 | freed++; | ||
| 174 | } | 285 | } |
| 175 | return freed; | 286 | return freed; |
| 176 | } | 287 | } |
| @@ -215,29 +326,40 @@ mb_cache_create(const char *name, int bucket_bits) | |||
| 215 | int n, bucket_count = 1 << bucket_bits; | 326 | int n, bucket_count = 1 << bucket_bits; |
| 216 | struct mb_cache *cache = NULL; | 327 | struct mb_cache *cache = NULL; |
| 217 | 328 | ||
| 329 | if (!mb_cache_bg_lock) { | ||
| 330 | mb_cache_bg_lock = kmalloc(sizeof(struct blockgroup_lock), | ||
| 331 | GFP_KERNEL); | ||
| 332 | if (!mb_cache_bg_lock) | ||
| 333 | return NULL; | ||
| 334 | bgl_lock_init(mb_cache_bg_lock); | ||
| 335 | } | ||
| 336 | |||
| 218 | cache = kmalloc(sizeof(struct mb_cache), GFP_KERNEL); | 337 | cache = kmalloc(sizeof(struct mb_cache), GFP_KERNEL); |
| 219 | if (!cache) | 338 | if (!cache) |
| 220 | return NULL; | 339 | return NULL; |
| 221 | cache->c_name = name; | 340 | cache->c_name = name; |
| 222 | atomic_set(&cache->c_entry_count, 0); | 341 | atomic_set(&cache->c_entry_count, 0); |
| 223 | cache->c_bucket_bits = bucket_bits; | 342 | cache->c_bucket_bits = bucket_bits; |
| 224 | cache->c_block_hash = kmalloc(bucket_count * sizeof(struct list_head), | 343 | cache->c_block_hash = kmalloc(bucket_count * |
| 225 | GFP_KERNEL); | 344 | sizeof(struct hlist_bl_head), GFP_KERNEL); |
| 226 | if (!cache->c_block_hash) | 345 | if (!cache->c_block_hash) |
| 227 | goto fail; | 346 | goto fail; |
| 228 | for (n=0; n<bucket_count; n++) | 347 | for (n=0; n<bucket_count; n++) |
| 229 | INIT_LIST_HEAD(&cache->c_block_hash[n]); | 348 | INIT_HLIST_BL_HEAD(&cache->c_block_hash[n]); |
| 230 | cache->c_index_hash = kmalloc(bucket_count * sizeof(struct list_head), | 349 | cache->c_index_hash = kmalloc(bucket_count * |
| 231 | GFP_KERNEL); | 350 | sizeof(struct hlist_bl_head), GFP_KERNEL); |
| 232 | if (!cache->c_index_hash) | 351 | if (!cache->c_index_hash) |
| 233 | goto fail; | 352 | goto fail; |
| 234 | for (n=0; n<bucket_count; n++) | 353 | for (n=0; n<bucket_count; n++) |
| 235 | INIT_LIST_HEAD(&cache->c_index_hash[n]); | 354 | INIT_HLIST_BL_HEAD(&cache->c_index_hash[n]); |
| 236 | cache->c_entry_cache = kmem_cache_create(name, | 355 | if (!mb_cache_kmem_cache) { |
| 237 | sizeof(struct mb_cache_entry), 0, | 356 | mb_cache_kmem_cache = kmem_cache_create(name, |
| 238 | SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, NULL); | 357 | sizeof(struct mb_cache_entry), 0, |
| 239 | if (!cache->c_entry_cache) | 358 | SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, NULL); |
| 240 | goto fail2; | 359 | if (!mb_cache_kmem_cache) |
| 360 | goto fail2; | ||
| 361 | } | ||
| 362 | cache->c_entry_cache = mb_cache_kmem_cache; | ||
| 241 | 363 | ||
| 242 | /* | 364 | /* |
| 243 | * Set an upper limit on the number of cache entries so that the hash | 365 | * Set an upper limit on the number of cache entries so that the hash |
| @@ -273,21 +395,47 @@ void | |||
| 273 | mb_cache_shrink(struct block_device *bdev) | 395 | mb_cache_shrink(struct block_device *bdev) |
| 274 | { | 396 | { |
| 275 | LIST_HEAD(free_list); | 397 | LIST_HEAD(free_list); |
| 276 | struct list_head *l, *ltmp; | 398 | struct list_head *l; |
| 399 | struct mb_cache_entry *ce, *tmp; | ||
| 277 | 400 | ||
| 401 | l = &mb_cache_lru_list; | ||
| 278 | spin_lock(&mb_cache_spinlock); | 402 | spin_lock(&mb_cache_spinlock); |
| 279 | list_for_each_safe(l, ltmp, &mb_cache_lru_list) { | 403 | while (!list_is_last(l, &mb_cache_lru_list)) { |
| 280 | struct mb_cache_entry *ce = | 404 | l = l->next; |
| 281 | list_entry(l, struct mb_cache_entry, e_lru_list); | 405 | ce = list_entry(l, struct mb_cache_entry, e_lru_list); |
| 282 | if (ce->e_bdev == bdev) { | 406 | if (ce->e_bdev == bdev) { |
| 283 | list_move_tail(&ce->e_lru_list, &free_list); | 407 | list_del_init(&ce->e_lru_list); |
| 284 | __mb_cache_entry_unhash(ce); | 408 | if (ce->e_used || ce->e_queued || |
| 409 | atomic_read(&ce->e_refcnt)) | ||
| 410 | continue; | ||
| 411 | spin_unlock(&mb_cache_spinlock); | ||
| 412 | /* | ||
| 413 | * Prevent any find or get operation on the entry. | ||
| 414 | */ | ||
| 415 | hlist_bl_lock(ce->e_block_hash_p); | ||
| 416 | hlist_bl_lock(ce->e_index_hash_p); | ||
| 417 | /* Ignore if it is touched by a find/get */ | ||
| 418 | if (ce->e_used || ce->e_queued || | ||
| 419 | atomic_read(&ce->e_refcnt) || | ||
| 420 | !list_empty(&ce->e_lru_list)) { | ||
| 421 | hlist_bl_unlock(ce->e_index_hash_p); | ||
| 422 | hlist_bl_unlock(ce->e_block_hash_p); | ||
| 423 | l = &mb_cache_lru_list; | ||
| 424 | spin_lock(&mb_cache_spinlock); | ||
| 425 | continue; | ||
| 426 | } | ||
| 427 | __mb_cache_entry_unhash_unlock(ce); | ||
| 428 | mb_assert(!(ce->e_used || ce->e_queued || | ||
| 429 | atomic_read(&ce->e_refcnt))); | ||
| 430 | list_add_tail(&ce->e_lru_list, &free_list); | ||
| 431 | l = &mb_cache_lru_list; | ||
| 432 | spin_lock(&mb_cache_spinlock); | ||
| 285 | } | 433 | } |
| 286 | } | 434 | } |
| 287 | spin_unlock(&mb_cache_spinlock); | 435 | spin_unlock(&mb_cache_spinlock); |
| 288 | list_for_each_safe(l, ltmp, &free_list) { | 436 | |
| 289 | __mb_cache_entry_forget(list_entry(l, struct mb_cache_entry, | 437 | list_for_each_entry_safe(ce, tmp, &free_list, e_lru_list) { |
| 290 | e_lru_list), GFP_KERNEL); | 438 | __mb_cache_entry_forget(ce, GFP_KERNEL); |
| 291 | } | 439 | } |
| 292 | } | 440 | } |
| 293 | 441 | ||
| @@ -303,23 +451,27 @@ void | |||
| 303 | mb_cache_destroy(struct mb_cache *cache) | 451 | mb_cache_destroy(struct mb_cache *cache) |
| 304 | { | 452 | { |
| 305 | LIST_HEAD(free_list); | 453 | LIST_HEAD(free_list); |
| 306 | struct list_head *l, *ltmp; | 454 | struct mb_cache_entry *ce, *tmp; |
| 307 | 455 | ||
| 308 | spin_lock(&mb_cache_spinlock); | 456 | spin_lock(&mb_cache_spinlock); |
| 309 | list_for_each_safe(l, ltmp, &mb_cache_lru_list) { | 457 | list_for_each_entry_safe(ce, tmp, &mb_cache_lru_list, e_lru_list) { |
| 310 | struct mb_cache_entry *ce = | 458 | if (ce->e_cache == cache) |
| 311 | list_entry(l, struct mb_cache_entry, e_lru_list); | ||
| 312 | if (ce->e_cache == cache) { | ||
| 313 | list_move_tail(&ce->e_lru_list, &free_list); | 459 | list_move_tail(&ce->e_lru_list, &free_list); |
| 314 | __mb_cache_entry_unhash(ce); | ||
| 315 | } | ||
| 316 | } | 460 | } |
| 317 | list_del(&cache->c_cache_list); | 461 | list_del(&cache->c_cache_list); |
| 318 | spin_unlock(&mb_cache_spinlock); | 462 | spin_unlock(&mb_cache_spinlock); |
| 319 | 463 | ||
| 320 | list_for_each_safe(l, ltmp, &free_list) { | 464 | list_for_each_entry_safe(ce, tmp, &free_list, e_lru_list) { |
| 321 | __mb_cache_entry_forget(list_entry(l, struct mb_cache_entry, | 465 | list_del_init(&ce->e_lru_list); |
| 322 | e_lru_list), GFP_KERNEL); | 466 | /* |
| 467 | * Prevent any find or get operation on the entry. | ||
| 468 | */ | ||
| 469 | hlist_bl_lock(ce->e_block_hash_p); | ||
| 470 | hlist_bl_lock(ce->e_index_hash_p); | ||
| 471 | mb_assert(!(ce->e_used || ce->e_queued || | ||
| 472 | atomic_read(&ce->e_refcnt))); | ||
| 473 | __mb_cache_entry_unhash_unlock(ce); | ||
| 474 | __mb_cache_entry_forget(ce, GFP_KERNEL); | ||
| 323 | } | 475 | } |
| 324 | 476 | ||
| 325 | if (atomic_read(&cache->c_entry_count) > 0) { | 477 | if (atomic_read(&cache->c_entry_count) > 0) { |
| @@ -328,8 +480,10 @@ mb_cache_destroy(struct mb_cache *cache) | |||
| 328 | atomic_read(&cache->c_entry_count)); | 480 | atomic_read(&cache->c_entry_count)); |
| 329 | } | 481 | } |
| 330 | 482 | ||
| 331 | kmem_cache_destroy(cache->c_entry_cache); | 483 | if (list_empty(&mb_cache_list)) { |
| 332 | 484 | kmem_cache_destroy(mb_cache_kmem_cache); | |
| 485 | mb_cache_kmem_cache = NULL; | ||
| 486 | } | ||
| 333 | kfree(cache->c_index_hash); | 487 | kfree(cache->c_index_hash); |
| 334 | kfree(cache->c_block_hash); | 488 | kfree(cache->c_block_hash); |
| 335 | kfree(cache); | 489 | kfree(cache); |
| @@ -346,28 +500,61 @@ mb_cache_destroy(struct mb_cache *cache) | |||
| 346 | struct mb_cache_entry * | 500 | struct mb_cache_entry * |
| 347 | mb_cache_entry_alloc(struct mb_cache *cache, gfp_t gfp_flags) | 501 | mb_cache_entry_alloc(struct mb_cache *cache, gfp_t gfp_flags) |
| 348 | { | 502 | { |
| 349 | struct mb_cache_entry *ce = NULL; | 503 | struct mb_cache_entry *ce; |
| 350 | 504 | ||
| 351 | if (atomic_read(&cache->c_entry_count) >= cache->c_max_entries) { | 505 | if (atomic_read(&cache->c_entry_count) >= cache->c_max_entries) { |
| 506 | struct list_head *l; | ||
| 507 | |||
| 508 | l = &mb_cache_lru_list; | ||
| 352 | spin_lock(&mb_cache_spinlock); | 509 | spin_lock(&mb_cache_spinlock); |
| 353 | if (!list_empty(&mb_cache_lru_list)) { | 510 | while (!list_is_last(l, &mb_cache_lru_list)) { |
| 354 | ce = list_entry(mb_cache_lru_list.next, | 511 | l = l->next; |
| 355 | struct mb_cache_entry, e_lru_list); | 512 | ce = list_entry(l, struct mb_cache_entry, e_lru_list); |
| 356 | list_del_init(&ce->e_lru_list); | 513 | if (ce->e_cache == cache) { |
| 357 | __mb_cache_entry_unhash(ce); | 514 | list_del_init(&ce->e_lru_list); |
| 515 | if (ce->e_used || ce->e_queued || | ||
| 516 | atomic_read(&ce->e_refcnt)) | ||
| 517 | continue; | ||
| 518 | spin_unlock(&mb_cache_spinlock); | ||
| 519 | /* | ||
| 520 | * Prevent any find or get operation on the | ||
| 521 | * entry. | ||
| 522 | */ | ||
| 523 | hlist_bl_lock(ce->e_block_hash_p); | ||
| 524 | hlist_bl_lock(ce->e_index_hash_p); | ||
| 525 | /* Ignore if it is touched by a find/get */ | ||
| 526 | if (ce->e_used || ce->e_queued || | ||
| 527 | atomic_read(&ce->e_refcnt) || | ||
| 528 | !list_empty(&ce->e_lru_list)) { | ||
| 529 | hlist_bl_unlock(ce->e_index_hash_p); | ||
| 530 | hlist_bl_unlock(ce->e_block_hash_p); | ||
| 531 | l = &mb_cache_lru_list; | ||
| 532 | spin_lock(&mb_cache_spinlock); | ||
| 533 | continue; | ||
| 534 | } | ||
| 535 | mb_assert(list_empty(&ce->e_lru_list)); | ||
| 536 | mb_assert(!(ce->e_used || ce->e_queued || | ||
| 537 | atomic_read(&ce->e_refcnt))); | ||
| 538 | __mb_cache_entry_unhash_unlock(ce); | ||
| 539 | goto found; | ||
| 540 | } | ||
| 358 | } | 541 | } |
| 359 | spin_unlock(&mb_cache_spinlock); | 542 | spin_unlock(&mb_cache_spinlock); |
| 360 | } | 543 | } |
| 361 | if (!ce) { | 544 | |
| 362 | ce = kmem_cache_alloc(cache->c_entry_cache, gfp_flags); | 545 | ce = kmem_cache_alloc(cache->c_entry_cache, gfp_flags); |
| 363 | if (!ce) | 546 | if (!ce) |
| 364 | return NULL; | 547 | return NULL; |
| 365 | atomic_inc(&cache->c_entry_count); | 548 | atomic_inc(&cache->c_entry_count); |
| 366 | INIT_LIST_HEAD(&ce->e_lru_list); | 549 | INIT_LIST_HEAD(&ce->e_lru_list); |
| 367 | INIT_LIST_HEAD(&ce->e_block_list); | 550 | INIT_HLIST_BL_NODE(&ce->e_block_list); |
| 368 | ce->e_cache = cache; | 551 | INIT_HLIST_BL_NODE(&ce->e_index.o_list); |
| 369 | ce->e_queued = 0; | 552 | ce->e_cache = cache; |
| 370 | } | 553 | ce->e_queued = 0; |
| 554 | atomic_set(&ce->e_refcnt, 0); | ||
| 555 | found: | ||
| 556 | ce->e_block_hash_p = &cache->c_block_hash[0]; | ||
| 557 | ce->e_index_hash_p = &cache->c_index_hash[0]; | ||
| 371 | ce->e_used = 1 + MB_CACHE_WRITER; | 558 | ce->e_used = 1 + MB_CACHE_WRITER; |
| 372 | return ce; | 559 | return ce; |
| 373 | } | 560 | } |
| @@ -393,29 +580,38 @@ mb_cache_entry_insert(struct mb_cache_entry *ce, struct block_device *bdev, | |||
| 393 | { | 580 | { |
| 394 | struct mb_cache *cache = ce->e_cache; | 581 | struct mb_cache *cache = ce->e_cache; |
| 395 | unsigned int bucket; | 582 | unsigned int bucket; |
| 396 | struct list_head *l; | 583 | struct hlist_bl_node *l; |
| 397 | int error = -EBUSY; | 584 | struct hlist_bl_head *block_hash_p; |
| 585 | struct hlist_bl_head *index_hash_p; | ||
| 586 | struct mb_cache_entry *lce; | ||
| 398 | 587 | ||
| 588 | mb_assert(ce); | ||
| 399 | bucket = hash_long((unsigned long)bdev + (block & 0xffffffff), | 589 | bucket = hash_long((unsigned long)bdev + (block & 0xffffffff), |
| 400 | cache->c_bucket_bits); | 590 | cache->c_bucket_bits); |
| 401 | spin_lock(&mb_cache_spinlock); | 591 | block_hash_p = &cache->c_block_hash[bucket]; |
| 402 | list_for_each_prev(l, &cache->c_block_hash[bucket]) { | 592 | hlist_bl_lock(block_hash_p); |
| 403 | struct mb_cache_entry *ce = | 593 | hlist_bl_for_each_entry(lce, l, block_hash_p, e_block_list) { |
| 404 | list_entry(l, struct mb_cache_entry, e_block_list); | 594 | if (lce->e_bdev == bdev && lce->e_block == block) { |
| 405 | if (ce->e_bdev == bdev && ce->e_block == block) | 595 | hlist_bl_unlock(block_hash_p); |
| 406 | goto out; | 596 | return -EBUSY; |
| 597 | } | ||
| 407 | } | 598 | } |
| 408 | __mb_cache_entry_unhash(ce); | 599 | mb_assert(!__mb_cache_entry_is_block_hashed(ce)); |
| 600 | __mb_cache_entry_unhash_block(ce); | ||
| 601 | __mb_cache_entry_unhash_index(ce); | ||
| 409 | ce->e_bdev = bdev; | 602 | ce->e_bdev = bdev; |
| 410 | ce->e_block = block; | 603 | ce->e_block = block; |
| 411 | list_add(&ce->e_block_list, &cache->c_block_hash[bucket]); | 604 | ce->e_block_hash_p = block_hash_p; |
| 412 | ce->e_index.o_key = key; | 605 | ce->e_index.o_key = key; |
| 606 | hlist_bl_add_head(&ce->e_block_list, block_hash_p); | ||
| 607 | hlist_bl_unlock(block_hash_p); | ||
| 413 | bucket = hash_long(key, cache->c_bucket_bits); | 608 | bucket = hash_long(key, cache->c_bucket_bits); |
| 414 | list_add(&ce->e_index.o_list, &cache->c_index_hash[bucket]); | 609 | index_hash_p = &cache->c_index_hash[bucket]; |
| 415 | error = 0; | 610 | hlist_bl_lock(index_hash_p); |
| 416 | out: | 611 | ce->e_index_hash_p = index_hash_p; |
| 417 | spin_unlock(&mb_cache_spinlock); | 612 | hlist_bl_add_head(&ce->e_index.o_list, index_hash_p); |
| 418 | return error; | 613 | hlist_bl_unlock(index_hash_p); |
| 614 | return 0; | ||
| 419 | } | 615 | } |
| 420 | 616 | ||
| 421 | 617 | ||
| @@ -429,24 +625,26 @@ out: | |||
| 429 | void | 625 | void |
| 430 | mb_cache_entry_release(struct mb_cache_entry *ce) | 626 | mb_cache_entry_release(struct mb_cache_entry *ce) |
| 431 | { | 627 | { |
| 432 | spin_lock(&mb_cache_spinlock); | 628 | __mb_cache_entry_release(ce); |
| 433 | __mb_cache_entry_release_unlock(ce); | ||
| 434 | } | 629 | } |
| 435 | 630 | ||
| 436 | 631 | ||
| 437 | /* | 632 | /* |
| 438 | * mb_cache_entry_free() | 633 | * mb_cache_entry_free() |
| 439 | * | 634 | * |
| 440 | * This is equivalent to the sequence mb_cache_entry_takeout() -- | ||
| 441 | * mb_cache_entry_release(). | ||
| 442 | */ | 635 | */ |
| 443 | void | 636 | void |
| 444 | mb_cache_entry_free(struct mb_cache_entry *ce) | 637 | mb_cache_entry_free(struct mb_cache_entry *ce) |
| 445 | { | 638 | { |
| 446 | spin_lock(&mb_cache_spinlock); | 639 | mb_assert(ce); |
| 447 | mb_assert(list_empty(&ce->e_lru_list)); | 640 | mb_assert(list_empty(&ce->e_lru_list)); |
| 448 | __mb_cache_entry_unhash(ce); | 641 | hlist_bl_lock(ce->e_index_hash_p); |
| 449 | __mb_cache_entry_release_unlock(ce); | 642 | __mb_cache_entry_unhash_index(ce); |
| 643 | hlist_bl_unlock(ce->e_index_hash_p); | ||
| 644 | hlist_bl_lock(ce->e_block_hash_p); | ||
| 645 | __mb_cache_entry_unhash_block(ce); | ||
| 646 | hlist_bl_unlock(ce->e_block_hash_p); | ||
| 647 | __mb_cache_entry_release(ce); | ||
| 450 | } | 648 | } |
| 451 | 649 | ||
| 452 | 650 | ||
| @@ -463,84 +661,110 @@ mb_cache_entry_get(struct mb_cache *cache, struct block_device *bdev, | |||
| 463 | sector_t block) | 661 | sector_t block) |
| 464 | { | 662 | { |
| 465 | unsigned int bucket; | 663 | unsigned int bucket; |
| 466 | struct list_head *l; | 664 | struct hlist_bl_node *l; |
| 467 | struct mb_cache_entry *ce; | 665 | struct mb_cache_entry *ce; |
| 666 | struct hlist_bl_head *block_hash_p; | ||
| 468 | 667 | ||
| 469 | bucket = hash_long((unsigned long)bdev + (block & 0xffffffff), | 668 | bucket = hash_long((unsigned long)bdev + (block & 0xffffffff), |
| 470 | cache->c_bucket_bits); | 669 | cache->c_bucket_bits); |
| 471 | spin_lock(&mb_cache_spinlock); | 670 | block_hash_p = &cache->c_block_hash[bucket]; |
| 472 | list_for_each(l, &cache->c_block_hash[bucket]) { | 671 | /* First serialize access to the block corresponding hash chain. */ |
| 473 | ce = list_entry(l, struct mb_cache_entry, e_block_list); | 672 | hlist_bl_lock(block_hash_p); |
| 673 | hlist_bl_for_each_entry(ce, l, block_hash_p, e_block_list) { | ||
| 674 | mb_assert(ce->e_block_hash_p == block_hash_p); | ||
| 474 | if (ce->e_bdev == bdev && ce->e_block == block) { | 675 | if (ce->e_bdev == bdev && ce->e_block == block) { |
| 475 | DEFINE_WAIT(wait); | 676 | /* |
| 677 | * Prevent a free from removing the entry. | ||
| 678 | */ | ||
| 679 | atomic_inc(&ce->e_refcnt); | ||
| 680 | hlist_bl_unlock(block_hash_p); | ||
| 681 | __spin_lock_mb_cache_entry(ce); | ||
| 682 | atomic_dec(&ce->e_refcnt); | ||
| 683 | if (ce->e_used > 0) { | ||
| 684 | DEFINE_WAIT(wait); | ||
| 685 | while (ce->e_used > 0) { | ||
| 686 | ce->e_queued++; | ||
| 687 | prepare_to_wait(&mb_cache_queue, &wait, | ||
| 688 | TASK_UNINTERRUPTIBLE); | ||
| 689 | __spin_unlock_mb_cache_entry(ce); | ||
| 690 | schedule(); | ||
| 691 | __spin_lock_mb_cache_entry(ce); | ||
| 692 | ce->e_queued--; | ||
| 693 | } | ||
| 694 | finish_wait(&mb_cache_queue, &wait); | ||
| 695 | } | ||
| 696 | ce->e_used += 1 + MB_CACHE_WRITER; | ||
| 697 | __spin_unlock_mb_cache_entry(ce); | ||
| 476 | 698 | ||
| 477 | if (!list_empty(&ce->e_lru_list)) | 699 | if (!list_empty(&ce->e_lru_list)) { |
| 700 | spin_lock(&mb_cache_spinlock); | ||
| 478 | list_del_init(&ce->e_lru_list); | 701 | list_del_init(&ce->e_lru_list); |
| 479 | |||
| 480 | while (ce->e_used > 0) { | ||
| 481 | ce->e_queued++; | ||
| 482 | prepare_to_wait(&mb_cache_queue, &wait, | ||
| 483 | TASK_UNINTERRUPTIBLE); | ||
| 484 | spin_unlock(&mb_cache_spinlock); | 702 | spin_unlock(&mb_cache_spinlock); |
| 485 | schedule(); | ||
| 486 | spin_lock(&mb_cache_spinlock); | ||
| 487 | ce->e_queued--; | ||
| 488 | } | 703 | } |
| 489 | finish_wait(&mb_cache_queue, &wait); | 704 | if (!__mb_cache_entry_is_block_hashed(ce)) { |
| 490 | ce->e_used += 1 + MB_CACHE_WRITER; | 705 | __mb_cache_entry_release(ce); |
| 491 | |||
| 492 | if (!__mb_cache_entry_is_hashed(ce)) { | ||
| 493 | __mb_cache_entry_release_unlock(ce); | ||
| 494 | return NULL; | 706 | return NULL; |
| 495 | } | 707 | } |
| 496 | goto cleanup; | 708 | return ce; |
| 497 | } | 709 | } |
| 498 | } | 710 | } |
| 499 | ce = NULL; | 711 | hlist_bl_unlock(block_hash_p); |
| 500 | 712 | return NULL; | |
| 501 | cleanup: | ||
| 502 | spin_unlock(&mb_cache_spinlock); | ||
| 503 | return ce; | ||
| 504 | } | 713 | } |
| 505 | 714 | ||
| 506 | #if !defined(MB_CACHE_INDEXES_COUNT) || (MB_CACHE_INDEXES_COUNT > 0) | 715 | #if !defined(MB_CACHE_INDEXES_COUNT) || (MB_CACHE_INDEXES_COUNT > 0) |
| 507 | 716 | ||
| 508 | static struct mb_cache_entry * | 717 | static struct mb_cache_entry * |
| 509 | __mb_cache_entry_find(struct list_head *l, struct list_head *head, | 718 | __mb_cache_entry_find(struct hlist_bl_node *l, struct hlist_bl_head *head, |
| 510 | struct block_device *bdev, unsigned int key) | 719 | struct block_device *bdev, unsigned int key) |
| 511 | { | 720 | { |
| 512 | while (l != head) { | 721 | |
| 722 | /* The index hash chain is already acquired by the caller. */ | ||
| 723 | while (l != NULL) { | ||
| 513 | struct mb_cache_entry *ce = | 724 | struct mb_cache_entry *ce = |
| 514 | list_entry(l, struct mb_cache_entry, e_index.o_list); | 725 | hlist_bl_entry(l, struct mb_cache_entry, |
| 726 | e_index.o_list); | ||
| 727 | mb_assert(ce->e_index_hash_p == head); | ||
| 515 | if (ce->e_bdev == bdev && ce->e_index.o_key == key) { | 728 | if (ce->e_bdev == bdev && ce->e_index.o_key == key) { |
| 516 | DEFINE_WAIT(wait); | 729 | /* |
| 517 | 730 | * Prevent a free from removing the entry. | |
| 518 | if (!list_empty(&ce->e_lru_list)) | 731 | */ |
| 519 | list_del_init(&ce->e_lru_list); | 732 | atomic_inc(&ce->e_refcnt); |
| 520 | 733 | hlist_bl_unlock(head); | |
| 734 | __spin_lock_mb_cache_entry(ce); | ||
| 735 | atomic_dec(&ce->e_refcnt); | ||
| 736 | ce->e_used++; | ||
| 521 | /* Incrementing before holding the lock gives readers | 737 | /* Incrementing before holding the lock gives readers |
| 522 | priority over writers. */ | 738 | priority over writers. */ |
| 523 | ce->e_used++; | 739 | if (ce->e_used >= MB_CACHE_WRITER) { |
| 524 | while (ce->e_used >= MB_CACHE_WRITER) { | 740 | DEFINE_WAIT(wait); |
| 525 | ce->e_queued++; | 741 | |
| 526 | prepare_to_wait(&mb_cache_queue, &wait, | 742 | while (ce->e_used >= MB_CACHE_WRITER) { |
| 527 | TASK_UNINTERRUPTIBLE); | 743 | ce->e_queued++; |
| 528 | spin_unlock(&mb_cache_spinlock); | 744 | prepare_to_wait(&mb_cache_queue, &wait, |
| 529 | schedule(); | 745 | TASK_UNINTERRUPTIBLE); |
| 530 | spin_lock(&mb_cache_spinlock); | 746 | __spin_unlock_mb_cache_entry(ce); |
| 531 | ce->e_queued--; | 747 | schedule(); |
| 748 | __spin_lock_mb_cache_entry(ce); | ||
| 749 | ce->e_queued--; | ||
| 750 | } | ||
| 751 | finish_wait(&mb_cache_queue, &wait); | ||
| 532 | } | 752 | } |
| 533 | finish_wait(&mb_cache_queue, &wait); | 753 | __spin_unlock_mb_cache_entry(ce); |
| 534 | 754 | if (!list_empty(&ce->e_lru_list)) { | |
| 535 | if (!__mb_cache_entry_is_hashed(ce)) { | ||
| 536 | __mb_cache_entry_release_unlock(ce); | ||
| 537 | spin_lock(&mb_cache_spinlock); | 755 | spin_lock(&mb_cache_spinlock); |
| 756 | list_del_init(&ce->e_lru_list); | ||
| 757 | spin_unlock(&mb_cache_spinlock); | ||
| 758 | } | ||
| 759 | if (!__mb_cache_entry_is_block_hashed(ce)) { | ||
| 760 | __mb_cache_entry_release(ce); | ||
| 538 | return ERR_PTR(-EAGAIN); | 761 | return ERR_PTR(-EAGAIN); |
| 539 | } | 762 | } |
| 540 | return ce; | 763 | return ce; |
| 541 | } | 764 | } |
| 542 | l = l->next; | 765 | l = l->next; |
| 543 | } | 766 | } |
| 767 | hlist_bl_unlock(head); | ||
| 544 | return NULL; | 768 | return NULL; |
| 545 | } | 769 | } |
| 546 | 770 | ||
| @@ -562,13 +786,17 @@ mb_cache_entry_find_first(struct mb_cache *cache, struct block_device *bdev, | |||
| 562 | unsigned int key) | 786 | unsigned int key) |
| 563 | { | 787 | { |
| 564 | unsigned int bucket = hash_long(key, cache->c_bucket_bits); | 788 | unsigned int bucket = hash_long(key, cache->c_bucket_bits); |
| 565 | struct list_head *l; | 789 | struct hlist_bl_node *l; |
| 566 | struct mb_cache_entry *ce; | 790 | struct mb_cache_entry *ce = NULL; |
| 567 | 791 | struct hlist_bl_head *index_hash_p; | |
| 568 | spin_lock(&mb_cache_spinlock); | 792 | |
| 569 | l = cache->c_index_hash[bucket].next; | 793 | index_hash_p = &cache->c_index_hash[bucket]; |
| 570 | ce = __mb_cache_entry_find(l, &cache->c_index_hash[bucket], bdev, key); | 794 | hlist_bl_lock(index_hash_p); |
| 571 | spin_unlock(&mb_cache_spinlock); | 795 | if (!hlist_bl_empty(index_hash_p)) { |
| 796 | l = hlist_bl_first(index_hash_p); | ||
| 797 | ce = __mb_cache_entry_find(l, index_hash_p, bdev, key); | ||
| 798 | } else | ||
| 799 | hlist_bl_unlock(index_hash_p); | ||
| 572 | return ce; | 800 | return ce; |
| 573 | } | 801 | } |
| 574 | 802 | ||
| @@ -597,13 +825,17 @@ mb_cache_entry_find_next(struct mb_cache_entry *prev, | |||
| 597 | { | 825 | { |
| 598 | struct mb_cache *cache = prev->e_cache; | 826 | struct mb_cache *cache = prev->e_cache; |
| 599 | unsigned int bucket = hash_long(key, cache->c_bucket_bits); | 827 | unsigned int bucket = hash_long(key, cache->c_bucket_bits); |
| 600 | struct list_head *l; | 828 | struct hlist_bl_node *l; |
| 601 | struct mb_cache_entry *ce; | 829 | struct mb_cache_entry *ce; |
| 830 | struct hlist_bl_head *index_hash_p; | ||
| 602 | 831 | ||
| 603 | spin_lock(&mb_cache_spinlock); | 832 | index_hash_p = &cache->c_index_hash[bucket]; |
| 833 | mb_assert(prev->e_index_hash_p == index_hash_p); | ||
| 834 | hlist_bl_lock(index_hash_p); | ||
| 835 | mb_assert(!hlist_bl_empty(index_hash_p)); | ||
| 604 | l = prev->e_index.o_list.next; | 836 | l = prev->e_index.o_list.next; |
| 605 | ce = __mb_cache_entry_find(l, &cache->c_index_hash[bucket], bdev, key); | 837 | ce = __mb_cache_entry_find(l, index_hash_p, bdev, key); |
| 606 | __mb_cache_entry_release_unlock(prev); | 838 | __mb_cache_entry_release(prev); |
| 607 | return ce; | 839 | return ce; |
| 608 | } | 840 | } |
| 609 | 841 | ||
