mbcache2: reimplement mbcache

Original mbcache was designed to have more features than what ext? filesystems ended up using. It supported entry being in more hashes, it had a home-grown rwlocking of each entry, and one cache could cache entries from multiple filesystems. This genericity also resulted in more complex locking, larger cache entries, and generally more code complexity. This is reimplementation of the mbcache functionality to exactly fit the purpose ext? filesystems use it for. Cache entries are now considerably smaller (7 instead of 13 longs), the code is considerably smaller as well (414 vs 913 lines of code), and IMO also simpler. The new code is also much more lightweight. I have measured the speed using artificial xattr-bench benchmark, which spawns P processes, each process sets xattr for F different files, and the value of xattr is randomly chosen from a pool of V values. Averages of runtimes for 5 runs for various combinations of parameters are below. The first value in each cell is old mbache, the second value is the new mbcache. V=10 F\P 1 2 4 8 16 32 64 10 0.158,0.157 0.208,0.196 0.500,0.277 0.798,0.400 3.258,0.584 13.807,1.047 61.339,2.803 100 0.172,0.167 0.279,0.222 0.520,0.275 0.825,0.341 2.981,0.505 12.022,1.202 44.641,2.943 1000 0.185,0.174 0.297,0.239 0.445,0.283 0.767,0.340 2.329,0.480 6.342,1.198 16.440,3.888 V=100 F\P 1 2 4 8 16 32 64 10 0.162,0.153 0.200,0.186 0.362,0.257 0.671,0.496 1.433,0.943 3.801,1.345 7.938,2.501 100 0.153,0.160 0.221,0.199 0.404,0.264 0.945,0.379 1.556,0.485 3.761,1.156 7.901,2.484 1000 0.215,0.191 0.303,0.246 0.471,0.288 0.960,0.347 1.647,0.479 3.916,1.176 8.058,3.160 V=1000 F\P 1 2 4 8 16 32 64 10 0.151,0.129 0.210,0.163 0.326,0.245 0.685,0.521 1.284,0.859 3.087,2.251 6.451,4.801 100 0.154,0.153 0.211,0.191 0.276,0.282 0.687,0.506 1.202,0.877 3.259,1.954 8.738,2.887 1000 0.145,0.179 0.202,0.222 0.449,0.319 0.899,0.333 1.577,0.524 4.221,1.240 9.782,3.579 V=10000 F\P 1 2 4 8 16 32 64 10 0.161,0.154 0.198,0.190 0.296,0.256 0.662,0.480 1.192,0.818 2.989,2.200 6.362,4.746 100 0.176,0.174 0.236,0.203 0.326,0.255 0.696,0.511 1.183,0.855 4.205,3.444 19.510,17.760 1000 0.199,0.183 0.240,0.227 1.159,1.014 2.286,2.154 6.023,6.039 ---,10.933 ---,36.620 V=100000 F\P 1 2 4 8 16 32 64 10 0.171,0.162 0.204,0.198 0.285,0.230 0.692,0.500 1.225,0.881 2.990,2.243 6.379,4.771 100 0.151,0.171 0.220,0.210 0.295,0.255 0.720,0.518 1.226,0.844 3.423,2.831 19.234,17.544 1000 0.192,0.189 0.249,0.225 1.162,1.043 2.257,2.093 5.853,4.997 ---,10.399 ---,32.198 We see that the new code is faster in pretty much all the cases and starting from 4 processes there are significant gains with the new code resulting in upto 20-times shorter runtimes. Also for large numbers of cached entries all values for the old code could not be measured as the kernel started hitting softlockups and died before the test completed. Signed-off-by: Jan Kara <jack@suse.cz> Signed-off-by: Theodore Ts'o <tytso@mit.edu>
author: Jan Kara <jack@suse.cz> 2016-02-22 11:49:09 -0500
committer: Theodore Ts'o <tytso@mit.edu> 2016-02-22 11:49:09 -0500
commit: f9a61eb4e2471c56a63cd804c7474128138c38ac (patch)
tree: 4336a713bce2ea6d139775731848816d3b4b30fd /fs/mbcache2.c
parent: 87f9a031af48defee9f34c6aaf06d6f1988c244d (diff)
1 files changed, 359 insertions, 0 deletions
diff --git a/fs/mbcache2.c b/fs/mbcache2.c
new file mode 100644
index 000000000000..5c3e1a8c38f6
--- /dev/null
+++ b/fs/mbcache2.c
@@ -0,0 +1,359 @@
+#include <linux/spinlock.h>
+#include <linux/slab.h>
+#include <linux/list.h>
+#include <linux/list_bl.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/mbcache2.h>
+/*
+ * Mbcache is a simple key-value store. Keys need not be unique, however
+ * key-value pairs are expected to be unique (we use this fact in
+ * mb2_cache_entry_delete_block()).
+ *
+ * Ext2 and ext4 use this cache for deduplication of extended attribute blocks.
+ * They use hash of a block contents as a key and block number as a value.
+ * That's why keys need not be unique (different xattr blocks may end up having
+ * the same hash). However block number always uniquely identifies a cache
+ * entry.
+ *
+ * We provide functions for creation and removal of entries, search by key,
+ * and a special "delete entry with given key-value pair" operation. Fixed
+ * size hash table is used for fast key lookups.
+ */
+struct mb2_cache {
+        /* Hash table of entries */
+        struct hlist_bl_head    *c_hash;
+        /* log2 of hash table size */
+        int                     c_bucket_bits;
+        /* Protects c_lru_list, c_entry_count */
+        spinlock_t              c_lru_list_lock;
+        struct list_head        c_lru_list;
+        /* Number of entries in cache */
+        unsigned long           c_entry_count;
+        struct shrinker         c_shrink;
+};
+static struct kmem_cache *mb2_entry_cache;
+/*
+ * mb2_cache_entry_create - create entry in cache
+ * @cache - cache where the entry should be created
+ * @mask - gfp mask with which the entry should be allocated
+ * @key - key of the entry
+ * @block - block that contains data
+ *
+ * Creates entry in @cache with key @key and records that data is stored in
+ * block @block. The function returns -EBUSY if entry with the same key
+ * and for the same block already exists in cache. Otherwise 0 is returned.
+ */
+int mb2_cache_entry_create(struct mb2_cache *cache, gfp_t mask, u32 key,
+                           sector_t block)
+{
+        struct mb2_cache_entry *entry, *dup;
+        struct hlist_bl_node *dup_node;
+        struct hlist_bl_head *head;
+        entry = kmem_cache_alloc(mb2_entry_cache, mask);
+        if (!entry)
+                return -ENOMEM;
+        INIT_LIST_HEAD(&entry->e_lru_list);
+        /* One ref for hash, one ref returned */
+        atomic_set(&entry->e_refcnt, 1);
+        entry->e_key = key;
+        entry->e_block = block;
+        head = &cache->c_hash[hash_32(key, cache->c_bucket_bits)];
+        entry->e_hash_list_head = head;
+        hlist_bl_lock(head);
+        hlist_bl_for_each_entry(dup, dup_node, head, e_hash_list) {
+                if (dup->e_key == key && dup->e_block == block) {
+                        hlist_bl_unlock(head);
+                        kmem_cache_free(mb2_entry_cache, entry);
+                        return -EBUSY;
+                }
+        }
+        hlist_bl_add_head(&entry->e_hash_list, head);
+        hlist_bl_unlock(head);
+        spin_lock(&cache->c_lru_list_lock);
+        list_add_tail(&entry->e_lru_list, &cache->c_lru_list);
+        /* Grab ref for LRU list */
+        atomic_inc(&entry->e_refcnt);
+        cache->c_entry_count++;
+        spin_unlock(&cache->c_lru_list_lock);
+        return 0;
+}
+EXPORT_SYMBOL(mb2_cache_entry_create);
+void __mb2_cache_entry_free(struct mb2_cache_entry *entry)
+{
+        kmem_cache_free(mb2_entry_cache, entry);
+}
+EXPORT_SYMBOL(__mb2_cache_entry_free);
+static struct mb2_cache_entry *__entry_find(struct mb2_cache *cache,
+                                            struct mb2_cache_entry *entry,
+                                            u32 key)
+{
+        struct mb2_cache_entry *old_entry = entry;
+        struct hlist_bl_node *node;
+        struct hlist_bl_head *head;
+        if (entry)
+                head = entry->e_hash_list_head;
+        else
+                head = &cache->c_hash[hash_32(key, cache->c_bucket_bits)];
+        hlist_bl_lock(head);
+        if (entry && !hlist_bl_unhashed(&entry->e_hash_list))
+                node = entry->e_hash_list.next;
+        else
+                node = hlist_bl_first(head);
+        while (node) {
+                entry = hlist_bl_entry(node, struct mb2_cache_entry,
+                                       e_hash_list);
+                if (entry->e_key == key) {
+                        atomic_inc(&entry->e_refcnt);
+                        goto out;
+                }
+                node = node->next;
+        }
+        entry = NULL;
+out:
+        hlist_bl_unlock(head);
+        if (old_entry)
+                mb2_cache_entry_put(cache, old_entry);
+        return entry;
+}
+/*
+ * mb2_cache_entry_find_first - find the first entry in cache with given key
+ * @cache: cache where we should search
+ * @key: key to look for
+ *
+ * Search in @cache for entry with key @key. Grabs reference to the first
+ * entry found and returns the entry.
+ */
+struct mb2_cache_entry *mb2_cache_entry_find_first(struct mb2_cache *cache,
+                                                   u32 key)
+{
+        return __entry_find(cache, NULL, key);
+}
+EXPORT_SYMBOL(mb2_cache_entry_find_first);
+/*
+ * mb2_cache_entry_find_next - find next entry in cache with the same
+ * @cache: cache where we should search
+ * @entry: entry to start search from
+ *
+ * Finds next entry in the hash chain which has the same key as @entry.
+ * If @entry is unhashed (which can happen when deletion of entry races
+ * with the search), finds the first entry in the hash chain. The function
+ * drops reference to @entry and returns with a reference to the found entry.
+ */
+struct mb2_cache_entry *mb2_cache_entry_find_next(struct mb2_cache *cache,
+                                                  struct mb2_cache_entry *entry)
+{
+        return __entry_find(cache, entry, entry->e_key);
+}
+EXPORT_SYMBOL(mb2_cache_entry_find_next);
+/* mb2_cache_entry_delete_block - remove information about block from cache
+ * @cache - cache we work with
+ * @key - key of the entry to remove
+ * @block - block containing data for @key
+ *
+ * Remove entry from cache @cache with key @key with data stored in @block.
+ */
+void mb2_cache_entry_delete_block(struct mb2_cache *cache, u32 key,
+                                  sector_t block)
+{
+        struct hlist_bl_node *node;
+        struct hlist_bl_head *head;
+        struct mb2_cache_entry *entry;
+        head = &cache->c_hash[hash_32(key, cache->c_bucket_bits)];
+        hlist_bl_lock(head);
+        hlist_bl_for_each_entry(entry, node, head, e_hash_list) {
+                if (entry->e_key == key && entry->e_block == block) {
+                        /* We keep hash list reference to keep entry alive */
+                        hlist_bl_del_init(&entry->e_hash_list);
+                        hlist_bl_unlock(head);
+                        spin_lock(&cache->c_lru_list_lock);
+                        if (!list_empty(&entry->e_lru_list)) {
+                                list_del_init(&entry->e_lru_list);
+                                cache->c_entry_count--;
+                                atomic_dec(&entry->e_refcnt);
+                        }
+                        spin_unlock(&cache->c_lru_list_lock);
+                        mb2_cache_entry_put(cache, entry);
+                        return;
+                }
+        }
+        hlist_bl_unlock(head);
+}
+EXPORT_SYMBOL(mb2_cache_entry_delete_block);
+/* mb2_cache_entry_touch - cache entry got used
+ * @cache - cache the entry belongs to
+ * @entry - entry that got used
+ *
+ * Move entry in lru list to reflect the fact that it was used.
+ */
+void mb2_cache_entry_touch(struct mb2_cache *cache,
+                           struct mb2_cache_entry *entry)
+{
+        spin_lock(&cache->c_lru_list_lock);
+        if (!list_empty(&entry->e_lru_list))
+                list_move_tail(&cache->c_lru_list, &entry->e_lru_list);
+        spin_unlock(&cache->c_lru_list_lock);
+}
+EXPORT_SYMBOL(mb2_cache_entry_touch);
+static unsigned long mb2_cache_count(struct shrinker *shrink,
+                                     struct shrink_control *sc)
+{
+        struct mb2_cache *cache = container_of(shrink, struct mb2_cache,
+                                               c_shrink);
+        return cache->c_entry_count;
+}
+/* Shrink number of entries in cache */
+static unsigned long mb2_cache_scan(struct shrinker *shrink,
+                                    struct shrink_control *sc)
+{
+        int nr_to_scan = sc->nr_to_scan;
+        struct mb2_cache *cache = container_of(shrink, struct mb2_cache,
+                                              c_shrink);
+        struct mb2_cache_entry *entry;
+        struct hlist_bl_head *head;
+        unsigned int shrunk = 0;
+        spin_lock(&cache->c_lru_list_lock);
+        while (nr_to_scan-- && !list_empty(&cache->c_lru_list)) {
+                entry = list_first_entry(&cache->c_lru_list,
+                                         struct mb2_cache_entry, e_lru_list);
+                list_del_init(&entry->e_lru_list);
+                cache->c_entry_count--;
+                /*
+                 * We keep LRU list reference so that entry doesn't go away
+                 * from under us.
+                 */
+                spin_unlock(&cache->c_lru_list_lock);
+                head = entry->e_hash_list_head;
+                hlist_bl_lock(head);
+                if (!hlist_bl_unhashed(&entry->e_hash_list)) {
+                        hlist_bl_del_init(&entry->e_hash_list);
+                        atomic_dec(&entry->e_refcnt);
+                }
+                hlist_bl_unlock(head);
+                if (mb2_cache_entry_put(cache, entry))
+                        shrunk++;
+                cond_resched();
+                spin_lock(&cache->c_lru_list_lock);
+        }
+        spin_unlock(&cache->c_lru_list_lock);
+        return shrunk;
+}
+/*
+ * mb2_cache_create - create cache
+ * @bucket_bits: log2 of the hash table size
+ *
+ * Create cache for keys with 2^bucket_bits hash entries.
+ */
+struct mb2_cache *mb2_cache_create(int bucket_bits)
+{
+        struct mb2_cache *cache;
+        int bucket_count = 1 << bucket_bits;
+        int i;
+        if (!try_module_get(THIS_MODULE))
+                return NULL;
+        cache = kzalloc(sizeof(struct mb2_cache), GFP_KERNEL);
+        if (!cache)
+                goto err_out;
+        cache->c_bucket_bits = bucket_bits;
+        INIT_LIST_HEAD(&cache->c_lru_list);
+        spin_lock_init(&cache->c_lru_list_lock);
+        cache->c_hash = kmalloc(bucket_count * sizeof(struct hlist_bl_head),
+                                GFP_KERNEL);
+        if (!cache->c_hash) {
+                kfree(cache);
+                goto err_out;
+        }
+        for (i = 0; i < bucket_count; i++)
+                INIT_HLIST_BL_HEAD(&cache->c_hash[i]);
+        cache->c_shrink.count_objects = mb2_cache_count;
+        cache->c_shrink.scan_objects = mb2_cache_scan;
+        cache->c_shrink.seeks = DEFAULT_SEEKS;
+        register_shrinker(&cache->c_shrink);
+        return cache;
+err_out:
+        module_put(THIS_MODULE);
+        return NULL;
+}
+EXPORT_SYMBOL(mb2_cache_create);
+/*
+ * mb2_cache_destroy - destroy cache
+ * @cache: the cache to destroy
+ *
+ * Free all entries in cache and cache itself. Caller must make sure nobody
+ * (except shrinker) can reach @cache when calling this.
+ */
+void mb2_cache_destroy(struct mb2_cache *cache)
+{
+        struct mb2_cache_entry *entry, *next;
+        unregister_shrinker(&cache->c_shrink);
+        /*
+         * We don't bother with any locking. Cache must not be used at this
+         * point.
+         */
+        list_for_each_entry_safe(entry, next, &cache->c_lru_list, e_lru_list) {
+                if (!hlist_bl_unhashed(&entry->e_hash_list)) {
+                        hlist_bl_del_init(&entry->e_hash_list);
+                        atomic_dec(&entry->e_refcnt);
+                } else
+                        WARN_ON(1);
+                list_del(&entry->e_lru_list);
+                WARN_ON(atomic_read(&entry->e_refcnt) != 1);
+                mb2_cache_entry_put(cache, entry);
+        }
+        kfree(cache->c_hash);
+        kfree(cache);
+        module_put(THIS_MODULE);
+}
+EXPORT_SYMBOL(mb2_cache_destroy);
+static int __init mb2cache_init(void)
+{
+        mb2_entry_cache = kmem_cache_create("mbcache",
+                                sizeof(struct mb2_cache_entry), 0,
+                                SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, NULL);
+        BUG_ON(!mb2_entry_cache);
+        return 0;
+}
+static void __exit mb2cache_exit(void)
+{
+        kmem_cache_destroy(mb2_entry_cache);
+}
+module_init(mb2cache_init)
+module_exit(mb2cache_exit)
+MODULE_AUTHOR("Jan Kara <jack@suse.cz>");
+MODULE_DESCRIPTION("Meta block cache (for extended attributes)");
+MODULE_LICENSE("GPL");
author	Jan Kara <jack@suse.cz>	2016-02-22 11:49:09 -0500
committer	Theodore Ts'o <tytso@mit.edu>	2016-02-22 11:49:09 -0500
commit	f9a61eb4e2471c56a63cd804c7474128138c38ac (patch)
tree	4336a713bce2ea6d139775731848816d3b4b30fd /fs/mbcache2.c
parent	87f9a031af48defee9f34c6aaf06d6f1988c244d (diff)

diff --git a/fs/mbcache2.c b/fs/mbcache2.c new file mode 100644 index 000000000000..5c3e1a8c38f6 --- /dev/null +++ b/fs/mbcache2.c
@@ -0,0 +1,359 @@
	1	#include <linux/spinlock.h>
	2	#include <linux/slab.h>
	3	#include <linux/list.h>
	4	#include <linux/list_bl.h>
	5	#include <linux/module.h>
	6	#include <linux/sched.h>
	7	#include <linux/mbcache2.h>
	8
	9	/*
	10	* Mbcache is a simple key-value store. Keys need not be unique, however
	11	* key-value pairs are expected to be unique (we use this fact in
	12	* mb2_cache_entry_delete_block()).
	13	*
	14	* Ext2 and ext4 use this cache for deduplication of extended attribute blocks.
	15	* They use hash of a block contents as a key and block number as a value.
	16	* That's why keys need not be unique (different xattr blocks may end up having
	17	* the same hash). However block number always uniquely identifies a cache
	18	* entry.
	19	*
	20	* We provide functions for creation and removal of entries, search by key,
	21	* and a special "delete entry with given key-value pair" operation. Fixed
	22	* size hash table is used for fast key lookups.
	23	*/
	24
	25	struct mb2_cache {
	26	/* Hash table of entries */
	27	struct hlist_bl_head *c_hash;
	28	/* log2 of hash table size */
	29	int c_bucket_bits;
	30	/* Protects c_lru_list, c_entry_count */
	31	spinlock_t c_lru_list_lock;
	32	struct list_head c_lru_list;
	33	/* Number of entries in cache */
	34	unsigned long c_entry_count;
	35	struct shrinker c_shrink;
	36	};
	37
	38	static struct kmem_cache *mb2_entry_cache;
	39
	40	/*
	41	* mb2_cache_entry_create - create entry in cache
	42	* @cache - cache where the entry should be created
	43	* @mask - gfp mask with which the entry should be allocated
	44	* @key - key of the entry
	45	* @block - block that contains data
	46	*
	47	* Creates entry in @cache with key @key and records that data is stored in
	48	* block @block. The function returns -EBUSY if entry with the same key
	49	* and for the same block already exists in cache. Otherwise 0 is returned.
	50	*/
	51	int mb2_cache_entry_create(struct mb2_cache *cache, gfp_t mask, u32 key,
	52	sector_t block)
	53	{
	54	struct mb2_cache_entry entry, dup;
	55	struct hlist_bl_node *dup_node;
	56	struct hlist_bl_head *head;
	57
	58	entry = kmem_cache_alloc(mb2_entry_cache, mask);
	59	if (!entry)
	60	return -ENOMEM;
	61
	62	INIT_LIST_HEAD(&entry->e_lru_list);
	63	/* One ref for hash, one ref returned */
	64	atomic_set(&entry->e_refcnt, 1);
	65	entry->e_key = key;
	66	entry->e_block = block;
	67	head = &cache->c_hash[hash_32(key, cache->c_bucket_bits)];
	68	entry->e_hash_list_head = head;
	69	hlist_bl_lock(head);
	70	hlist_bl_for_each_entry(dup, dup_node, head, e_hash_list) {
	71	if (dup->e_key == key && dup->e_block == block) {
	72	hlist_bl_unlock(head);
	73	kmem_cache_free(mb2_entry_cache, entry);
	74	return -EBUSY;
	75	}
	76	}
	77	hlist_bl_add_head(&entry->e_hash_list, head);
	78	hlist_bl_unlock(head);
	79
	80	spin_lock(&cache->c_lru_list_lock);
	81	list_add_tail(&entry->e_lru_list, &cache->c_lru_list);
	82	/* Grab ref for LRU list */
	83	atomic_inc(&entry->e_refcnt);
	84	cache->c_entry_count++;
	85	spin_unlock(&cache->c_lru_list_lock);
	86
	87	return 0;
	88	}
	89	EXPORT_SYMBOL(mb2_cache_entry_create);
	90
	91	void __mb2_cache_entry_free(struct mb2_cache_entry *entry)
	92	{
	93	kmem_cache_free(mb2_entry_cache, entry);
	94	}
	95	EXPORT_SYMBOL(__mb2_cache_entry_free);
	96
	97	static struct mb2_cache_entry __entry_find(struct mb2_cache cache,
	98	struct mb2_cache_entry *entry,
	99	u32 key)
	100	{
	101	struct mb2_cache_entry *old_entry = entry;
	102	struct hlist_bl_node *node;
	103	struct hlist_bl_head *head;
	104
	105	if (entry)
	106	head = entry->e_hash_list_head;
	107	else
	108	head = &cache->c_hash[hash_32(key, cache->c_bucket_bits)];
	109	hlist_bl_lock(head);
	110	if (entry && !hlist_bl_unhashed(&entry->e_hash_list))
	111	node = entry->e_hash_list.next;
	112	else
	113	node = hlist_bl_first(head);
	114	while (node) {
	115	entry = hlist_bl_entry(node, struct mb2_cache_entry,
	116	e_hash_list);
	117	if (entry->e_key == key) {
	118	atomic_inc(&entry->e_refcnt);
	119	goto out;
	120	}
	121	node = node->next;
	122	}
	123	entry = NULL;
	124	out:
	125	hlist_bl_unlock(head);
	126	if (old_entry)
	127	mb2_cache_entry_put(cache, old_entry);
	128
	129	return entry;
	130	}
	131
	132	/*
	133	* mb2_cache_entry_find_first - find the first entry in cache with given key
	134	* @cache: cache where we should search
	135	* @key: key to look for
	136	*
	137	* Search in @cache for entry with key @key. Grabs reference to the first
	138	* entry found and returns the entry.
	139	*/
	140	struct mb2_cache_entry mb2_cache_entry_find_first(struct mb2_cache cache,
	141	u32 key)
	142	{
	143	return __entry_find(cache, NULL, key);
	144	}
	145	EXPORT_SYMBOL(mb2_cache_entry_find_first);
	146
	147	/*
	148	* mb2_cache_entry_find_next - find next entry in cache with the same
	149	* @cache: cache where we should search
	150	* @entry: entry to start search from
	151	*
	152	* Finds next entry in the hash chain which has the same key as @entry.
	153	* If @entry is unhashed (which can happen when deletion of entry races
	154	* with the search), finds the first entry in the hash chain. The function
	155	* drops reference to @entry and returns with a reference to the found entry.
	156	*/
	157	struct mb2_cache_entry mb2_cache_entry_find_next(struct mb2_cache cache,
	158	struct mb2_cache_entry *entry)
	159	{
	160	return __entry_find(cache, entry, entry->e_key);
	161	}
	162	EXPORT_SYMBOL(mb2_cache_entry_find_next);
	163
	164	/* mb2_cache_entry_delete_block - remove information about block from cache
	165	* @cache - cache we work with
	166	* @key - key of the entry to remove
	167	* @block - block containing data for @key
	168	*
	169	* Remove entry from cache @cache with key @key with data stored in @block.
	170	*/
	171	void mb2_cache_entry_delete_block(struct mb2_cache *cache, u32 key,
	172	sector_t block)
	173	{
	174	struct hlist_bl_node *node;
	175	struct hlist_bl_head *head;
	176	struct mb2_cache_entry *entry;
	177
	178	head = &cache->c_hash[hash_32(key, cache->c_bucket_bits)];
	179	hlist_bl_lock(head);
	180	hlist_bl_for_each_entry(entry, node, head, e_hash_list) {
	181	if (entry->e_key == key && entry->e_block == block) {
	182	/* We keep hash list reference to keep entry alive */
	183	hlist_bl_del_init(&entry->e_hash_list);
	184	hlist_bl_unlock(head);
	185	spin_lock(&cache->c_lru_list_lock);
	186	if (!list_empty(&entry->e_lru_list)) {
	187	list_del_init(&entry->e_lru_list);
	188	cache->c_entry_count--;
	189	atomic_dec(&entry->e_refcnt);
	190	}
	191	spin_unlock(&cache->c_lru_list_lock);
	192	mb2_cache_entry_put(cache, entry);
	193	return;
	194	}
	195	}
	196	hlist_bl_unlock(head);
	197	}
	198	EXPORT_SYMBOL(mb2_cache_entry_delete_block);
	199
	200	/* mb2_cache_entry_touch - cache entry got used
	201	* @cache - cache the entry belongs to
	202	* @entry - entry that got used
	203	*
	204	* Move entry in lru list to reflect the fact that it was used.
	205	*/
	206	void mb2_cache_entry_touch(struct mb2_cache *cache,
	207	struct mb2_cache_entry *entry)
	208	{
	209	spin_lock(&cache->c_lru_list_lock);
	210	if (!list_empty(&entry->e_lru_list))
	211	list_move_tail(&cache->c_lru_list, &entry->e_lru_list);
	212	spin_unlock(&cache->c_lru_list_lock);
	213	}
	214	EXPORT_SYMBOL(mb2_cache_entry_touch);
	215
	216	static unsigned long mb2_cache_count(struct shrinker *shrink,
	217	struct shrink_control *sc)
	218	{
	219	struct mb2_cache *cache = container_of(shrink, struct mb2_cache,
	220	c_shrink);
	221
	222	return cache->c_entry_count;
	223	}
	224
	225	/* Shrink number of entries in cache */
	226	static unsigned long mb2_cache_scan(struct shrinker *shrink,
	227	struct shrink_control *sc)
	228	{
	229	int nr_to_scan = sc->nr_to_scan;
	230	struct mb2_cache *cache = container_of(shrink, struct mb2_cache,
	231	c_shrink);
	232	struct mb2_cache_entry *entry;
	233	struct hlist_bl_head *head;
	234	unsigned int shrunk = 0;
	235
	236	spin_lock(&cache->c_lru_list_lock);
	237	while (nr_to_scan-- && !list_empty(&cache->c_lru_list)) {
	238	entry = list_first_entry(&cache->c_lru_list,
	239	struct mb2_cache_entry, e_lru_list);
	240	list_del_init(&entry->e_lru_list);
	241	cache->c_entry_count--;
	242	/*
	243	* We keep LRU list reference so that entry doesn't go away
	244	* from under us.
	245	*/
	246	spin_unlock(&cache->c_lru_list_lock);
	247	head = entry->e_hash_list_head;
	248	hlist_bl_lock(head);
	249	if (!hlist_bl_unhashed(&entry->e_hash_list)) {
	250	hlist_bl_del_init(&entry->e_hash_list);
	251	atomic_dec(&entry->e_refcnt);
	252	}
	253	hlist_bl_unlock(head);
	254	if (mb2_cache_entry_put(cache, entry))
	255	shrunk++;
	256	cond_resched();
	257	spin_lock(&cache->c_lru_list_lock);
	258	}
	259	spin_unlock(&cache->c_lru_list_lock);
	260
	261	return shrunk;
	262	}
	263
	264	/*
	265	* mb2_cache_create - create cache
	266	* @bucket_bits: log2 of the hash table size
	267	*
	268	* Create cache for keys with 2^bucket_bits hash entries.
	269	*/
	270	struct mb2_cache *mb2_cache_create(int bucket_bits)
	271	{
	272	struct mb2_cache *cache;
	273	int bucket_count = 1 << bucket_bits;
	274	int i;
	275
	276	if (!try_module_get(THIS_MODULE))
	277	return NULL;
	278
	279	cache = kzalloc(sizeof(struct mb2_cache), GFP_KERNEL);
	280	if (!cache)
	281	goto err_out;
	282	cache->c_bucket_bits = bucket_bits;
	283	INIT_LIST_HEAD(&cache->c_lru_list);
	284	spin_lock_init(&cache->c_lru_list_lock);
	285	cache->c_hash = kmalloc(bucket_count * sizeof(struct hlist_bl_head),
	286	GFP_KERNEL);
	287	if (!cache->c_hash) {
	288	kfree(cache);
	289	goto err_out;
	290	}
	291	for (i = 0; i < bucket_count; i++)
	292	INIT_HLIST_BL_HEAD(&cache->c_hash[i]);
	293
	294	cache->c_shrink.count_objects = mb2_cache_count;
	295	cache->c_shrink.scan_objects = mb2_cache_scan;
	296	cache->c_shrink.seeks = DEFAULT_SEEKS;
	297	register_shrinker(&cache->c_shrink);
	298
	299	return cache;
	300
	301	err_out:
	302	module_put(THIS_MODULE);
	303	return NULL;
	304	}
	305	EXPORT_SYMBOL(mb2_cache_create);
	306
	307	/*
	308	* mb2_cache_destroy - destroy cache
	309	* @cache: the cache to destroy
	310	*
	311	* Free all entries in cache and cache itself. Caller must make sure nobody
	312	* (except shrinker) can reach @cache when calling this.
	313	*/
	314	void mb2_cache_destroy(struct mb2_cache *cache)
	315	{
	316	struct mb2_cache_entry entry, next;
	317
	318	unregister_shrinker(&cache->c_shrink);
	319
	320	/*
	321	* We don't bother with any locking. Cache must not be used at this
	322	* point.
	323	*/
	324	list_for_each_entry_safe(entry, next, &cache->c_lru_list, e_lru_list) {
	325	if (!hlist_bl_unhashed(&entry->e_hash_list)) {
	326	hlist_bl_del_init(&entry->e_hash_list);
	327	atomic_dec(&entry->e_refcnt);
	328	} else
	329	WARN_ON(1);
	330	list_del(&entry->e_lru_list);
	331	WARN_ON(atomic_read(&entry->e_refcnt) != 1);
	332	mb2_cache_entry_put(cache, entry);
	333	}
	334	kfree(cache->c_hash);
	335	kfree(cache);
	336	module_put(THIS_MODULE);
	337	}
	338	EXPORT_SYMBOL(mb2_cache_destroy);
	339
	340	static int __init mb2cache_init(void)
	341	{
	342	mb2_entry_cache = kmem_cache_create("mbcache",
	343	sizeof(struct mb2_cache_entry), 0,
	344	SLAB_RECLAIM_ACCOUNT\|SLAB_MEM_SPREAD, NULL);
	345	BUG_ON(!mb2_entry_cache);
	346	return 0;
	347	}
	348
	349	static void __exit mb2cache_exit(void)
	350	{
	351	kmem_cache_destroy(mb2_entry_cache);
	352	}
	353
	354	module_init(mb2cache_init)
	355	module_exit(mb2cache_exit)
	356
	357	MODULE_AUTHOR("Jan Kara <jack@suse.cz>");
	358	MODULE_DESCRIPTION("Meta block cache (for extended attributes)");
	359	MODULE_LICENSE("GPL");