author    Jeff Layton <jlayton@redhat.com>    2013-02-04 08:18:02 -0500
committer J. Bruce Fields <bfields@redhat.com>    2013-02-04 17:19:10 -0500
commit    0338dd157282c19696d3c32614a748d0ba814b12 (patch)
tree      4a42757386eab303fd723bcfbcadf843c921ebc8 /fs/nfsd
parent    0ee0bf7ee5b55f232b645c4af0b0c37d4e115a32 (diff)
nfsd: dynamically allocate DRC entries
The existing code keeps a fixed-size cache of 1024 entries. This is much too small for a busy server, and wastes memory on an idle one. This patch changes the code to dynamically allocate and free these cache entries.

A cap on the number of entries is retained, but it's much larger than the existing value and now scales with the amount of low memory in the machine.

Signed-off-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
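As a rough illustration of how the new cap scales, the sizing heuristic can be mirrored in userspace (a minimal sketch, not the kernel code: it assumes 4k pages and hard-codes example page counts; the real function, nfsd_cache_size_limit(), is added by the patch below):

#include <math.h>
#include <stdio.h>

#define PAGE_SHIFT 12	/* assumes 4k pages */

/* Userspace mirror of the patch's sizing formula. */
static unsigned int cache_size_limit(unsigned long low_pages)
{
	unsigned int limit;

	limit = (16 * (unsigned int)sqrt((double)low_pages)) << (PAGE_SHIFT - 10);
	return limit < 256 * 1024 ? limit : 256 * 1024;
}

int main(void)
{
	/* 1GB of low memory = 262144 pages: 16 * 512 << 2 = 32768 entries */
	printf("1GB:  %u\n", cache_size_limit(262144UL));
	/* 16GB = 4194304 pages: 16 * 2048 << 2 = 131072 entries */
	printf("16GB: %u\n", cache_size_limit(4194304UL));
	return 0;
}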
Diffstat (limited to 'fs/nfsd')
-rw-r--r--  fs/nfsd/nfscache.c | 125
1 file changed, 75 insertions(+), 50 deletions(-)
diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c
index c0c58471eb45..d213e6e69e46 100644
--- a/fs/nfsd/nfscache.c
+++ b/fs/nfsd/nfscache.c
@@ -10,17 +10,13 @@
 
 #include <linux/slab.h>
 #include <linux/sunrpc/clnt.h>
+#include <linux/highmem.h>
 
 #include "nfsd.h"
 #include "cache.h"
 
-/* Size of reply cache. Common values are:
- * 4.3BSD: 128
- * 4.4BSD: 256
- * Solaris2: 1024
- * DEC Unix: 512-4096
- */
-#define CACHESIZE		1024
+#define NFSDDBG_FACILITY	NFSDDBG_REPCACHE
+
 #define HASHSIZE		64
 
 static struct hlist_head *	cache_hash;
@@ -28,6 +24,7 @@ static struct list_head 	lru_head;
 static int			cache_disabled = 1;
 static struct kmem_cache	*drc_slab;
 static unsigned int		num_drc_entries;
+static unsigned int		max_drc_entries;
 
 /*
  * Calculate the hash index from an XID.
@@ -48,6 +45,34 @@ static int	nfsd_cache_append(struct svc_rqst *rqstp, struct kvec *vec);
  */
 static DEFINE_SPINLOCK(cache_lock);
 
+/*
+ * Put a cap on the size of the DRC based on the amount of available
+ * low memory in the machine.
+ *
+ *  64MB:    8192
+ * 128MB:   11585
+ * 256MB:   16384
+ * 512MB:   23170
+ *   1GB:   32768
+ *   2GB:   46340
+ *   4GB:   65536
+ *   8GB:   92681
+ *  16GB:  131072
+ *
+ * ...with a hard cap of 256k entries. In the worst case, each entry will be
+ * ~1k, so the above numbers should give a rough max of the amount of memory
+ * used in k.
+ */
+static unsigned int
+nfsd_cache_size_limit(void)
+{
+	unsigned int limit;
+	unsigned long low_pages = totalram_pages - totalhigh_pages;
+
+	limit = (16 * int_sqrt(low_pages)) << (PAGE_SHIFT-10);
+	return min_t(unsigned int, limit, 256*1024);
+}
+
 static struct svc_cacherep *
 nfsd_reply_cache_alloc(void)
 {
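A quick check of one row of the table above: 1GB of low memory is 262144 4k pages, int_sqrt(262144) = 512, and (16 * 512) << (PAGE_SHIFT - 10) = 8192 << 2 = 32768, matching the 1GB entry (assuming PAGE_SHIFT = 12).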
@@ -68,6 +93,7 @@ nfsd_reply_cache_free_locked(struct svc_cacherep *rp)
 {
 	if (rp->c_type == RC_REPLBUFF)
 		kfree(rp->c_replvec.iov_base);
+	hlist_del(&rp->c_hash);
 	list_del(&rp->c_lru);
 	--num_drc_entries;
 	kmem_cache_free(drc_slab, rp);
@@ -75,30 +101,18 @@ nfsd_reply_cache_free_locked(struct svc_cacherep *rp)
 
 int nfsd_reply_cache_init(void)
 {
-	int i;
-	struct svc_cacherep *rp;
-
 	drc_slab = kmem_cache_create("nfsd_drc", sizeof(struct svc_cacherep),
 					0, 0, NULL);
 	if (!drc_slab)
 		goto out_nomem;
 
-	INIT_LIST_HEAD(&lru_head);
-	i = CACHESIZE;
-	num_drc_entries = 0;
-	while (i) {
-		rp = nfsd_reply_cache_alloc();
-		if (!rp)
-			goto out_nomem;
-		++num_drc_entries;
-		list_add(&rp->c_lru, &lru_head);
-		i--;
-	}
-
-	cache_hash = kcalloc (HASHSIZE, sizeof(struct hlist_head), GFP_KERNEL);
+	cache_hash = kcalloc(HASHSIZE, sizeof(struct hlist_head), GFP_KERNEL);
 	if (!cache_hash)
 		goto out_nomem;
 
+	INIT_LIST_HEAD(&lru_head);
+	max_drc_entries = nfsd_cache_size_limit();
+	num_drc_entries = 0;
 	cache_disabled = 0;
 	return 0;
 out_nomem:
@@ -191,7 +205,7 @@ nfsd_cache_search(struct svc_rqst *rqstp)
 int
 nfsd_cache_lookup(struct svc_rqst *rqstp)
 {
-	struct svc_cacherep	*rp;
+	struct svc_cacherep	*rp, *found;
 	__be32			xid = rqstp->rq_xid;
 	u32			proto =  rqstp->rq_prot,
 				vers = rqstp->rq_vers,
@@ -210,38 +224,48 @@ nfsd_cache_lookup(struct svc_rqst *rqstp)
 	rtn = RC_DOIT;
 
 	rp = nfsd_cache_search(rqstp);
-	if (rp) {
-		nfsdstats.rchits++;
+	if (rp)
 		goto found_entry;
+
+	/* Try to use the first entry on the LRU */
+	if (!list_empty(&lru_head)) {
+		rp = list_first_entry(&lru_head, struct svc_cacherep, c_lru);
+		if (nfsd_cache_entry_expired(rp) ||
+		    num_drc_entries >= max_drc_entries)
+			goto setup_entry;
 	}
-	nfsdstats.rcmisses++;
 
-	/* This loop shouldn't take more than a few iterations normally */
-	{
-	int	safe = 0;
-	list_for_each_entry(rp, &lru_head, c_lru) {
-		if (rp->c_state != RC_INPROG)
-			break;
-		if (safe++ > CACHESIZE) {
-			printk("nfsd: loop in repcache LRU list\n");
-			cache_disabled = 1;
-			goto out;
-		}
-	}
+	spin_unlock(&cache_lock);
+	rp = nfsd_reply_cache_alloc();
+	if (!rp) {
+		dprintk("nfsd: unable to allocate DRC entry!\n");
+		return RC_DOIT;
 	}
+	spin_lock(&cache_lock);
+	++num_drc_entries;
+
+	/*
+	 * Must search again just in case someone inserted one
+	 * after we dropped the lock above.
+	 */
+	found = nfsd_cache_search(rqstp);
+	if (found) {
+		nfsd_reply_cache_free_locked(rp);
+		rp = found;
+		goto found_entry;
 	}
 
-	/* All entries on the LRU are in-progress. This should not happen */
-	if (&rp->c_lru == &lru_head) {
-		static int	complaints;
-
-		printk(KERN_WARNING "nfsd: all repcache entries locked!\n");
-		if (++complaints > 5) {
-			printk(KERN_WARNING "nfsd: disabling repcache.\n");
-			cache_disabled = 1;
-		}
-		goto out;
-	}
+	/*
+	 * We're keeping the one we just allocated. Are we now over the
+	 * limit? Prune one off the tip of the LRU in trade for the one we
+	 * just allocated if so.
+	 */
+	if (num_drc_entries >= max_drc_entries)
+		nfsd_reply_cache_free_locked(list_first_entry(&lru_head,
+						struct svc_cacherep, c_lru));
 
+setup_entry:
+	nfsdstats.rcmisses++;
 	rqstp->rq_cacherep = rp;
 	rp->c_state = RC_INPROG;
 	rp->c_xid = xid;
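The subtle part of this hunk is the lock juggling: cache_lock is dropped around the allocation, so the cache must be searched a second time afterward in case a concurrent thread inserted a matching entry in the window. A minimal standalone sketch of that race-handling pattern (hypothetical names throughout; a pthread mutex stands in for the kernel spinlock and malloc for the slab allocator):

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct entry {
	int key;
	struct entry *next;
};

static pthread_mutex_t cache_lock = PTHREAD_MUTEX_INITIALIZER;
static struct entry *cache_head;

/* Caller must hold cache_lock. */
static struct entry *cache_search(int key)
{
	struct entry *e;

	for (e = cache_head; e; e = e->next)
		if (e->key == key)
			return e;
	return NULL;
}

static struct entry *cache_lookup(int key)
{
	struct entry *e, *found;

	pthread_mutex_lock(&cache_lock);
	e = cache_search(key);
	if (e)
		goto out;

	/* Drop the lock so a slow allocation can't stall other lookups. */
	pthread_mutex_unlock(&cache_lock);
	e = malloc(sizeof(*e));
	pthread_mutex_lock(&cache_lock);
	if (!e)
		goto out;

	/* Search again: another thread may have inserted a matching entry
	 * while the lock was dropped. If so, keep theirs and free ours. */
	found = cache_search(key);
	if (found) {
		free(e);
		e = found;
		goto out;
	}

	e->key = key;
	e->next = cache_head;
	cache_head = e;
out:
	pthread_mutex_unlock(&cache_lock);
	return e;
}

int main(void)
{
	printf("%p\n", (void *)cache_lookup(42));
	return 0;
}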
@@ -265,6 +289,7 @@ nfsd_cache_lookup(struct svc_rqst *rqstp)
 	return rtn;
 
 found_entry:
+	nfsdstats.rchits++;
 	/* We found a matching entry which is either in progress or done. */
 	age = jiffies - rp->c_timestamp;
 	lru_put_end(rp);
@@ -295,7 +320,7 @@ found_entry:
 		break;
 	default:
 		printk(KERN_WARNING "nfsd: bad repcache type %d\n", rp->c_type);
-		rp->c_state = RC_UNUSED;
+		nfsd_reply_cache_free_locked(rp);
 	}
 
 	goto out;