aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorNeilBrown <neilb@suse.de>2014-06-04 03:39:26 -0400
committerJ. Bruce Fields <bfields@redhat.com>2014-06-17 16:42:47 -0400
commit6282cd56555347c0ec2addc97bd96b40df0a38b7 (patch)
tree53b6573954b79471cc35ad36640320eb7c9eaa88
parent7171511eaec5bf23fb06078f59784a3a0626b38f (diff)
NFSD: Don't hand out delegations for 30 seconds after recalling them.
If nfsd needs to recall a delegation for some reason it implies that there is contention on the file, so further delegations should not be handed out. The current code fails to do so, and the result is effectively a live-lock under some workloads: a client attempting a conflicting operation on a read-delegated file receives NFS4ERR_DELAY and retries the operation, but by the time it retries the server may already have given out another delegation. We could simply avoid delegations for (say) 30 seconds after any recall, but this is probably too heavy handed. We could keep a list of inodes (or inode numbers or filehandles) for recalled delegations, but that requires memory allocation and searching. The approach taken here is to use a bloom filter to record the filehandles which are currently blocked from delegation, and to accept the cost of a few false positives. We have 2 bloom filters, each of which is valid for 30 seconds. When a delegation is recalled the filehandle is added to one filter and will remain disabled for between 30 and 60 seconds. We keep a count of the number of filehandles that have been added, so when that count is zero we can bypass all other tests. The bloom filters have 256 bits and 3 hash functions. This should allow a couple of dozen blocked filehandles with minimal false positives. If many more filehandles are all blocked at once, behaviour will degrade towards rejecting all delegations for between 30 and 60 seconds, then resetting and allowing new delegations. Signed-off-by: NeilBrown <neilb@suse.de> Signed-off-by: J. Bruce Fields <bfields@redhat.com>
-rw-r--r--fs/nfsd/nfs4state.c78
1 files changed, 78 insertions, 0 deletions
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index c0d45cec9958..2204e1fe5725 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -41,6 +41,7 @@
41#include <linux/ratelimit.h> 41#include <linux/ratelimit.h>
42#include <linux/sunrpc/svcauth_gss.h> 42#include <linux/sunrpc/svcauth_gss.h>
43#include <linux/sunrpc/addr.h> 43#include <linux/sunrpc/addr.h>
44#include <linux/hash.h>
44#include "xdr4.h" 45#include "xdr4.h"
45#include "xdr4cb.h" 46#include "xdr4cb.h"
46#include "vfs.h" 47#include "vfs.h"
@@ -364,6 +365,79 @@ static struct nfs4_ol_stateid * nfs4_alloc_stateid(struct nfs4_client *clp)
364 return openlockstateid(nfs4_alloc_stid(clp, stateid_slab)); 365 return openlockstateid(nfs4_alloc_stid(clp, stateid_slab));
365} 366}
366 367
368/*
369 * When we recall a delegation, we should be careful not to hand it
370 * out again straight away.
371 * To ensure this we keep a pair of bloom filters ('new' and 'old')
372 * in which the filehandles of recalled delegations are "stored".
373 * If a filehandle appears in either filter, a delegation is blocked.
374 * When a delegation is recalled, the filehandle is stored in the "new"
375 * filter.
376 * Every 30 seconds we swap the filters and clear the "new" one,
377 * unless both are empty of course.
378 *
379 * Each filter is 256 bits. We hash the filehandle to 32bit and use the
380 * low 3 bytes as hash-table indices.
381 *
382 * 'state_lock', which is always held when block_delegations() is called,
383 * is used to manage concurrent access. Testing does not need the lock
384 * except when swapping the two filters.
385 */
386static struct bloom_pair {
387 int entries, old_entries;
388 time_t swap_time;
389 int new; /* index into 'set' */
390 DECLARE_BITMAP(set[2], 256);
391} blocked_delegations;
392
393static int delegation_blocked(struct knfsd_fh *fh)
394{
395 u32 hash;
396 struct bloom_pair *bd = &blocked_delegations;
397
398 if (bd->entries == 0)
399 return 0;
400 if (seconds_since_boot() - bd->swap_time > 30) {
401 spin_lock(&state_lock);
402 if (seconds_since_boot() - bd->swap_time > 30) {
403 bd->entries -= bd->old_entries;
404 bd->old_entries = bd->entries;
405 memset(bd->set[bd->new], 0,
406 sizeof(bd->set[0]));
407 bd->new = 1-bd->new;
408 bd->swap_time = seconds_since_boot();
409 }
410 spin_unlock(&state_lock);
411 }
412 hash = arch_fast_hash(&fh->fh_base, fh->fh_size, 0);
413 if (test_bit(hash&255, bd->set[0]) &&
414 test_bit((hash>>8)&255, bd->set[0]) &&
415 test_bit((hash>>16)&255, bd->set[0]))
416 return 1;
417
418 if (test_bit(hash&255, bd->set[1]) &&
419 test_bit((hash>>8)&255, bd->set[1]) &&
420 test_bit((hash>>16)&255, bd->set[1]))
421 return 1;
422
423 return 0;
424}
425
426static void block_delegations(struct knfsd_fh *fh)
427{
428 u32 hash;
429 struct bloom_pair *bd = &blocked_delegations;
430
431 hash = arch_fast_hash(&fh->fh_base, fh->fh_size, 0);
432
433 __set_bit(hash&255, bd->set[bd->new]);
434 __set_bit((hash>>8)&255, bd->set[bd->new]);
435 __set_bit((hash>>16)&255, bd->set[bd->new]);
436 if (bd->entries == 0)
437 bd->swap_time = seconds_since_boot();
438 bd->entries += 1;
439}
440
367static struct nfs4_delegation * 441static struct nfs4_delegation *
368alloc_init_deleg(struct nfs4_client *clp, struct nfs4_ol_stateid *stp, struct svc_fh *current_fh) 442alloc_init_deleg(struct nfs4_client *clp, struct nfs4_ol_stateid *stp, struct svc_fh *current_fh)
369{ 443{
@@ -372,6 +446,8 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_ol_stateid *stp, struct sv
372 dprintk("NFSD alloc_init_deleg\n"); 446 dprintk("NFSD alloc_init_deleg\n");
373 if (num_delegations > max_delegations) 447 if (num_delegations > max_delegations)
374 return NULL; 448 return NULL;
449 if (delegation_blocked(&current_fh->fh_handle))
450 return NULL;
375 dp = delegstateid(nfs4_alloc_stid(clp, deleg_slab)); 451 dp = delegstateid(nfs4_alloc_stid(clp, deleg_slab));
376 if (dp == NULL) 452 if (dp == NULL)
377 return dp; 453 return dp;
@@ -2770,6 +2846,8 @@ static void nfsd_break_one_deleg(struct nfs4_delegation *dp)
2770 /* Only place dl_time is set; protected by i_lock: */ 2846 /* Only place dl_time is set; protected by i_lock: */
2771 dp->dl_time = get_seconds(); 2847 dp->dl_time = get_seconds();
2772 2848
2849 block_delegations(&dp->dl_fh);
2850
2773 nfsd4_cb_recall(dp); 2851 nfsd4_cb_recall(dp);
2774} 2852}
2775 2853