[PATCH] NLM: fix a client-side race on blocking locks.

If the lock blocks, the server may send us a GRANTED message that races with the reply to our LOCK request. Make sure that we catch the GRANTED by queueing up our request on the nlm_blocked list before we send off the first LOCK rpc call. Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
author: Trond Myklebust <Trond.Myklebust@netapp.com> 2005-06-22 13:16:31 -0400
committer: Trond Myklebust <Trond.Myklebust@netapp.com> 2005-06-22 16:07:42 -0400
commit: ecdbf769b2cb8903e07cd482334c714d89fd1146 (patch)
tree: 9d02ce4daee662c2711762564662cebc521e3da3
parent: 4f15e2b1f4f3a56e46201714b39436c32218d547 (diff)
3 files changed, 96 insertions, 50 deletions
diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c
index 44adb84183b6..006bb9e14579 100644
--- a/fs/lockd/clntlock.c
+++ b/fs/lockd/clntlock.c
@@ -42,23 +42,51 @@ struct nlm_wait {
 static LIST_HEAD(nlm_blocked);
 /*
- * Block on a lock
+ * Queue up a lock for blocking so that the GRANTED request can see it
 */
-int
+int nlmclnt_prepare_block(struct nlm_rqst *req, struct nlm_host *host, struct file_lock *fl)
-nlmclnt_block(struct nlm_host *host, struct file_lock *fl, u32 *statp)
+{
+        struct nlm_wait *block;
+        BUG_ON(req->a_block != NULL);
+        block = kmalloc(sizeof(*block), GFP_KERNEL);
+        if (block == NULL)
+                return -ENOMEM;
+        block->b_host = host;
+        block->b_lock = fl;
+        init_waitqueue_head(&block->b_wait);
+        block->b_status = NLM_LCK_BLOCKED;
+        list_add(&block->b_list, &nlm_blocked);
+        req->a_block = block;
+        return 0;
+}
+void nlmclnt_finish_block(struct nlm_rqst *req)
 {
-        struct nlm_wait block, **head;
+        struct nlm_wait *block = req->a_block;
-        int             err;
-        u32             pstate;
-        block.b_host   = host;
+        if (block == NULL)
-        block.b_lock   = fl;
+                return;
-        init_waitqueue_head(&block.b_wait);
+        req->a_block = NULL;
-        block.b_status = NLM_LCK_BLOCKED;
+        list_del(&block->b_list);
-        list_add(&block.b_list, &nlm_blocked);
+        kfree(block);
+}
-        /* Remember pseudo nsm state */
+/*
-        pstate = host->h_state;
+ * Block on a lock
+ */
+long nlmclnt_block(struct nlm_rqst *req, long timeout)
+{
+        struct nlm_wait *block = req->a_block;
+        long ret;
+        /* A borken server might ask us to block even if we didn't
+         * request it. Just say no!
+         */
+        if (!req->a_args.block)
+                return -EAGAIN;
        /* Go to sleep waiting for GRANT callback. Some servers seem
         * to lose callbacks, however, so we're going to poll from
@@ -68,23 +96,16 @@ nlmclnt_block(struct nlm_host *host, struct file_lock *fl, u32 *statp)
         * a 1 minute timeout would do. See the comment before
         * nlmclnt_lock for an explanation.
         */
-        sleep_on_timeout(&block.b_wait, 30*HZ);
+        ret = wait_event_interruptible_timeout(block->b_wait,
+                        block->b_status != NLM_LCK_BLOCKED,
+                        timeout);
-        list_del(&block.b_list);
+        if (block->b_status != NLM_LCK_BLOCKED) {
+                req->a_res.status = block->b_status;
-        if (!signalled()) {
+                block->b_status = NLM_LCK_BLOCKED;
-                *statp = block.b_status;
-                return 0;
        }
-        /* Okay, we were interrupted. Cancel the pending request
+        return ret;
-         * unless the server has rebooted.
-         */
-        if (pstate == host->h_state && (err = nlmclnt_cancel(host, fl)) < 0)
-                printk(KERN_NOTICE
-                        "lockd: CANCEL call failed (errno %d)\n", -err);
-        return -ERESTARTSYS;
 }
 /*
@@ -94,27 +115,23 @@ u32
 nlmclnt_grant(struct nlm_lock *lock)
 {
        struct nlm_wait *block;
+        u32 res = nlm_lck_denied;
        /*
         * Look up blocked request based on arguments. 
         * Warning: must not use cookie to match it!
         */
        list_for_each_entry(block, &nlm_blocked, b_list) {
-                if (nlm_compare_locks(block->b_lock, &lock->fl))
+                if (nlm_compare_locks(block->b_lock, &lock->fl)) {
-                        break;
+                        /* Alright, we found a lock. Set the return status
+                         * and wake up the caller
+                         */
+                        block->b_status = NLM_LCK_GRANTED;
+                        wake_up(&block->b_wait);
+                        res = nlm_granted;
+                }
        }
+        return res;
-        /* Ooops, no blocked request found. */
-        if (block == NULL)
-                return nlm_lck_denied;
-        /* Alright, we found the lock. Set the return status and
-         * wake up the caller.
-         */
-        block->b_status = NLM_LCK_GRANTED;
-        wake_up(&block->b_wait);
-        return nlm_granted;
 }
 /*
diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c
index a4407619b1f1..fd77ed1d710d 100644
--- a/fs/lockd/clntproc.c
+++ b/fs/lockd/clntproc.c
@@ -21,6 +21,7 @@
 #define NLMDBG_FACILITY         NLMDBG_CLIENT
 #define NLMCLNT_GRACE_WAIT      (5*HZ)
+#define NLMCLNT_POLL_TIMEOUT    (30*HZ)
 static int      nlmclnt_test(struct nlm_rqst *, struct file_lock *);
 static int      nlmclnt_lock(struct nlm_rqst *, struct file_lock *);
@@ -553,7 +554,8 @@ nlmclnt_lock(struct nlm_rqst *req, struct file_lock *fl)
 {
        struct nlm_host *host = req->a_host;
        struct nlm_res  *resp = &req->a_res;
-        int             status;
+        long timeout;
+        int status;
        if (!host->h_monitored && nsm_monitor(host) < 0) {
                printk(KERN_NOTICE "lockd: failed to monitor %s\n",
@@ -562,15 +564,32 @@ nlmclnt_lock(struct nlm_rqst *req, struct file_lock *fl)
                goto out;
        }
-        do {
+        if (req->a_args.block) {
-                if ((status = nlmclnt_call(req, NLMPROC_LOCK)) >= 0) {
+                status = nlmclnt_prepare_block(req, host, fl);
-                        if (resp->status != NLM_LCK_BLOCKED)
-                                break;
-                        status = nlmclnt_block(host, fl, &resp->status);
-                }
                if (status < 0)
                        goto out;
-        } while (resp->status == NLM_LCK_BLOCKED && req->a_args.block);
+        }
+        for(;;) {
+                status = nlmclnt_call(req, NLMPROC_LOCK);
+                if (status < 0)
+                        goto out_unblock;
+                if (resp->status != NLM_LCK_BLOCKED)
+                        break;
+                /* Wait on an NLM blocking lock */
+                timeout = nlmclnt_block(req, NLMCLNT_POLL_TIMEOUT);
+                /* Did a reclaimer thread notify us of a server reboot? */
+                if (resp->status ==  NLM_LCK_DENIED_GRACE_PERIOD)
+                        continue;
+                if (resp->status != NLM_LCK_BLOCKED)
+                        break;
+                if (timeout >= 0)
+                        continue;
+                /* We were interrupted. Send a CANCEL request to the server
+                 * and exit
+                 */
+                status = (int)timeout;
+                goto out_unblock;
+        }
        if (resp->status == NLM_LCK_GRANTED) {
                fl->fl_u.nfs_fl.state = host->h_state;
@@ -579,6 +598,11 @@ nlmclnt_lock(struct nlm_rqst *req, struct file_lock *fl)
                do_vfs_lock(fl);
        }
        status = nlm_stat_to_errno(resp->status);
+out_unblock:
+        nlmclnt_finish_block(req);
+        /* Cancel the blocked request if it is still pending */
+        if (resp->status == NLM_LCK_BLOCKED)
+                nlmclnt_cancel(host, fl);
 out:
        nlmclnt_release_lockargs(req);
        return status;
diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h
index 0d9d22578212..16d4e5a08e1d 100644
--- a/include/linux/lockd/lockd.h
+++ b/include/linux/lockd/lockd.h
@@ -72,6 +72,8 @@ struct nlm_lockowner {
        uint32_t pid;
 };
+struct nlm_wait;
 /*
 * Memory chunk for NLM client RPC request.
 */
@@ -81,6 +83,7 @@ struct nlm_rqst {
        struct nlm_host *       a_host;         /* host handle */
        struct nlm_args         a_args;         /* arguments */
        struct nlm_res          a_res;          /* result */
+        struct nlm_wait *       a_block;
        char                    a_owner[NLMCLNT_OHSIZE];
 };
@@ -142,7 +145,9 @@ extern unsigned long		nlmsvc_timeout;
 * Lockd client functions
 */
 struct nlm_rqst * nlmclnt_alloc_call(void);
-int               nlmclnt_block(struct nlm_host *, struct file_lock *, u32 *);
+int               nlmclnt_prepare_block(struct nlm_rqst *req, struct nlm_host *host, struct file_lock *fl);
+void              nlmclnt_finish_block(struct nlm_rqst *req);
+long              nlmclnt_block(struct nlm_rqst *req, long timeout);
 int               nlmclnt_cancel(struct nlm_host *, struct file_lock *);
 u32               nlmclnt_grant(struct nlm_lock *);
 void              nlmclnt_recovery(struct nlm_host *, u32);
author	Trond Myklebust <Trond.Myklebust@netapp.com>	2005-06-22 13:16:31 -0400
committer	Trond Myklebust <Trond.Myklebust@netapp.com>	2005-06-22 16:07:42 -0400
commit	ecdbf769b2cb8903e07cd482334c714d89fd1146 (patch)
tree	9d02ce4daee662c2711762564662cebc521e3da3
parent	4f15e2b1f4f3a56e46201714b39436c32218d547 (diff)

diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c index 44adb84183b6..006bb9e14579 100644 --- a/fs/lockd/clntlock.c +++ b/fs/lockd/clntlock.c
@@ -42,23 +42,51 @@ struct nlm_wait {
42	static LIST_HEAD(nlm_blocked);	42	static LIST_HEAD(nlm_blocked);
43		43
44	/*	44	/*
45	* Block on a lock	45	* Queue up a lock for blocking so that the GRANTED request can see it
46	*/	46	*/
47	int	47	int nlmclnt_prepare_block(struct nlm_rqst *req, struct nlm_host *host, struct file_lock *fl)
48	nlmclnt_block(struct nlm_host *host, struct file_lock *fl, u32 *statp)	48	{
		49	struct nlm_wait *block;
		50
		51	BUG_ON(req->a_block != NULL);
		52	block = kmalloc(sizeof(*block), GFP_KERNEL);
		53	if (block == NULL)
		54	return -ENOMEM;
		55	block->b_host = host;
		56	block->b_lock = fl;
		57	init_waitqueue_head(&block->b_wait);
		58	block->b_status = NLM_LCK_BLOCKED;
		59
		60	list_add(&block->b_list, &nlm_blocked);
		61	req->a_block = block;
		62
		63	return 0;
		64	}
		65
		66	void nlmclnt_finish_block(struct nlm_rqst *req)
49	{	67	{
50	struct nlm_wait block, **head;	68	struct nlm_wait *block = req->a_block;
51	int err;
52	u32 pstate;
53		69
54	block.b_host = host;	70	if (block == NULL)
55	block.b_lock = fl;	71	return;
56	init_waitqueue_head(&block.b_wait);	72	req->a_block = NULL;
57	block.b_status = NLM_LCK_BLOCKED;	73	list_del(&block->b_list);
58	list_add(&block.b_list, &nlm_blocked);	74	kfree(block);
		75	}
59		76
60	/* Remember pseudo nsm state */	77	/*
61	pstate = host->h_state;	78	* Block on a lock
		79	*/
		80	long nlmclnt_block(struct nlm_rqst *req, long timeout)
		81	{
		82	struct nlm_wait *block = req->a_block;
		83	long ret;
		84
		85	/* A borken server might ask us to block even if we didn't
		86	* request it. Just say no!
		87	*/
		88	if (!req->a_args.block)
		89	return -EAGAIN;
62		90
63	/* Go to sleep waiting for GRANT callback. Some servers seem	91	/* Go to sleep waiting for GRANT callback. Some servers seem
64	* to lose callbacks, however, so we're going to poll from	92	* to lose callbacks, however, so we're going to poll from
@@ -68,23 +96,16 @@ nlmclnt_block(struct nlm_host *host, struct file_lock *fl, u32 *statp)
68	* a 1 minute timeout would do. See the comment before	96	* a 1 minute timeout would do. See the comment before
69	* nlmclnt_lock for an explanation.	97	* nlmclnt_lock for an explanation.
70	*/	98	*/
71	sleep_on_timeout(&block.b_wait, 30*HZ);	99	ret = wait_event_interruptible_timeout(block->b_wait,
		100	block->b_status != NLM_LCK_BLOCKED,
		101	timeout);
72		102
73	list_del(&block.b_list);	103	if (block->b_status != NLM_LCK_BLOCKED) {
74		104	req->a_res.status = block->b_status;
75	if (!signalled()) {	105	block->b_status = NLM_LCK_BLOCKED;
76	*statp = block.b_status;
77	return 0;
78	}	106	}
79		107
80	/* Okay, we were interrupted. Cancel the pending request	108	return ret;
81	* unless the server has rebooted.
82	*/
83	if (pstate == host->h_state && (err = nlmclnt_cancel(host, fl)) < 0)
84	printk(KERN_NOTICE
85	"lockd: CANCEL call failed (errno %d)\n", -err);
86
87	return -ERESTARTSYS;
88	}	109	}
89		110
90	/*	111	/*
@@ -94,27 +115,23 @@ u32
94	nlmclnt_grant(struct nlm_lock *lock)	115	nlmclnt_grant(struct nlm_lock *lock)
95	{	116	{
96	struct nlm_wait *block;	117	struct nlm_wait *block;
		118	u32 res = nlm_lck_denied;
97		119
98	/*	120	/*
99	* Look up blocked request based on arguments.	121	* Look up blocked request based on arguments.
100	* Warning: must not use cookie to match it!	122	* Warning: must not use cookie to match it!
101	*/	123	*/
102	list_for_each_entry(block, &nlm_blocked, b_list) {	124	list_for_each_entry(block, &nlm_blocked, b_list) {
103	if (nlm_compare_locks(block->b_lock, &lock->fl))	125	if (nlm_compare_locks(block->b_lock, &lock->fl)) {
104	break;	126	/* Alright, we found a lock. Set the return status
		127	* and wake up the caller
		128	*/
		129	block->b_status = NLM_LCK_GRANTED;
		130	wake_up(&block->b_wait);
		131	res = nlm_granted;
		132	}
105	}	133	}
106		134	return res;
107	/* Ooops, no blocked request found. */
108	if (block == NULL)
109	return nlm_lck_denied;
110
111	/* Alright, we found the lock. Set the return status and
112	* wake up the caller.
113	*/
114	block->b_status = NLM_LCK_GRANTED;
115	wake_up(&block->b_wait);
116
117	return nlm_granted;
118	}	135	}
119		136
120	/*	137	/*


diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c index a4407619b1f1..fd77ed1d710d 100644 --- a/fs/lockd/clntproc.c +++ b/fs/lockd/clntproc.c
@@ -21,6 +21,7 @@
21		21
22	#define NLMDBG_FACILITY NLMDBG_CLIENT	22	#define NLMDBG_FACILITY NLMDBG_CLIENT
23	#define NLMCLNT_GRACE_WAIT (5*HZ)	23	#define NLMCLNT_GRACE_WAIT (5*HZ)
		24	#define NLMCLNT_POLL_TIMEOUT (30*HZ)
24		25
25	static int nlmclnt_test(struct nlm_rqst *, struct file_lock *);	26	static int nlmclnt_test(struct nlm_rqst *, struct file_lock *);
26	static int nlmclnt_lock(struct nlm_rqst *, struct file_lock *);	27	static int nlmclnt_lock(struct nlm_rqst *, struct file_lock *);
@@ -553,7 +554,8 @@ nlmclnt_lock(struct nlm_rqst *req, struct file_lock *fl)
553	{	554	{
554	struct nlm_host *host = req->a_host;	555	struct nlm_host *host = req->a_host;
555	struct nlm_res *resp = &req->a_res;	556	struct nlm_res *resp = &req->a_res;
556	int status;	557	long timeout;
		558	int status;
557		559
558	if (!host->h_monitored && nsm_monitor(host) < 0) {	560	if (!host->h_monitored && nsm_monitor(host) < 0) {
559	printk(KERN_NOTICE "lockd: failed to monitor %s\n",	561	printk(KERN_NOTICE "lockd: failed to monitor %s\n",
@@ -562,15 +564,32 @@ nlmclnt_lock(struct nlm_rqst *req, struct file_lock *fl)
562	goto out;	564	goto out;
563	}	565	}
564		566
565	do {	567	if (req->a_args.block) {
566	if ((status = nlmclnt_call(req, NLMPROC_LOCK)) >= 0) {	568	status = nlmclnt_prepare_block(req, host, fl);
567	if (resp->status != NLM_LCK_BLOCKED)
568	break;
569	status = nlmclnt_block(host, fl, &resp->status);
570	}
571	if (status < 0)	569	if (status < 0)
572	goto out;	570	goto out;
573	} while (resp->status == NLM_LCK_BLOCKED && req->a_args.block);	571	}
		572	for(;;) {
		573	status = nlmclnt_call(req, NLMPROC_LOCK);
		574	if (status < 0)
		575	goto out_unblock;
		576	if (resp->status != NLM_LCK_BLOCKED)
		577	break;
		578	/* Wait on an NLM blocking lock */
		579	timeout = nlmclnt_block(req, NLMCLNT_POLL_TIMEOUT);
		580	/* Did a reclaimer thread notify us of a server reboot? */
		581	if (resp->status == NLM_LCK_DENIED_GRACE_PERIOD)
		582	continue;
		583	if (resp->status != NLM_LCK_BLOCKED)
		584	break;
		585	if (timeout >= 0)
		586	continue;
		587	/* We were interrupted. Send a CANCEL request to the server
		588	* and exit
		589	*/
		590	status = (int)timeout;
		591	goto out_unblock;
		592	}
574		593
575	if (resp->status == NLM_LCK_GRANTED) {	594	if (resp->status == NLM_LCK_GRANTED) {
576	fl->fl_u.nfs_fl.state = host->h_state;	595	fl->fl_u.nfs_fl.state = host->h_state;
@@ -579,6 +598,11 @@ nlmclnt_lock(struct nlm_rqst *req, struct file_lock *fl)
579	do_vfs_lock(fl);	598	do_vfs_lock(fl);
580	}	599	}
581	status = nlm_stat_to_errno(resp->status);	600	status = nlm_stat_to_errno(resp->status);
		601	out_unblock:
		602	nlmclnt_finish_block(req);
		603	/* Cancel the blocked request if it is still pending */
		604	if (resp->status == NLM_LCK_BLOCKED)
		605	nlmclnt_cancel(host, fl);
582	out:	606	out:
583	nlmclnt_release_lockargs(req);	607	nlmclnt_release_lockargs(req);
584	return status;	608	return status;


diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index 0d9d22578212..16d4e5a08e1d 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h
@@ -72,6 +72,8 @@ struct nlm_lockowner {
72	uint32_t pid;	72	uint32_t pid;
73	};	73	};
74		74
		75	struct nlm_wait;
		76
75	/*	77	/*
76	* Memory chunk for NLM client RPC request.	78	* Memory chunk for NLM client RPC request.
77	*/	79	*/
@@ -81,6 +83,7 @@ struct nlm_rqst {
81	struct nlm_host * a_host; /* host handle */	83	struct nlm_host * a_host; /* host handle */
82	struct nlm_args a_args; /* arguments */	84	struct nlm_args a_args; /* arguments */
83	struct nlm_res a_res; /* result */	85	struct nlm_res a_res; /* result */
		86	struct nlm_wait * a_block;
84	char a_owner[NLMCLNT_OHSIZE];	87	char a_owner[NLMCLNT_OHSIZE];
85	};	88	};
86		89
@@ -142,7 +145,9 @@ extern unsigned long nlmsvc_timeout;
142	* Lockd client functions	145	* Lockd client functions
143	*/	146	*/
144	struct nlm_rqst * nlmclnt_alloc_call(void);	147	struct nlm_rqst * nlmclnt_alloc_call(void);
145	int nlmclnt_block(struct nlm_host *, struct file_lock *, u32 *);	148	int nlmclnt_prepare_block(struct nlm_rqst *req, struct nlm_host *host, struct file_lock *fl);
		149	void nlmclnt_finish_block(struct nlm_rqst *req);
		150	long nlmclnt_block(struct nlm_rqst *req, long timeout);
146	int nlmclnt_cancel(struct nlm_host *, struct file_lock *);	151	int nlmclnt_cancel(struct nlm_host *, struct file_lock *);
147	u32 nlmclnt_grant(struct nlm_lock *);	152	u32 nlmclnt_grant(struct nlm_lock *);
148	void nlmclnt_recovery(struct nlm_host *, u32);	153	void nlmclnt_recovery(struct nlm_host *, u32);