1 file changed, 45 insertions, 84 deletions
diff --git a/fs/orangefs/waitqueue.c b/fs/orangefs/waitqueue.c
index 3ea1665efdf0..89622717a06d 100644
--- a/fs/orangefs/waitqueue.c
+++ b/fs/orangefs/waitqueue.c
@@ -17,6 +17,7 @@
 #include "orangefs-bufmap.h"
 static int wait_for_matching_downcall(struct orangefs_kernel_op_s *);
+static void orangefs_clean_up_interrupted_operation(struct orangefs_kernel_op_s *);
 /*
 * What we do in this function is to walk the list of operations that are
@@ -170,8 +171,10 @@ retry_servicing:
                        gossip_err("orangefs: %s -- wait timed out; aborting attempt.\n",
                                   op_name);
                }
+                orangefs_clean_up_interrupted_operation(op);
                op->downcall.status = ret;
        } else {
+                spin_unlock(&op->lock);
                /* got matching downcall; make sure status is in errno format */
                op->downcall.status =
                    orangefs_normalize_to_errno(op->downcall.status);
@@ -343,6 +346,7 @@ static void orangefs_clean_up_interrupted_operation(struct orangefs_kernel_op_s
                gossip_err("%s: can't get here.\n", __func__);
                spin_unlock(&op->lock);
        }
+        reinit_completion(&op->waitq);
 }
 /*
@@ -359,95 +363,52 @@ static void orangefs_clean_up_interrupted_operation(struct orangefs_kernel_op_s
 * EINTR/EIO/ETIMEDOUT indicating we are done trying to service this
 * operation since client-core seems to be exiting too often
 * or if we were interrupted.
+ *
+ * Returns with op->lock taken.
 */
 static int wait_for_matching_downcall(struct orangefs_kernel_op_s *op)
 {
-        int ret = -EINVAL;
+        long timeout, n;
-        DEFINE_WAIT(wait_entry);
-        while (1) {
+        timeout = op->attempts ? op_timeout_secs * HZ : MAX_SCHEDULE_TIMEOUT;
-                spin_lock(&op->lock);
+        n = wait_for_completion_interruptible_timeout(&op->waitq, timeout);
-                prepare_to_wait(&op->waitq, &wait_entry, TASK_INTERRUPTIBLE);
+        spin_lock(&op->lock);
-                if (op_state_serviced(op)) {
-                        spin_unlock(&op->lock);
-                        ret = 0;
-                        break;
-                }
-                if (unlikely(signal_pending(current))) {
+        if (op_state_serviced(op))
-                        gossip_debug(GOSSIP_WAIT_DEBUG,
+                return 0;
-                                     "*** %s:"
-                                     " operation interrupted by a signal (tag "
-                                     "%llu, op %p)\n",
-                                     __func__,
-                                     llu(op->tag),
-                                     op);
-                        orangefs_clean_up_interrupted_operation(op);
-                        ret = -EINTR;
-                        break;
-                }
-                /*
+        if (unlikely(n < 0)) {
-                 * if this was our first attempt and client-core
+                gossip_debug(GOSSIP_WAIT_DEBUG,
-                 * has not purged our operation, we are happy to
+                             "*** %s:"
-                 * simply wait
+                             " operation interrupted by a signal (tag "
-                 */
+                             "%llu, op %p)\n",
-                if (op->attempts == 0 && !op_state_purged(op)) {
+                             __func__,
-                        spin_unlock(&op->lock);
+                             llu(op->tag),
-                        schedule();
+                             op);
-                } else {
+                return -EINTR;
-                        spin_unlock(&op->lock);
-                        /*
-                         * subsequent attempts, we retry exactly once
-                         * with timeouts
-                         */
-                        if (!schedule_timeout(op_timeout_secs * HZ)) {
-                                gossip_debug(GOSSIP_WAIT_DEBUG,
-                                             "*** %s:"
-                                             " operation timed out (tag"
-                                             " %llu, %p, att %d)\n",
-                                             __func__,
-                                             llu(op->tag),
-                                             op,
-                                             op->attempts);
-                                ret = -ETIMEDOUT;
-                                spin_lock(&op->lock);
-                                orangefs_clean_up_interrupted_operation(op);
-                                break;
-                        }
-                }
-                spin_lock(&op->lock);
-                op->attempts++;
-                /*
-                 * if the operation was purged in the meantime, it
-                 * is better to requeue it afresh but ensure that
-                 * we have not been purged repeatedly. This could
-                 * happen if client-core crashes when an op
-                 * is being serviced, so we requeue the op, client
-                 * core crashes again so we requeue the op, client
-                 * core starts, and so on...
-                 */
-                if (op_state_purged(op)) {
-                        ret = (op->attempts < ORANGEFS_PURGE_RETRY_COUNT) ?
-                                 -EAGAIN :
-                                 -EIO;
-                        gossip_debug(GOSSIP_WAIT_DEBUG,
-                                     "*** %s:"
-                                     " operation purged (tag "
-                                     "%llu, %p, att %d)\n",
-                                     __func__,
-                                     llu(op->tag),
-                                     op,
-                                     op->attempts);
-                        orangefs_clean_up_interrupted_operation(op);
-                        break;
-                }
-                spin_unlock(&op->lock);
        }
+        op->attempts++;
-        spin_lock(&op->lock);
+        if (op_state_purged(op)) {
-        finish_wait(&op->waitq, &wait_entry);
+                gossip_debug(GOSSIP_WAIT_DEBUG,
-        spin_unlock(&op->lock);
+                             "*** %s:"
+                             " operation purged (tag "
-        return ret;
+                             "%llu, %p, att %d)\n",
+                             __func__,
+                             llu(op->tag),
+                             op,
+                             op->attempts);
+                return (op->attempts < ORANGEFS_PURGE_RETRY_COUNT) ?
+                         -EAGAIN :
+                         -EIO;
+        }
+        /* must have timed out, then... */
+        gossip_debug(GOSSIP_WAIT_DEBUG,
+                     "*** %s:"
+                     " operation timed out (tag"
+                     " %llu, %p, att %d)\n",
+                     __func__,
+                     llu(op->tag),
+                     op,
+                     op->attempts);
+        return -ETIMEDOUT;
 }

diff --git a/fs/orangefs/waitqueue.c b/fs/orangefs/waitqueue.c
index 3ea1665efdf0..89622717a06d 100644
--- a/fs/orangefs/waitqueue.c
+++ b/fs/orangefs/waitqueue.c
@@ -17,6 +17,7 @@
17	#include "orangefs-bufmap.h"	17	#include "orangefs-bufmap.h"
18		18
19	static int wait_for_matching_downcall(struct orangefs_kernel_op_s *);	19	static int wait_for_matching_downcall(struct orangefs_kernel_op_s *);
		20	static void orangefs_clean_up_interrupted_operation(struct orangefs_kernel_op_s *);
20		21
21	/*	22	/*
22	* What we do in this function is to walk the list of operations that are	23	* What we do in this function is to walk the list of operations that are
@@ -170,8 +171,10 @@ retry_servicing:
170	gossip_err("orangefs: %s -- wait timed out; aborting attempt.\n",	171	gossip_err("orangefs: %s -- wait timed out; aborting attempt.\n",
171	op_name);	172	op_name);
172	}	173	}
		174	orangefs_clean_up_interrupted_operation(op);
173	op->downcall.status = ret;	175	op->downcall.status = ret;
174	} else {	176	} else {
		177	spin_unlock(&op->lock);
175	/* got matching downcall; make sure status is in errno format */	178	/* got matching downcall; make sure status is in errno format */
176	op->downcall.status =	179	op->downcall.status =
177	orangefs_normalize_to_errno(op->downcall.status);	180	orangefs_normalize_to_errno(op->downcall.status);
@@ -343,6 +346,7 @@ static void orangefs_clean_up_interrupted_operation(struct orangefs_kernel_op_s
343	gossip_err("%s: can't get here.\n", __func__);	346	gossip_err("%s: can't get here.\n", __func__);
344	spin_unlock(&op->lock);	347	spin_unlock(&op->lock);
345	}	348	}
		349	reinit_completion(&op->waitq);
346	}	350	}
347		351
348	/*	352	/*
@@ -359,95 +363,52 @@ static void orangefs_clean_up_interrupted_operation(struct orangefs_kernel_op_s
359	* EINTR/EIO/ETIMEDOUT indicating we are done trying to service this	363	* EINTR/EIO/ETIMEDOUT indicating we are done trying to service this
360	* operation since client-core seems to be exiting too often	364	* operation since client-core seems to be exiting too often
361	* or if we were interrupted.	365	* or if we were interrupted.
		366	*
		367	* Returns with op->lock taken.
362	*/	368	*/
363	static int wait_for_matching_downcall(struct orangefs_kernel_op_s *op)	369	static int wait_for_matching_downcall(struct orangefs_kernel_op_s *op)
364	{	370	{
365	int ret = -EINVAL;	371	long timeout, n;
366	DEFINE_WAIT(wait_entry);
367		372
368	while (1) {	373	timeout = op->attempts ? op_timeout_secs * HZ : MAX_SCHEDULE_TIMEOUT;
369	spin_lock(&op->lock);	374	n = wait_for_completion_interruptible_timeout(&op->waitq, timeout);
370	prepare_to_wait(&op->waitq, &wait_entry, TASK_INTERRUPTIBLE);	375	spin_lock(&op->lock);
371	if (op_state_serviced(op)) {
372	spin_unlock(&op->lock);
373	ret = 0;
374	break;
375	}
376		376
377	if (unlikely(signal_pending(current))) {	377	if (op_state_serviced(op))
378	gossip_debug(GOSSIP_WAIT_DEBUG,	378	return 0;
379	"*** %s:"
380	" operation interrupted by a signal (tag "
381	"%llu, op %p)\n",
382	__func__,
383	llu(op->tag),
384	op);
385	orangefs_clean_up_interrupted_operation(op);
386	ret = -EINTR;
387	break;
388	}
389		379
390	/*	380	if (unlikely(n < 0)) {
391	* if this was our first attempt and client-core	381	gossip_debug(GOSSIP_WAIT_DEBUG,
392	* has not purged our operation, we are happy to	382	"*** %s:"
393	* simply wait	383	" operation interrupted by a signal (tag "
394	*/	384	"%llu, op %p)\n",
395	if (op->attempts == 0 && !op_state_purged(op)) {	385	__func__,
396	spin_unlock(&op->lock);	386	llu(op->tag),
397	schedule();	387	op);
398	} else {	388	return -EINTR;
399	spin_unlock(&op->lock);
400	/*
401	* subsequent attempts, we retry exactly once
402	* with timeouts
403	*/
404	if (!schedule_timeout(op_timeout_secs * HZ)) {
405	gossip_debug(GOSSIP_WAIT_DEBUG,
406	"*** %s:"
407	" operation timed out (tag"
408	" %llu, %p, att %d)\n",
409	__func__,
410	llu(op->tag),
411	op,
412	op->attempts);
413	ret = -ETIMEDOUT;
414	spin_lock(&op->lock);
415	orangefs_clean_up_interrupted_operation(op);
416	break;
417	}
418	}
419	spin_lock(&op->lock);
420	op->attempts++;
421	/*
422	* if the operation was purged in the meantime, it
423	* is better to requeue it afresh but ensure that
424	* we have not been purged repeatedly. This could
425	* happen if client-core crashes when an op
426	* is being serviced, so we requeue the op, client
427	* core crashes again so we requeue the op, client
428	* core starts, and so on...
429	*/
430	if (op_state_purged(op)) {
431	ret = (op->attempts < ORANGEFS_PURGE_RETRY_COUNT) ?
432	-EAGAIN :
433	-EIO;
434	gossip_debug(GOSSIP_WAIT_DEBUG,
435	"*** %s:"
436	" operation purged (tag "
437	"%llu, %p, att %d)\n",
438	__func__,
439	llu(op->tag),
440	op,
441	op->attempts);
442	orangefs_clean_up_interrupted_operation(op);
443	break;
444	}
445	spin_unlock(&op->lock);
446	}	389	}
447		390	op->attempts++;
448	spin_lock(&op->lock);	391	if (op_state_purged(op)) {
449	finish_wait(&op->waitq, &wait_entry);	392	gossip_debug(GOSSIP_WAIT_DEBUG,
450	spin_unlock(&op->lock);	393	"*** %s:"
451		394	" operation purged (tag "
452	return ret;	395	"%llu, %p, att %d)\n",
		396	__func__,
		397	llu(op->tag),
		398	op,
		399	op->attempts);
		400	return (op->attempts < ORANGEFS_PURGE_RETRY_COUNT) ?
		401	-EAGAIN :
		402	-EIO;
		403	}
		404	/* must have timed out, then... */
		405	gossip_debug(GOSSIP_WAIT_DEBUG,
		406	"*** %s:"
		407	" operation timed out (tag"
		408	" %llu, %p, att %d)\n",
		409	__func__,
		410	llu(op->tag),
		411	op,
		412	op->attempts);
		413	return -ETIMEDOUT;
453	}	414	}