author		Tejun Heo <tj@kernel.org>	2011-01-03 08:49:46 -0500
committer	Sage Weil <sage@newdream.net>	2011-01-12 18:15:14 -0500
commit		f363e45fd1184219b472ea549cb7e192e24ef4d2
tree		1332feb2f7a0a47ce482a0fd4ee9afb547a27090
parent		01e6acc4ea4c284c44bfb3d46c76f4ae580c6435
net/ceph: make ceph_msgr_wq non-reentrant
The ceph messenger code does a rather complex dance around the
multithreaded workqueue to make sure the same work item isn't executed
concurrently on different CPUs. This restriction can be provided by
the workqueue itself with WQ_NON_REENTRANT.
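
For illustration, a minimal sketch of the pattern (the names and the
work function here are illustrative, not code from this patch):

	#include <linux/init.h>
	#include <linux/workqueue.h>

	static struct workqueue_struct *example_wq;
	static struct work_struct example_work;

	static void example_fn(struct work_struct *work)
	{
		/*
		 * With WQ_NON_REENTRANT, this function never runs on two
		 * CPUs at once for the same work item, so no hand-rolled
		 * QUEUED/BUSY-style exclusion is needed.
		 */
	}

	static int __init example_init(void)
	{
		/* max_active == 0 selects the default concurrency level */
		example_wq = alloc_workqueue("example", WQ_NON_REENTRANT, 0);
		if (!example_wq)
			return -ENOMEM;
		INIT_WORK(&example_work, example_fn);
		queue_work(example_wq, &example_work);
		return 0;
	}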
Make ceph_msgr_wq a non-reentrant workqueue with the default
concurrency level and remove the QUEUED/BUSY logic.
* This removes the backoff handling in con_work(), but that handling
  couldn't reliably block execution of con_work() to begin with:
  queue_con() can be called after the work has started but before BUSY
  is set (see the sketch after this list). It appears to have been an
  optimization for a rather cold path and can be safely removed.
* The number of concurrent work items is bounded by the number of
  connections, and connections are independent from each other. With
  the default concurrency level, different connections will be
  executed independently.
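
To make the first point concrete, the window the removed backoff logic
could not cover looks roughly like this (an illustrative interleaving
sketched as a comment, not code from this patch):

	/*
	 * worker CPU                     another CPU
	 *
	 * work item dequeued,
	 * con_work() starts
	 *                                queue_con():
	 *                                  sets QUEUED; BUSY is not yet
	 *                                  set and the item is no longer
	 *                                  pending, so queue_work()
	 *                                  succeeds and con_work() will
	 *                                  run again regardless of any
	 *                                  backoff decision
	 * test_and_set_bit(BUSY)
	 * ... faults, wants to back
	 * off, but cannot cancel the
	 * already-queued second run
	 */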
Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Sage Weil <sage@newdream.net>
Cc: ceph-devel@vger.kernel.org
Signed-off-by: Sage Weil <sage@newdream.net>
-rw-r--r--	include/linux/ceph/messenger.h	 5
-rw-r--r--	net/ceph/messenger.c	46
2 files changed, 2 insertions, 49 deletions
diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h
index a108b425fee2..c3011beac30d 100644
--- a/include/linux/ceph/messenger.h
+++ b/include/linux/ceph/messenger.h
@@ -110,17 +110,12 @@ struct ceph_msg_pos {
 
 /*
  * ceph_connection state bit flags
- *
- * QUEUED and BUSY are used together to ensure that only a single
- * thread is currently opening, reading or writing data to the socket.
  */
 #define LOSSYTX           0  /* we can close channel or drop messages on errors */
 #define CONNECTING        1
 #define NEGOTIATING       2
 #define KEEPALIVE_PENDING 3
 #define WRITE_PENDING     4  /* we have data ready to send */
-#define QUEUED            5  /* there is work queued on this connection */
-#define BUSY              6  /* work is being done */
 #define STANDBY           8  /* no outgoing messages, socket closed.  we keep
                              * the ceph_connection around to maintain shared
                              * state with the peer. */
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index b6ff4a1519ab..dff633d62e5b 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -96,7 +96,7 @@ struct workqueue_struct *ceph_msgr_wq;
 
 int ceph_msgr_init(void)
 {
-	ceph_msgr_wq = create_workqueue("ceph-msgr");
+	ceph_msgr_wq = alloc_workqueue("ceph-msgr", WQ_NON_REENTRANT, 0);
 	if (!ceph_msgr_wq) {
 		pr_err("msgr_init failed to create workqueue\n");
 		return -ENOMEM;
@@ -1920,20 +1920,6 @@ bad_tag:
 /*
  * Atomically queue work on a connection. Bump @con reference to
  * avoid races with connection teardown.
- *
- * There is some trickery going on with QUEUED and BUSY because we
- * only want a _single_ thread operating on each connection at any
- * point in time, but we want to use all available CPUs.
- *
- * The worker thread only proceeds if it can atomically set BUSY. It
- * clears QUEUED and does it's thing. When it thinks it's done, it
- * clears BUSY, then rechecks QUEUED.. if it's set again, it loops
- * (tries again to set BUSY).
- *
- * To queue work, we first set QUEUED, _then_ if BUSY isn't set, we
- * try to queue work. If that fails (work is already queued, or BUSY)
- * we give up (work also already being done or is queued) but leave QUEUED
- * set so that the worker thread will loop if necessary.
 */
 static void queue_con(struct ceph_connection *con)
 {
@@ -1948,11 +1934,7 @@ static void queue_con(struct ceph_connection *con)
 		return;
 	}
 
-	set_bit(QUEUED, &con->state);
-	if (test_bit(BUSY, &con->state)) {
-		dout("queue_con %p - already BUSY\n", con);
-		con->ops->put(con);
-	} else if (!queue_work(ceph_msgr_wq, &con->work.work)) {
+	if (!queue_delayed_work(ceph_msgr_wq, &con->work, 0)) {
 		dout("queue_con %p - already queued\n", con);
 		con->ops->put(con);
 	} else {
@@ -1967,15 +1949,6 @@ static void con_work(struct work_struct *work)
 {
 	struct ceph_connection *con = container_of(work, struct ceph_connection,
 						   work.work);
-	int backoff = 0;
-
-more:
-	if (test_and_set_bit(BUSY, &con->state) != 0) {
-		dout("con_work %p BUSY already set\n", con);
-		goto out;
-	}
-	dout("con_work %p start, clearing QUEUED\n", con);
-	clear_bit(QUEUED, &con->state);
 
 	mutex_lock(&con->mutex);
 
@@ -1994,28 +1967,13 @@ more:
 	    try_read(con) < 0 ||
 	    try_write(con) < 0) {
 		mutex_unlock(&con->mutex);
-		backoff = 1;
 		ceph_fault(con);     /* error/fault path */
 		goto done_unlocked;
 	}
 
 done:
 	mutex_unlock(&con->mutex);
-
 done_unlocked:
-	clear_bit(BUSY, &con->state);
-	dout("con->state=%lu\n", con->state);
-	if (test_bit(QUEUED, &con->state)) {
-		if (!backoff || test_bit(OPENING, &con->state)) {
-			dout("con_work %p QUEUED reset, looping\n", con);
-			goto more;
-		}
-		dout("con_work %p QUEUED reset, but just faulted\n", con);
-		clear_bit(QUEUED, &con->state);
-	}
-	dout("con_work %p done\n", con);
-
-out:
 	con->ops->put(con);
 }
 