diff options
author | Chuck Lever <cel@netapp.com> | 2006-03-20 13:44:34 -0500 |
---|---|---|
committer | Trond Myklebust <Trond.Myklebust@netapp.com> | 2006-03-20 13:44:34 -0500 |
commit | 15ce4a0c1ce0d5e288398cb9e5493fd4e55e2025 (patch) | |
tree | b40b3096bbbed8b0aa2a29e9374240dd7eaa47a0 | |
parent | 88467055f7654302c12df74e5fe4d12516656a39 (diff) |
NFS: Replace atomic_t variables in nfs_direct_req with a single spin lock

Three atomic_t variables cause a lot of bus locking. Because they are all
used in the same places in the code, just use a single spin lock.

Now that the atomic_t variables are gone, we can remove the request size
limitation since the code no longer depends on the limited width of atomic_t
on some platforms.

Test plan:
Compile with CONFIG_NFS and CONFIG_NFS_DIRECTIO enabled. Millions of fsx
operations, iozone, OraSim.

Signed-off-by: Chuck Lever <cel@netapp.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
-rw-r--r-- | fs/nfs/direct.c | 81 |
1 files changed, 47 insertions, 34 deletions
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index bcbc213b4033..3de7c4b07968 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -58,7 +58,6 @@ | |||
58 | #include "iostat.h" | 58 | #include "iostat.h" |
59 | 59 | ||
60 | #define NFSDBG_FACILITY NFSDBG_VFS | 60 | #define NFSDBG_FACILITY NFSDBG_VFS |
61 | #define MAX_DIRECTIO_SIZE (4096UL << PAGE_SHIFT) | ||
62 | 61 | ||
63 | static void nfs_free_user_pages(struct page **pages, int npages, int do_dirty); | 62 | static void nfs_free_user_pages(struct page **pages, int npages, int do_dirty); |
64 | static kmem_cache_t *nfs_direct_cachep; | 63 | static kmem_cache_t *nfs_direct_cachep; |
@@ -68,6 +67,8 @@ static kmem_cache_t *nfs_direct_cachep; | |||
68 | */ | 67 | */ |
69 | struct nfs_direct_req { | 68 | struct nfs_direct_req { |
70 | struct kref kref; /* release manager */ | 69 | struct kref kref; /* release manager */ |
70 | |||
71 | /* I/O parameters */ | ||
71 | struct list_head list; /* nfs_read/write_data structs */ | 72 | struct list_head list; /* nfs_read/write_data structs */ |
72 | struct file * filp; /* file descriptor */ | 73 | struct file * filp; /* file descriptor */ |
73 | struct kiocb * iocb; /* controlling i/o request */ | 74 | struct kiocb * iocb; /* controlling i/o request */ |
@@ -75,12 +76,14 @@ struct nfs_direct_req { | |||
75 | struct inode * inode; /* target file of i/o */ | 76 | struct inode * inode; /* target file of i/o */ |
76 | struct page ** pages; /* pages in our buffer */ | 77 | struct page ** pages; /* pages in our buffer */ |
77 | unsigned int npages; /* count of pages */ | 78 | unsigned int npages; /* count of pages */ |
78 | atomic_t complete, /* i/os we're waiting for */ | 79 | |
79 | count, /* bytes actually processed */ | 80 | /* completion state */ |
81 | spinlock_t lock; /* protect completion state */ | ||
82 | int outstanding; /* i/os we're waiting for */ | ||
83 | ssize_t count, /* bytes actually processed */ | ||
80 | error; /* any reported error */ | 84 | error; /* any reported error */ |
81 | }; | 85 | }; |
82 | 86 | ||
83 | |||
84 | /** | 87 | /** |
85 | * nfs_direct_IO - NFS address space operation for direct I/O | 88 | * nfs_direct_IO - NFS address space operation for direct I/O |
86 | * @rw: direction (read or write) | 89 | * @rw: direction (read or write) |
@@ -110,12 +113,6 @@ static inline int nfs_get_user_pages(int rw, unsigned long user_addr, size_t siz | |||
110 | unsigned long page_count; | 113 | unsigned long page_count; |
111 | size_t array_size; | 114 | size_t array_size; |
112 | 115 | ||
113 | /* set an arbitrary limit to prevent type overflow */ | ||
114 | if (size > MAX_DIRECTIO_SIZE) { | ||
115 | *pages = NULL; | ||
116 | return -EFBIG; | ||
117 | } | ||
118 | |||
119 | page_count = (user_addr + size + PAGE_SIZE - 1) >> PAGE_SHIFT; | 116 | page_count = (user_addr + size + PAGE_SIZE - 1) >> PAGE_SHIFT; |
120 | page_count -= user_addr >> PAGE_SHIFT; | 117 | page_count -= user_addr >> PAGE_SHIFT; |
121 | 118 | ||
@@ -164,8 +161,10 @@ static inline struct nfs_direct_req *nfs_direct_req_alloc(void) | |||
164 | init_waitqueue_head(&dreq->wait); | 161 | init_waitqueue_head(&dreq->wait); |
165 | INIT_LIST_HEAD(&dreq->list); | 162 | INIT_LIST_HEAD(&dreq->list); |
166 | dreq->iocb = NULL; | 163 | dreq->iocb = NULL; |
167 | atomic_set(&dreq->count, 0); | 164 | spin_lock_init(&dreq->lock); |
168 | atomic_set(&dreq->error, 0); | 165 | dreq->outstanding = 0; |
166 | dreq->count = 0; | ||
167 | dreq->error = 0; | ||
169 | 168 | ||
170 | return dreq; | 169 | return dreq; |
171 | } | 170 | } |
@@ -181,19 +180,18 @@ static void nfs_direct_req_release(struct kref *kref) | |||
181 | */ | 180 | */ |
182 | static ssize_t nfs_direct_wait(struct nfs_direct_req *dreq) | 181 | static ssize_t nfs_direct_wait(struct nfs_direct_req *dreq) |
183 | { | 182 | { |
184 | int result = -EIOCBQUEUED; | 183 | ssize_t result = -EIOCBQUEUED; |
185 | 184 | ||
186 | /* Async requests don't wait here */ | 185 | /* Async requests don't wait here */ |
187 | if (dreq->iocb) | 186 | if (dreq->iocb) |
188 | goto out; | 187 | goto out; |
189 | 188 | ||
190 | result = wait_event_interruptible(dreq->wait, | 189 | result = wait_event_interruptible(dreq->wait, (dreq->outstanding == 0)); |
191 | (atomic_read(&dreq->complete) == 0)); | ||
192 | 190 | ||
193 | if (!result) | 191 | if (!result) |
194 | result = atomic_read(&dreq->error); | 192 | result = dreq->error; |
195 | if (!result) | 193 | if (!result) |
196 | result = atomic_read(&dreq->count); | 194 | result = dreq->count; |
197 | 195 | ||
198 | out: | 196 | out: |
199 | kref_put(&dreq->kref, nfs_direct_req_release); | 197 | kref_put(&dreq->kref, nfs_direct_req_release); |
@@ -214,9 +212,9 @@ static void nfs_direct_complete(struct nfs_direct_req *dreq) | |||
214 | nfs_free_user_pages(dreq->pages, dreq->npages, 1); | 212 | nfs_free_user_pages(dreq->pages, dreq->npages, 1); |
215 | 213 | ||
216 | if (dreq->iocb) { | 214 | if (dreq->iocb) { |
217 | long res = atomic_read(&dreq->error); | 215 | long res = (long) dreq->error; |
218 | if (!res) | 216 | if (!res) |
219 | res = atomic_read(&dreq->count); | 217 | res = (long) dreq->count; |
220 | aio_complete(dreq->iocb, res, 0); | 218 | aio_complete(dreq->iocb, res, 0); |
221 | } else | 219 | } else |
222 | wake_up(&dreq->wait); | 220 | wake_up(&dreq->wait); |
@@ -233,7 +231,6 @@ static struct nfs_direct_req *nfs_direct_read_alloc(size_t nbytes, size_t rsize) | |||
233 | { | 231 | { |
234 | struct list_head *list; | 232 | struct list_head *list; |
235 | struct nfs_direct_req *dreq; | 233 | struct nfs_direct_req *dreq; |
236 | unsigned int reads = 0; | ||
237 | unsigned int rpages = (rsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; | 234 | unsigned int rpages = (rsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; |
238 | 235 | ||
239 | dreq = nfs_direct_req_alloc(); | 236 | dreq = nfs_direct_req_alloc(); |
@@ -259,13 +256,12 @@ static struct nfs_direct_req *nfs_direct_read_alloc(size_t nbytes, size_t rsize) | |||
259 | list_add(&data->pages, list); | 256 | list_add(&data->pages, list); |
260 | 257 | ||
261 | data->req = (struct nfs_page *) dreq; | 258 | data->req = (struct nfs_page *) dreq; |
262 | reads++; | 259 | dreq->outstanding++; |
263 | if (nbytes <= rsize) | 260 | if (nbytes <= rsize) |
264 | break; | 261 | break; |
265 | nbytes -= rsize; | 262 | nbytes -= rsize; |
266 | } | 263 | } |
267 | kref_get(&dreq->kref); | 264 | kref_get(&dreq->kref); |
268 | atomic_set(&dreq->complete, reads); | ||
269 | return dreq; | 265 | return dreq; |
270 | } | 266 | } |
271 | 267 | ||
@@ -276,13 +272,21 @@ static void nfs_direct_read_result(struct rpc_task *task, void *calldata) | |||
276 | 272 | ||
277 | if (nfs_readpage_result(task, data) != 0) | 273 | if (nfs_readpage_result(task, data) != 0) |
278 | return; | 274 | return; |
275 | |||
276 | spin_lock(&dreq->lock); | ||
277 | |||
279 | if (likely(task->tk_status >= 0)) | 278 | if (likely(task->tk_status >= 0)) |
280 | atomic_add(data->res.count, &dreq->count); | 279 | dreq->count += data->res.count; |
281 | else | 280 | else |
282 | atomic_set(&dreq->error, task->tk_status); | 281 | dreq->error = task->tk_status; |
282 | |||
283 | if (--dreq->outstanding) { | ||
284 | spin_unlock(&dreq->lock); | ||
285 | return; | ||
286 | } | ||
283 | 287 | ||
284 | if (unlikely(atomic_dec_and_test(&dreq->complete))) | 288 | spin_unlock(&dreq->lock); |
285 | nfs_direct_complete(dreq); | 289 | nfs_direct_complete(dreq); |
286 | } | 290 | } |
287 | 291 | ||
288 | static const struct rpc_call_ops nfs_read_direct_ops = { | 292 | static const struct rpc_call_ops nfs_read_direct_ops = { |
@@ -388,7 +392,6 @@ static struct nfs_direct_req *nfs_direct_write_alloc(size_t nbytes, size_t wsize | |||
388 | { | 392 | { |
389 | struct list_head *list; | 393 | struct list_head *list; |
390 | struct nfs_direct_req *dreq; | 394 | struct nfs_direct_req *dreq; |
391 | unsigned int writes = 0; | ||
392 | unsigned int wpages = (wsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; | 395 | unsigned int wpages = (wsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; |
393 | 396 | ||
394 | dreq = nfs_direct_req_alloc(); | 397 | dreq = nfs_direct_req_alloc(); |
@@ -414,16 +417,19 @@ static struct nfs_direct_req *nfs_direct_write_alloc(size_t nbytes, size_t wsize | |||
414 | list_add(&data->pages, list); | 417 | list_add(&data->pages, list); |
415 | 418 | ||
416 | data->req = (struct nfs_page *) dreq; | 419 | data->req = (struct nfs_page *) dreq; |
417 | writes++; | 420 | dreq->outstanding++; |
418 | if (nbytes <= wsize) | 421 | if (nbytes <= wsize) |
419 | break; | 422 | break; |
420 | nbytes -= wsize; | 423 | nbytes -= wsize; |
421 | } | 424 | } |
422 | kref_get(&dreq->kref); | 425 | kref_get(&dreq->kref); |
423 | atomic_set(&dreq->complete, writes); | ||
424 | return dreq; | 426 | return dreq; |
425 | } | 427 | } |
426 | 428 | ||
429 | /* | ||
430 | * NB: Return the value of the first error return code. Subsequent | ||
431 | * errors after the first one are ignored. | ||
432 | */ | ||
427 | static void nfs_direct_write_result(struct rpc_task *task, void *calldata) | 433 | static void nfs_direct_write_result(struct rpc_task *task, void *calldata) |
428 | { | 434 | { |
429 | struct nfs_write_data *data = calldata; | 435 | struct nfs_write_data *data = calldata; |
@@ -436,15 +442,22 @@ static void nfs_direct_write_result(struct rpc_task *task, void *calldata) | |||
436 | if (unlikely(data->res.verf->committed != NFS_FILE_SYNC)) | 442 | if (unlikely(data->res.verf->committed != NFS_FILE_SYNC)) |
437 | status = -EIO; | 443 | status = -EIO; |
438 | 444 | ||
445 | spin_lock(&dreq->lock); | ||
446 | |||
439 | if (likely(status >= 0)) | 447 | if (likely(status >= 0)) |
440 | atomic_add(data->res.count, &dreq->count); | 448 | dreq->count += data->res.count; |
441 | else | 449 | else |
442 | atomic_set(&dreq->error, status); | 450 | dreq->error = status; |
443 | 451 | ||
444 | if (unlikely(atomic_dec_and_test(&dreq->complete))) { | 452 | if (--dreq->outstanding) { |
445 | nfs_end_data_update(data->inode); | 453 | spin_unlock(&dreq->lock); |
446 | nfs_direct_complete(dreq); | 454 | return; |
447 | } | 455 | } |
456 | |||
457 | spin_unlock(&dreq->lock); | ||
458 | |||
459 | nfs_end_data_update(data->inode); | ||
460 | nfs_direct_complete(dreq); | ||
448 | } | 461 | } |
449 | 462 | ||
450 | static const struct rpc_call_ops nfs_write_direct_ops = { | 463 | static const struct rpc_call_ops nfs_write_direct_ops = { |