author		Trond Myklebust <Trond.Myklebust@netapp.com>	2006-06-25 06:27:31 -0400
committer	Trond Myklebust <Trond.Myklebust@netapp.com>	2006-06-25 06:27:31 -0400
commit		ccf01ef7aa9c6c293a1c64c27331a2ce227916ec (patch)
tree		421fa29aedff988e392f92780637553e275d37a0
parent		82b145c5a572f7fa7211dffe2097234dc91bcecc (diff)
Merge branch 'odirect'
-rw-r--r--	fs/nfs/direct.c		435
-rw-r--r--	include/linux/nfs_xdr.h	  2
2 files changed, 234 insertions, 203 deletions
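Taken together, the two sides of this diff swap completion-tracking schemes for struct nfs_direct_req: the removed lines count in-flight I/O with an atomic reference count (io_count plus get_dreq()/put_dreq()), while the surviving lines keep a plain `outstanding` integer under dreq->lock, pre-set by nfs_direct_read_alloc()/nfs_direct_write_alloc() before any RPC is dispatched. The two idioms are contrasted in the minimal user-space sketch below; C11 atomics and a pthread mutex stand in for the kernel's atomic_t and spinlock_t, and all names here are illustrative, not from the patch.

```c
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

/* Idiom A (removed side): one atomic counter.  The dispatcher holds its own
 * reference, so the last put -- whoever makes it -- runs the completion.
 * No lock is needed around the count. */
static atomic_int io_count;

static void get_req(void) { atomic_fetch_add(&io_count, 1); }
static int  put_req(void) { return atomic_fetch_sub(&io_count, 1) == 1; }

/* Idiom B (surviving side): a plain integer guarded by a lock.  The counter
 * is pre-set to the number of requests before any are dispatched, so
 * completion cannot race with dispatch. */
static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static int outstanding;

static int complete_one(void)
{
	int last;

	pthread_mutex_lock(&lock);
	last = (--outstanding == 0);
	pthread_mutex_unlock(&lock);
	return last;
}

int main(void)
{
	/* Idiom A: one reference per request, plus one for the dispatcher. */
	get_req();				/* dispatcher's reference */
	get_req();				/* request 1 */
	get_req();				/* request 2 */
	if (put_req()) puts("A: complete");	/* request 1 done: not last */
	if (put_req()) puts("A: complete");	/* request 2 done: not last */
	if (put_req()) puts("A: complete");	/* dispatcher drops: complete */

	/* Idiom B: the count is fixed up front, then only decremented. */
	outstanding = 2;
	if (complete_one()) puts("B: complete");
	if (complete_one()) puts("B: complete");
	return 0;
}
```

Pre-setting the count means a completing RPC can never observe zero while dispatch is still queueing requests, which is the race the "Note we also set the number of requests" comment in the +245,48 hunk below calls out.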
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index e25b7595b7ad..402005c35ab3 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -68,19 +68,25 @@ struct nfs_direct_req {
 	struct kref kref;			/* release manager */
 
 	/* I/O parameters */
+	struct list_head list,			/* nfs_read/write_data structs */
+			rewrite_list;		/* saved nfs_write_data structs */
 	struct nfs_open_context *ctx;		/* file open context info */
 	struct kiocb *iocb;			/* controlling i/o request */
 	struct inode *inode;			/* target file of i/o */
+	unsigned long user_addr;		/* location of user's buffer */
+	size_t user_count;			/* total bytes to move */
+	loff_t pos;				/* starting offset in file */
+	struct page **pages;			/* pages in our buffer */
+	unsigned int npages;			/* count of pages */
 
 	/* completion state */
-	atomic_t io_count;			/* i/os we're waiting for */
 	spinlock_t lock;			/* protect completion state */
+	int outstanding;			/* i/os we're waiting for */
 	ssize_t count,				/* bytes actually processed */
 		error;				/* any reported error */
 	struct completion completion;		/* wait for i/o completion */
 
 	/* commit state */
-	struct list_head rewrite_list;		/* saved nfs_write_data structs */
 	struct nfs_write_data *commit_data;	/* special write_data for commits */
 	int flags;
 #define NFS_ODIRECT_DO_COMMIT	(1)	/* an unstable reply was received */
@@ -88,37 +94,8 @@ struct nfs_direct_req {
 	struct nfs_writeverf verf;	/* unstable write verifier */
 };
 
+static void nfs_direct_write_schedule(struct nfs_direct_req *dreq, int sync);
 static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode);
-static const struct rpc_call_ops nfs_write_direct_ops;
-
-static inline void get_dreq(struct nfs_direct_req *dreq)
-{
-	atomic_inc(&dreq->io_count);
-}
-
-static inline int put_dreq(struct nfs_direct_req *dreq)
-{
-	return atomic_dec_and_test(&dreq->io_count);
-}
-
-/*
- * "size" is never larger than rsize or wsize.
- */
-static inline int nfs_direct_count_pages(unsigned long user_addr, size_t size)
-{
-	int page_count;
-
-	page_count = (user_addr + size + PAGE_SIZE - 1) >> PAGE_SHIFT;
-	page_count -= user_addr >> PAGE_SHIFT;
-	BUG_ON(page_count < 0);
-
-	return page_count;
-}
-
-static inline unsigned int nfs_max_pages(unsigned int size)
-{
-	return (size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
-}
 
 /**
  * nfs_direct_IO - NFS address space operation for direct I/O
@@ -142,21 +119,50 @@ ssize_t nfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_
 	return -EINVAL;
 }
 
-static void nfs_direct_dirty_pages(struct page **pages, int npages)
+static void nfs_free_user_pages(struct page **pages, int npages, int do_dirty)
 {
 	int i;
 	for (i = 0; i < npages; i++) {
 		struct page *page = pages[i];
-		if (!PageCompound(page))
+		if (do_dirty && !PageCompound(page))
 			set_page_dirty_lock(page);
+		page_cache_release(page);
 	}
+	kfree(pages);
 }
 
-static void nfs_direct_release_pages(struct page **pages, int npages)
+static inline int nfs_get_user_pages(int rw, unsigned long user_addr, size_t size, struct page ***pages)
 {
-	int i;
-	for (i = 0; i < npages; i++)
-		page_cache_release(pages[i]);
+	int result = -ENOMEM;
+	unsigned long page_count;
+	size_t array_size;
+
+	page_count = (user_addr + size + PAGE_SIZE - 1) >> PAGE_SHIFT;
+	page_count -= user_addr >> PAGE_SHIFT;
+
+	array_size = (page_count * sizeof(struct page *));
+	*pages = kmalloc(array_size, GFP_KERNEL);
+	if (*pages) {
+		down_read(&current->mm->mmap_sem);
+		result = get_user_pages(current, current->mm, user_addr,
+					page_count, (rw == READ), 0,
+					*pages, NULL);
+		up_read(&current->mm->mmap_sem);
+		if (result != page_count) {
+			/*
+			 * If we got fewer pages than expected from
+			 * get_user_pages(), the user buffer runs off the
+			 * end of a mapping; return EFAULT.
+			 */
+			if (result >= 0) {
+				nfs_free_user_pages(*pages, result, 0);
+				result = -EFAULT;
+			} else
+				kfree(*pages);
+			*pages = NULL;
+		}
+	}
+	return result;
 }
 
 static inline struct nfs_direct_req *nfs_direct_req_alloc(void)
@@ -168,13 +174,13 @@ static inline struct nfs_direct_req *nfs_direct_req_alloc(void)
 		return NULL;
 
 	kref_init(&dreq->kref);
-	kref_get(&dreq->kref);
 	init_completion(&dreq->completion);
+	INIT_LIST_HEAD(&dreq->list);
 	INIT_LIST_HEAD(&dreq->rewrite_list);
 	dreq->iocb = NULL;
 	dreq->ctx = NULL;
 	spin_lock_init(&dreq->lock);
-	atomic_set(&dreq->io_count, 0);
+	dreq->outstanding = 0;
 	dreq->count = 0;
 	dreq->error = 0;
 	dreq->flags = 0;
@@ -215,11 +221,18 @@ out:
 }
 
 /*
- * Synchronous I/O uses a stack-allocated iocb.  Thus we can't trust
- * the iocb is still valid here if this is a synchronous request.
+ * We must hold a reference to all the pages in this direct read request
+ * until the RPCs complete.  This could be long *after* we are woken up in
+ * nfs_direct_wait (for instance, if someone hits ^C on a slow server).
+ *
+ * In addition, synchronous I/O uses a stack-allocated iocb.  Thus we
+ * can't trust the iocb is still valid here if this is a synchronous
+ * request.  If the waiter is woken prematurely, the iocb is long gone.
  */
 static void nfs_direct_complete(struct nfs_direct_req *dreq)
 {
+	nfs_free_user_pages(dreq->pages, dreq->npages, 1);
+
 	if (dreq->iocb) {
 		long res = (long) dreq->error;
 		if (!res)
@@ -232,10 +245,48 @@ static void nfs_direct_complete(struct nfs_direct_req *dreq)
 }
 
 /*
- * We must hold a reference to all the pages in this direct read request
- * until the RPCs complete.  This could be long *after* we are woken up in
- * nfs_direct_wait (for instance, if someone hits ^C on a slow server).
+ * Note we also set the number of requests we have in the dreq when we are
+ * done.  This prevents races with I/O completion so we will always wait
+ * until all requests have been dispatched and completed.
  */
+static struct nfs_direct_req *nfs_direct_read_alloc(size_t nbytes, size_t rsize)
+{
+	struct list_head *list;
+	struct nfs_direct_req *dreq;
+	unsigned int rpages = (rsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+
+	dreq = nfs_direct_req_alloc();
+	if (!dreq)
+		return NULL;
+
+	list = &dreq->list;
+	for(;;) {
+		struct nfs_read_data *data = nfs_readdata_alloc(rpages);
+
+		if (unlikely(!data)) {
+			while (!list_empty(list)) {
+				data = list_entry(list->next,
+						struct nfs_read_data, pages);
+				list_del(&data->pages);
+				nfs_readdata_free(data);
+			}
+			kref_put(&dreq->kref, nfs_direct_req_release);
+			return NULL;
+		}
+
+		INIT_LIST_HEAD(&data->pages);
+		list_add(&data->pages, list);
+
+		data->req = (struct nfs_page *) dreq;
+		dreq->outstanding++;
+		if (nbytes <= rsize)
+			break;
+		nbytes -= rsize;
+	}
+	kref_get(&dreq->kref);
+	return dreq;
+}
+
 static void nfs_direct_read_result(struct rpc_task *task, void *calldata)
 {
 	struct nfs_read_data *data = calldata;
@@ -244,9 +295,6 @@ static void nfs_direct_read_result(struct rpc_task *task, void *calldata)
 	if (nfs_readpage_result(task, data) != 0)
 		return;
 
-	nfs_direct_dirty_pages(data->pagevec, data->npages);
-	nfs_direct_release_pages(data->pagevec, data->npages);
-
 	spin_lock(&dreq->lock);
 
 	if (likely(task->tk_status >= 0))
@@ -254,10 +302,13 @@ static void nfs_direct_read_result(struct rpc_task *task, void *calldata)
 	else
 		dreq->error = task->tk_status;
 
-	spin_unlock(&dreq->lock);
+	if (--dreq->outstanding) {
+		spin_unlock(&dreq->lock);
+		return;
+	}
 
-	if (put_dreq(dreq))
-		nfs_direct_complete(dreq);
+	spin_unlock(&dreq->lock);
+	nfs_direct_complete(dreq);
 }
 
 static const struct rpc_call_ops nfs_read_direct_ops = {
@@ -266,60 +317,41 @@ static const struct rpc_call_ops nfs_read_direct_ops = {
 };
 
 /*
- * For each rsize'd chunk of the user's buffer, dispatch an NFS READ
- * operation.  If nfs_readdata_alloc() or get_user_pages() fails,
- * bail and stop sending more reads.  Read length accounting is
- * handled automatically by nfs_direct_read_result().  Otherwise, if
- * no requests have been sent, just return an error.
+ * For each nfs_read_data struct that was allocated on the list, dispatch
+ * an NFS READ operation
  */
-static ssize_t nfs_direct_read_schedule(struct nfs_direct_req *dreq, unsigned long user_addr, size_t count, loff_t pos)
+static void nfs_direct_read_schedule(struct nfs_direct_req *dreq)
 {
 	struct nfs_open_context *ctx = dreq->ctx;
 	struct inode *inode = ctx->dentry->d_inode;
+	struct list_head *list = &dreq->list;
+	struct page **pages = dreq->pages;
+	size_t count = dreq->user_count;
+	loff_t pos = dreq->pos;
 	size_t rsize = NFS_SERVER(inode)->rsize;
-	unsigned int rpages = nfs_max_pages(rsize);
-	unsigned int pgbase;
-	int result;
-	ssize_t started = 0;
-
-	get_dreq(dreq);
+	unsigned int curpage, pgbase;
 
-	pgbase = user_addr & ~PAGE_MASK;
+	curpage = 0;
+	pgbase = dreq->user_addr & ~PAGE_MASK;
 	do {
 		struct nfs_read_data *data;
 		size_t bytes;
 
-		result = -ENOMEM;
-		data = nfs_readdata_alloc(rpages);
-		if (unlikely(!data))
-			break;
-
 		bytes = rsize;
 		if (count < rsize)
 			bytes = count;
 
-		data->npages = nfs_direct_count_pages(user_addr, bytes);
-		down_read(&current->mm->mmap_sem);
-		result = get_user_pages(current, current->mm, user_addr,
-					data->npages, 1, 0, data->pagevec, NULL);
-		up_read(&current->mm->mmap_sem);
-		if (unlikely(result < data->npages)) {
-			if (result > 0)
-				nfs_direct_release_pages(data->pagevec, result);
-			nfs_readdata_release(data);
-			break;
-		}
-
-		get_dreq(dreq);
+		BUG_ON(list_empty(list));
+		data = list_entry(list->next, struct nfs_read_data, pages);
+		list_del_init(&data->pages);
 
-		data->req = (struct nfs_page *) dreq;
 		data->inode = inode;
 		data->cred = ctx->cred;
 		data->args.fh = NFS_FH(inode);
 		data->args.context = ctx;
 		data->args.offset = pos;
 		data->args.pgbase = pgbase;
-		data->args.pages = data->pagevec;
+		data->args.pages = &pages[curpage];
 		data->args.count = bytes;
 		data->res.fattr = &data->fattr;
 		data->res.eof = 0;
@@ -342,35 +374,33 @@ static ssize_t nfs_direct_read_schedule(struct nfs_direct_req *dreq, unsigned lo
 			bytes,
 			(unsigned long long)data->args.offset);
 
-		started += bytes;
-		user_addr += bytes;
 		pos += bytes;
 		pgbase += bytes;
+		curpage += pgbase >> PAGE_SHIFT;
 		pgbase &= ~PAGE_MASK;
 
 		count -= bytes;
 	} while (count != 0);
-
-	if (put_dreq(dreq))
-		nfs_direct_complete(dreq);
-
-	if (started)
-		return 0;
-	return result < 0 ? (ssize_t) result : -EFAULT;
+	BUG_ON(!list_empty(list));
 }
 
-static ssize_t nfs_direct_read(struct kiocb *iocb, unsigned long user_addr, size_t count, loff_t pos)
+static ssize_t nfs_direct_read(struct kiocb *iocb, unsigned long user_addr, size_t count, loff_t pos, struct page **pages, unsigned int nr_pages)
 {
-	ssize_t result = 0;
+	ssize_t result;
 	sigset_t oldset;
 	struct inode *inode = iocb->ki_filp->f_mapping->host;
 	struct rpc_clnt *clnt = NFS_CLIENT(inode);
 	struct nfs_direct_req *dreq;
 
-	dreq = nfs_direct_req_alloc();
+	dreq = nfs_direct_read_alloc(count, NFS_SERVER(inode)->rsize);
 	if (!dreq)
 		return -ENOMEM;
 
+	dreq->user_addr = user_addr;
+	dreq->user_count = count;
+	dreq->pos = pos;
+	dreq->pages = pages;
+	dreq->npages = nr_pages;
 	dreq->inode = inode;
 	dreq->ctx = get_nfs_open_context((struct nfs_open_context *)iocb->ki_filp->private_data);
 	if (!is_sync_kiocb(iocb))
@@ -378,9 +408,8 @@ static ssize_t nfs_direct_read(struct kiocb *iocb, unsigned long user_addr, size
 
 	nfs_add_stats(inode, NFSIOS_DIRECTREADBYTES, count);
 	rpc_clnt_sigmask(clnt, &oldset);
-	result = nfs_direct_read_schedule(dreq, user_addr, count, pos);
-	if (!result)
-		result = nfs_direct_wait(dreq);
+	nfs_direct_read_schedule(dreq);
+	result = nfs_direct_wait(dreq);
 	rpc_clnt_sigunmask(clnt, &oldset);
 
 	return result;
@@ -388,10 +417,10 @@ static ssize_t nfs_direct_read(struct kiocb *iocb, unsigned long user_addr, size
 
 static void nfs_direct_free_writedata(struct nfs_direct_req *dreq)
 {
-	while (!list_empty(&dreq->rewrite_list)) {
-		struct nfs_write_data *data = list_entry(dreq->rewrite_list.next, struct nfs_write_data, pages);
+	list_splice_init(&dreq->rewrite_list, &dreq->list);
+	while (!list_empty(&dreq->list)) {
+		struct nfs_write_data *data = list_entry(dreq->list.next, struct nfs_write_data, pages);
 		list_del(&data->pages);
-		nfs_direct_release_pages(data->pagevec, data->npages);
 		nfs_writedata_release(data);
 	}
 }
@@ -399,51 +428,14 @@ static void nfs_direct_free_writedata(struct nfs_direct_req *dreq)
 #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
 static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq)
 {
-	struct inode *inode = dreq->inode;
-	struct list_head *p;
-	struct nfs_write_data *data;
+	struct list_head *pos;
 
+	list_splice_init(&dreq->rewrite_list, &dreq->list);
+	list_for_each(pos, &dreq->list)
+		dreq->outstanding++;
 	dreq->count = 0;
-	get_dreq(dreq);
-
-	list_for_each(p, &dreq->rewrite_list) {
-		data = list_entry(p, struct nfs_write_data, pages);
-
-		get_dreq(dreq);
-
-		/*
-		 * Reset data->res.
-		 */
-		nfs_fattr_init(&data->fattr);
-		data->res.count = data->args.count;
-		memset(&data->verf, 0, sizeof(data->verf));
-
-		/*
-		 * Reuse data->task; data->args should not have changed
-		 * since the original request was sent.
-		 */
-		rpc_init_task(&data->task, NFS_CLIENT(inode), RPC_TASK_ASYNC,
-				&nfs_write_direct_ops, data);
-		NFS_PROTO(inode)->write_setup(data, FLUSH_STABLE);
-
-		data->task.tk_priority = RPC_PRIORITY_NORMAL;
-		data->task.tk_cookie = (unsigned long) inode;
-
-		/*
-		 * We're called via an RPC callback, so BKL is already held.
-		 */
-		rpc_execute(&data->task);
-
-		dprintk("NFS: %5u rescheduled direct write call (req %s/%Ld, %u bytes @ offset %Lu)\n",
-				data->task.tk_pid,
-				inode->i_sb->s_id,
-				(long long)NFS_FILEID(inode),
-				data->args.count,
-				(unsigned long long)data->args.offset);
-	}
 
-	if (put_dreq(dreq))
-		nfs_direct_write_complete(dreq, inode);
+	nfs_direct_write_schedule(dreq, FLUSH_STABLE);
 }
 
 static void nfs_direct_commit_result(struct rpc_task *task, void *calldata)
@@ -480,8 +472,8 @@ static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq)
 	data->cred = dreq->ctx->cred;
 
 	data->args.fh = NFS_FH(data->inode);
-	data->args.offset = 0;
-	data->args.count = 0;
+	data->args.offset = dreq->pos;
+	data->args.count = dreq->user_count;
 	data->res.count = 0;
 	data->res.fattr = &data->fattr;
 	data->res.verf = &data->verf;
@@ -543,6 +535,47 @@ static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode
 }
 #endif
 
+static struct nfs_direct_req *nfs_direct_write_alloc(size_t nbytes, size_t wsize)
+{
+	struct list_head *list;
+	struct nfs_direct_req *dreq;
+	unsigned int wpages = (wsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+
+	dreq = nfs_direct_req_alloc();
+	if (!dreq)
+		return NULL;
+
+	list = &dreq->list;
+	for(;;) {
+		struct nfs_write_data *data = nfs_writedata_alloc(wpages);
+
+		if (unlikely(!data)) {
+			while (!list_empty(list)) {
+				data = list_entry(list->next,
+						struct nfs_write_data, pages);
+				list_del(&data->pages);
+				nfs_writedata_free(data);
+			}
+			kref_put(&dreq->kref, nfs_direct_req_release);
+			return NULL;
+		}
+
+		INIT_LIST_HEAD(&data->pages);
+		list_add(&data->pages, list);
+
+		data->req = (struct nfs_page *) dreq;
+		dreq->outstanding++;
+		if (nbytes <= wsize)
+			break;
+		nbytes -= wsize;
+	}
+
+	nfs_alloc_commit_data(dreq);
+
+	kref_get(&dreq->kref);
+	return dreq;
+}
+
 static void nfs_direct_write_result(struct rpc_task *task, void *calldata)
 {
 	struct nfs_write_data *data = calldata;
@@ -572,6 +605,8 @@ static void nfs_direct_write_result(struct rpc_task *task, void *calldata)
 			}
 		}
 	}
+	/* In case we have to resend */
+	data->args.stable = NFS_FILE_SYNC;
 
 	spin_unlock(&dreq->lock);
 }
@@ -585,8 +620,14 @@ static void nfs_direct_write_release(void *calldata)
 	struct nfs_write_data *data = calldata;
 	struct nfs_direct_req *dreq = (struct nfs_direct_req *) data->req;
 
-	if (put_dreq(dreq))
-		nfs_direct_write_complete(dreq, data->inode);
+	spin_lock(&dreq->lock);
+	if (--dreq->outstanding) {
+		spin_unlock(&dreq->lock);
+		return;
+	}
+	spin_unlock(&dreq->lock);
+
+	nfs_direct_write_complete(dreq, data->inode);
 }
 
 static const struct rpc_call_ops nfs_write_direct_ops = {
@@ -595,62 +636,41 @@ static const struct rpc_call_ops nfs_write_direct_ops = {
 };
 
 /*
- * For each wsize'd chunk of the user's buffer, dispatch an NFS WRITE
- * operation.  If nfs_writedata_alloc() or get_user_pages() fails,
- * bail and stop sending more writes.  Write length accounting is
- * handled automatically by nfs_direct_write_result().  Otherwise, if
- * no requests have been sent, just return an error.
+ * For each nfs_write_data struct that was allocated on the list, dispatch
+ * an NFS WRITE operation
  */
-static ssize_t nfs_direct_write_schedule(struct nfs_direct_req *dreq, unsigned long user_addr, size_t count, loff_t pos, int sync)
+static void nfs_direct_write_schedule(struct nfs_direct_req *dreq, int sync)
 {
 	struct nfs_open_context *ctx = dreq->ctx;
 	struct inode *inode = ctx->dentry->d_inode;
+	struct list_head *list = &dreq->list;
+	struct page **pages = dreq->pages;
+	size_t count = dreq->user_count;
+	loff_t pos = dreq->pos;
 	size_t wsize = NFS_SERVER(inode)->wsize;
-	unsigned int wpages = nfs_max_pages(wsize);
-	unsigned int pgbase;
-	int result;
-	ssize_t started = 0;
+	unsigned int curpage, pgbase;
 
-	get_dreq(dreq);
-
-	pgbase = user_addr & ~PAGE_MASK;
+	curpage = 0;
+	pgbase = dreq->user_addr & ~PAGE_MASK;
 	do {
 		struct nfs_write_data *data;
 		size_t bytes;
 
-		result = -ENOMEM;
-		data = nfs_writedata_alloc(wpages);
-		if (unlikely(!data))
-			break;
-
 		bytes = wsize;
 		if (count < wsize)
 			bytes = count;
 
-		data->npages = nfs_direct_count_pages(user_addr, bytes);
-		down_read(&current->mm->mmap_sem);
-		result = get_user_pages(current, current->mm, user_addr,
-					data->npages, 0, 0, data->pagevec, NULL);
-		up_read(&current->mm->mmap_sem);
-		if (unlikely(result < data->npages)) {
-			if (result > 0)
-				nfs_direct_release_pages(data->pagevec, result);
-			nfs_writedata_release(data);
-			break;
-		}
-
-		get_dreq(dreq);
-
+		BUG_ON(list_empty(list));
+		data = list_entry(list->next, struct nfs_write_data, pages);
 		list_move_tail(&data->pages, &dreq->rewrite_list);
 
-		data->req = (struct nfs_page *) dreq;
 		data->inode = inode;
 		data->cred = ctx->cred;
 		data->args.fh = NFS_FH(inode);
 		data->args.context = ctx;
 		data->args.offset = pos;
 		data->args.pgbase = pgbase;
-		data->args.pages = data->pagevec;
+		data->args.pages = &pages[curpage];
 		data->args.count = bytes;
 		data->res.fattr = &data->fattr;
 		data->res.count = bytes;
@@ -674,26 +694,19 @@ static ssize_t nfs_direct_write_schedule(struct nfs_direct_req *dreq, unsigned l
 			bytes,
 			(unsigned long long)data->args.offset);
 
-		started += bytes;
-		user_addr += bytes;
 		pos += bytes;
 		pgbase += bytes;
+		curpage += pgbase >> PAGE_SHIFT;
 		pgbase &= ~PAGE_MASK;
 
 		count -= bytes;
 	} while (count != 0);
-
-	if (put_dreq(dreq))
-		nfs_direct_write_complete(dreq, inode);
-
-	if (started)
-		return 0;
-	return result < 0 ? (ssize_t) result : -EFAULT;
+	BUG_ON(!list_empty(list));
 }
 
-static ssize_t nfs_direct_write(struct kiocb *iocb, unsigned long user_addr, size_t count, loff_t pos)
+static ssize_t nfs_direct_write(struct kiocb *iocb, unsigned long user_addr, size_t count, loff_t pos, struct page **pages, int nr_pages)
 {
-	ssize_t result = 0;
+	ssize_t result;
 	sigset_t oldset;
 	struct inode *inode = iocb->ki_filp->f_mapping->host;
 	struct rpc_clnt *clnt = NFS_CLIENT(inode);
@@ -701,14 +714,17 @@ static ssize_t nfs_direct_write(struct kiocb *iocb, unsigned long user_addr, siz
 	size_t wsize = NFS_SERVER(inode)->wsize;
 	int sync = 0;
 
-	dreq = nfs_direct_req_alloc();
+	dreq = nfs_direct_write_alloc(count, wsize);
 	if (!dreq)
 		return -ENOMEM;
-	nfs_alloc_commit_data(dreq);
-
 	if (dreq->commit_data == NULL || count < wsize)
 		sync = FLUSH_STABLE;
 
+	dreq->user_addr = user_addr;
+	dreq->user_count = count;
+	dreq->pos = pos;
+	dreq->pages = pages;
+	dreq->npages = nr_pages;
 	dreq->inode = inode;
 	dreq->ctx = get_nfs_open_context((struct nfs_open_context *)iocb->ki_filp->private_data);
 	if (!is_sync_kiocb(iocb))
@@ -719,9 +735,8 @@ static ssize_t nfs_direct_write(struct kiocb *iocb, unsigned long user_addr, siz
 	nfs_begin_data_update(inode);
 
 	rpc_clnt_sigmask(clnt, &oldset);
-	result = nfs_direct_write_schedule(dreq, user_addr, count, pos, sync);
-	if (!result)
-		result = nfs_direct_wait(dreq);
+	nfs_direct_write_schedule(dreq, sync);
+	result = nfs_direct_wait(dreq);
 	rpc_clnt_sigunmask(clnt, &oldset);
 
 	return result;
@@ -751,6 +766,8 @@ static ssize_t nfs_direct_write(struct kiocb *iocb, unsigned long user_addr, siz
 ssize_t nfs_file_direct_read(struct kiocb *iocb, char __user *buf, size_t count, loff_t pos)
 {
 	ssize_t retval = -EINVAL;
+	int page_count;
+	struct page **pages;
 	struct file *file = iocb->ki_filp;
 	struct address_space *mapping = file->f_mapping;
 
@@ -772,7 +789,14 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, char __user *buf, size_t count,
 	if (retval)
 		goto out;
 
-	retval = nfs_direct_read(iocb, (unsigned long) buf, count, pos);
+	retval = nfs_get_user_pages(READ, (unsigned long) buf,
+						count, &pages);
+	if (retval < 0)
+		goto out;
+	page_count = retval;
+
+	retval = nfs_direct_read(iocb, (unsigned long) buf, count, pos,
+						pages, page_count);
 	if (retval > 0)
 		iocb->ki_pos = pos + retval;
 
@@ -808,6 +832,8 @@ out:
 ssize_t nfs_file_direct_write(struct kiocb *iocb, const char __user *buf, size_t count, loff_t pos)
 {
 	ssize_t retval;
+	int page_count;
+	struct page **pages;
 	struct file *file = iocb->ki_filp;
 	struct address_space *mapping = file->f_mapping;
 
@@ -835,7 +861,14 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, const char __user *buf, size_t
 	if (retval)
 		goto out;
 
-	retval = nfs_direct_write(iocb, (unsigned long) buf, count, pos);
+	retval = nfs_get_user_pages(WRITE, (unsigned long) buf,
+						count, &pages);
+	if (retval < 0)
+		goto out;
+	page_count = retval;
+
+	retval = nfs_direct_write(iocb, (unsigned long) buf, count,
+						pos, pages, page_count);
 
 	/*
 	 * XXX: nfs_end_data_update() already ensures this file's
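nfs_get_user_pages(), added near the top of this file, pins the whole user buffer once with get_user_pages() instead of pinning rsize/wsize-sized slices inside the scheduling loops. Its page_count arithmetic rounds the buffer's end up and its start down to page boundaries. A stand-alone sketch of that arithmetic, with PAGE_SHIFT fixed at 12 and a helper name made up for the demo:

```c
#include <stdio.h>

#define PAGE_SHIFT 12
#define PAGE_SIZE  (1UL << PAGE_SHIFT)

/* Same arithmetic as nfs_get_user_pages(): last page touched (rounded up)
 * minus first page touched (rounded down). */
static unsigned long count_pages(unsigned long user_addr, unsigned long size)
{
	unsigned long page_count;

	page_count = (user_addr + size + PAGE_SIZE - 1) >> PAGE_SHIFT;
	page_count -= user_addr >> PAGE_SHIFT;
	return page_count;
}

int main(void)
{
	/* 8 KiB starting 0x234 bytes into a page straddles three pages. */
	printf("%lu\n", count_pages(0x1234, 8192));	/* prints 3 */
	/* A page-aligned 8 KiB buffer needs exactly two. */
	printf("%lu\n", count_pages(0x2000, 8192));	/* prints 2 */
	return 0;
}
```

An unaligned buffer touches one more page than size >> PAGE_SHIFT suggests, which is why the count cannot be computed from the size alone.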
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 2d3fb6416d91..7c7320fa51aa 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -729,7 +729,6 @@ struct nfs_read_data {
 	struct list_head pages;		/* Coalesced read requests */
 	struct nfs_page *req;		/* multi ops per nfs_page */
 	struct page **pagevec;
-	unsigned int npages;		/* active pages in pagevec */
 	struct nfs_readargs args;
 	struct nfs_readres res;
 #ifdef CONFIG_NFS_V4
@@ -748,7 +747,6 @@ struct nfs_write_data {
 	struct list_head pages;		/* Coalesced requests we wish to flush */
 	struct nfs_page *req;		/* multi ops per nfs_page */
 	struct page **pagevec;
-	unsigned int npages;		/* active pages in pagevec */
 	struct nfs_writeargs args;	/* argument struct */
 	struct nfs_writeres res;	/* result struct */
 #ifdef CONFIG_NFS_V4
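The npages field can leave nfs_read_data and nfs_write_data because each chunk no longer owns its own pagevec: the dreq holds the single pinned page array, and the scheduling loops in fs/nfs/direct.c point each request at a window into it via data->args.pages = &pages[curpage] plus an intra-page offset pgbase. A user-space sketch of that chunk-advance arithmetic, with illustrative values (a 40 KiB unaligned user buffer against a 16 KiB rsize):

```c
#include <stdio.h>

#define PAGE_SHIFT 12
#define PAGE_MASK  (~((1UL << PAGE_SHIFT) - 1))

/* Mimics the chunking in nfs_direct_read_schedule()/_write_schedule():
 * one shared page array, each rsize'd chunk starting at &pages[curpage]
 * with an intra-page offset of pgbase. */
int main(void)
{
	unsigned long user_addr = 0x1234;	/* unaligned user buffer */
	unsigned long count = 40960;		/* 40 KiB to move */
	unsigned long rsize = 16384;		/* server rsize: 16 KiB */
	unsigned int curpage = 0;
	unsigned long pgbase = user_addr & ~PAGE_MASK;

	do {
		unsigned long bytes = count < rsize ? count : rsize;

		printf("chunk: pages[%u]+%lu, %lu bytes\n",
				curpage, pgbase, bytes);

		pgbase += bytes;
		curpage += pgbase >> PAGE_SHIFT;	/* whole pages consumed */
		pgbase &= ~PAGE_MASK;			/* leftover offset */
		count -= bytes;
	} while (count != 0);
	return 0;
}
```

With these inputs the three chunks start at pages[0], pages[4], and pages[8], each 0x234 bytes into the first page of its window, so every request indexes the same pinned array and no per-request page count is needed.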