diff options
Diffstat (limited to 'fs/nfs')
43 files changed, 5047 insertions, 911 deletions
diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig index 81515545ba7..dbcd82126ae 100644 --- a/fs/nfs/Kconfig +++ b/fs/nfs/Kconfig | |||
@@ -77,6 +77,7 @@ config NFS_V4 | |||
77 | config NFS_V4_1 | 77 | config NFS_V4_1 |
78 | bool "NFS client support for NFSv4.1 (EXPERIMENTAL)" | 78 | bool "NFS client support for NFSv4.1 (EXPERIMENTAL)" |
79 | depends on NFS_FS && NFS_V4 && EXPERIMENTAL | 79 | depends on NFS_FS && NFS_V4 && EXPERIMENTAL |
80 | select SUNRPC_BACKCHANNEL | ||
80 | select PNFS_FILE_LAYOUT | 81 | select PNFS_FILE_LAYOUT |
81 | help | 82 | help |
82 | This option enables support for minor version 1 of the NFSv4 protocol | 83 | This option enables support for minor version 1 of the NFSv4 protocol |
@@ -87,15 +88,15 @@ config NFS_V4_1 | |||
87 | config PNFS_FILE_LAYOUT | 88 | config PNFS_FILE_LAYOUT |
88 | tristate | 89 | tristate |
89 | 90 | ||
91 | config PNFS_BLOCK | ||
92 | tristate | ||
93 | depends on NFS_FS && NFS_V4_1 && BLK_DEV_DM | ||
94 | default m | ||
95 | |||
90 | config PNFS_OBJLAYOUT | 96 | config PNFS_OBJLAYOUT |
91 | tristate "Provide support for the pNFS Objects Layout Driver for NFSv4.1 pNFS (EXPERIMENTAL)" | 97 | tristate |
92 | depends on NFS_FS && NFS_V4_1 && SCSI_OSD_ULD | 98 | depends on NFS_FS && NFS_V4_1 && SCSI_OSD_ULD |
93 | help | 99 | default m |
94 | Say M here if you want your pNFS client to support the Objects Layout Driver. | ||
95 | Requires the SCSI osd initiator library (SCSI_OSD_INITIATOR) and | ||
96 | upper level driver (SCSI_OSD_ULD). | ||
97 | |||
98 | If unsure, say N. | ||
99 | 100 | ||
100 | config ROOT_NFS | 101 | config ROOT_NFS |
101 | bool "Root file system on NFS" | 102 | bool "Root file system on NFS" |
diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile index 6a34f7dd0e6..b58613d0abb 100644 --- a/fs/nfs/Makefile +++ b/fs/nfs/Makefile | |||
@@ -23,3 +23,4 @@ obj-$(CONFIG_PNFS_FILE_LAYOUT) += nfs_layout_nfsv41_files.o | |||
23 | nfs_layout_nfsv41_files-y := nfs4filelayout.o nfs4filelayoutdev.o | 23 | nfs_layout_nfsv41_files-y := nfs4filelayout.o nfs4filelayoutdev.o |
24 | 24 | ||
25 | obj-$(CONFIG_PNFS_OBJLAYOUT) += objlayout/ | 25 | obj-$(CONFIG_PNFS_OBJLAYOUT) += objlayout/ |
26 | obj-$(CONFIG_PNFS_BLOCK) += blocklayout/ | ||
diff --git a/fs/nfs/blocklayout/Makefile b/fs/nfs/blocklayout/Makefile new file mode 100644 index 00000000000..d5815505c02 --- /dev/null +++ b/fs/nfs/blocklayout/Makefile | |||
@@ -0,0 +1,5 @@ | |||
1 | # | ||
2 | # Makefile for the pNFS block layout driver kernel module | ||
3 | # | ||
4 | obj-$(CONFIG_PNFS_BLOCK) += blocklayoutdriver.o | ||
5 | blocklayoutdriver-objs := blocklayout.o extents.o blocklayoutdev.o blocklayoutdm.o | ||
diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c new file mode 100644 index 00000000000..281ae95932c --- /dev/null +++ b/fs/nfs/blocklayout/blocklayout.c | |||
@@ -0,0 +1,1024 @@ | |||
1 | /* | ||
2 | * linux/fs/nfs/blocklayout/blocklayout.c | ||
3 | * | ||
4 | * Module for the NFSv4.1 pNFS block layout driver. | ||
5 | * | ||
6 | * Copyright (c) 2006 The Regents of the University of Michigan. | ||
7 | * All rights reserved. | ||
8 | * | ||
9 | * Andy Adamson <andros@citi.umich.edu> | ||
10 | * Fred Isaman <iisaman@umich.edu> | ||
11 | * | ||
12 | * permission is granted to use, copy, create derivative works and | ||
13 | * redistribute this software and such derivative works for any purpose, | ||
14 | * so long as the name of the university of michigan is not used in | ||
15 | * any advertising or publicity pertaining to the use or distribution | ||
16 | * of this software without specific, written prior authorization. if | ||
17 | * the above copyright notice or any other identification of the | ||
18 | * university of michigan is included in any copy of any portion of | ||
19 | * this software, then the disclaimer below must also be included. | ||
20 | * | ||
21 | * this software is provided as is, without representation from the | ||
22 | * university of michigan as to its fitness for any purpose, and without | ||
23 | * warranty by the university of michigan of any kind, either express | ||
24 | * or implied, including without limitation the implied warranties of | ||
25 | * merchantability and fitness for a particular purpose. the regents | ||
26 | * of the university of michigan shall not be liable for any damages, | ||
27 | * including special, indirect, incidental, or consequential damages, | ||
28 | * with respect to any claim arising out or in connection with the use | ||
29 | * of the software, even if it has been or is hereafter advised of the | ||
30 | * possibility of such damages. | ||
31 | */ | ||
32 | |||
33 | #include <linux/module.h> | ||
34 | #include <linux/init.h> | ||
35 | #include <linux/mount.h> | ||
36 | #include <linux/namei.h> | ||
37 | #include <linux/bio.h> /* struct bio */ | ||
38 | #include <linux/buffer_head.h> /* various write calls */ | ||
39 | #include <linux/prefetch.h> | ||
40 | |||
41 | #include "blocklayout.h" | ||
42 | |||
43 | #define NFSDBG_FACILITY NFSDBG_PNFS_LD | ||
44 | |||
45 | MODULE_LICENSE("GPL"); | ||
46 | MODULE_AUTHOR("Andy Adamson <andros@citi.umich.edu>"); | ||
47 | MODULE_DESCRIPTION("The NFSv4.1 pNFS Block layout driver"); | ||
48 | |||
49 | struct dentry *bl_device_pipe; | ||
50 | wait_queue_head_t bl_wq; | ||
51 | |||
/* Dump a page's state flags to the debug log. Pure diagnostics: reads
 * flag bits only, no side effects beyond dprintk output.
 */
static void print_page(struct page *page)
{
	dprintk("PRINTPAGE page %p\n", page);
	dprintk("	PagePrivate %d\n", PagePrivate(page));
	dprintk("	PageUptodate %d\n", PageUptodate(page));
	dprintk("	PageError %d\n", PageError(page));
	dprintk("	PageDirty %d\n", PageDirty(page));
	dprintk("	PageReferenced %d\n", PageReferenced(page));
	dprintk("	PageLocked %d\n", PageLocked(page));
	dprintk("	PageWriteback %d\n", PageWriteback(page));
	dprintk("	PageMappedToDisk %d\n", PageMappedToDisk(page));
	dprintk("\n");
}
65 | |||
66 | /* Given the be associated with isect, determine if page data needs to be | ||
67 | * initialized. | ||
68 | */ | ||
69 | static int is_hole(struct pnfs_block_extent *be, sector_t isect) | ||
70 | { | ||
71 | if (be->be_state == PNFS_BLOCK_NONE_DATA) | ||
72 | return 1; | ||
73 | else if (be->be_state != PNFS_BLOCK_INVALID_DATA) | ||
74 | return 0; | ||
75 | else | ||
76 | return !bl_is_sector_init(be->be_inval, isect); | ||
77 | } | ||
78 | |||
79 | /* Given the be associated with isect, determine if page data can be | ||
80 | * written to disk. | ||
81 | */ | ||
82 | static int is_writable(struct pnfs_block_extent *be, sector_t isect) | ||
83 | { | ||
84 | return (be->be_state == PNFS_BLOCK_READWRITE_DATA || | ||
85 | be->be_state == PNFS_BLOCK_INVALID_DATA); | ||
86 | } | ||
87 | |||
/* The data we are handed might be spread across several bios. We need
 * to track when the last one is finished.
 */
struct parallel_io {
	struct kref refcnt;		/* one ref per in-flight bio plus the submitter's */
	struct rpc_call_ops call_ops;	/* copy of mds_ops with rpc_call_done stubbed */
	void (*pnfs_callback) (void *data);	/* invoked when the last ref drops */
	void *data;			/* the nfs_{read,write}_data for this request */
};
97 | |||
98 | static inline struct parallel_io *alloc_parallel(void *data) | ||
99 | { | ||
100 | struct parallel_io *rv; | ||
101 | |||
102 | rv = kmalloc(sizeof(*rv), GFP_NOFS); | ||
103 | if (rv) { | ||
104 | rv->data = data; | ||
105 | kref_init(&rv->refcnt); | ||
106 | } | ||
107 | return rv; | ||
108 | } | ||
109 | |||
/* Take an additional reference on @p; paired with put_parallel(). */
static inline void get_parallel(struct parallel_io *p)
{
	kref_get(&p->refcnt);
}
114 | |||
/* kref release callback: runs once the last bio reference is dropped.
 * Fires the pnfs completion callback, then frees the tracker.
 */
static void destroy_parallel(struct kref *kref)
{
	struct parallel_io *p = container_of(kref, struct parallel_io, refcnt);

	dprintk("%s enter\n", __func__);
	p->pnfs_callback(p->data);
	kfree(p);
}
123 | |||
/* Drop a reference on @p; destroy_parallel() runs when it hits zero. */
static inline void put_parallel(struct parallel_io *p)
{
	kref_put(&p->refcnt, destroy_parallel);
}
128 | |||
129 | static struct bio * | ||
130 | bl_submit_bio(int rw, struct bio *bio) | ||
131 | { | ||
132 | if (bio) { | ||
133 | get_parallel(bio->bi_private); | ||
134 | dprintk("%s submitting %s bio %u@%llu\n", __func__, | ||
135 | rw == READ ? "read" : "write", | ||
136 | bio->bi_size, (unsigned long long)bio->bi_sector); | ||
137 | submit_bio(rw, bio); | ||
138 | } | ||
139 | return NULL; | ||
140 | } | ||
141 | |||
142 | static struct bio *bl_alloc_init_bio(int npg, sector_t isect, | ||
143 | struct pnfs_block_extent *be, | ||
144 | void (*end_io)(struct bio *, int err), | ||
145 | struct parallel_io *par) | ||
146 | { | ||
147 | struct bio *bio; | ||
148 | |||
149 | bio = bio_alloc(GFP_NOIO, npg); | ||
150 | if (!bio) | ||
151 | return NULL; | ||
152 | |||
153 | bio->bi_sector = isect - be->be_f_offset + be->be_v_offset; | ||
154 | bio->bi_bdev = be->be_mdev; | ||
155 | bio->bi_end_io = end_io; | ||
156 | bio->bi_private = par; | ||
157 | return bio; | ||
158 | } | ||
159 | |||
160 | static struct bio *bl_add_page_to_bio(struct bio *bio, int npg, int rw, | ||
161 | sector_t isect, struct page *page, | ||
162 | struct pnfs_block_extent *be, | ||
163 | void (*end_io)(struct bio *, int err), | ||
164 | struct parallel_io *par) | ||
165 | { | ||
166 | retry: | ||
167 | if (!bio) { | ||
168 | bio = bl_alloc_init_bio(npg, isect, be, end_io, par); | ||
169 | if (!bio) | ||
170 | return ERR_PTR(-ENOMEM); | ||
171 | } | ||
172 | if (bio_add_page(bio, page, PAGE_CACHE_SIZE, 0) < PAGE_CACHE_SIZE) { | ||
173 | bio = bl_submit_bio(rw, bio); | ||
174 | goto retry; | ||
175 | } | ||
176 | return bio; | ||
177 | } | ||
178 | |||
/* bio completion for reads; the page walk is basically copied from
 * mpage_end_io_read. Marks each page up to date on success, records the
 * first error otherwise, and drops this bio's parallel_io reference.
 */
static void bl_end_io_read(struct bio *bio, int err)
{
	struct parallel_io *par = bio->bi_private;
	const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
	struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
	struct nfs_read_data *rdata = (struct nfs_read_data *)par->data;

	/* Walk the bio's pages back to front; prefetchw warms the next
	 * page's flags word before we touch it.
	 */
	do {
		struct page *page = bvec->bv_page;

		if (--bvec >= bio->bi_io_vec)
			prefetchw(&bvec->bv_page->flags);
		if (uptodate)
			SetPageUptodate(page);
	} while (bvec >= bio->bi_io_vec);
	if (!uptodate) {
		/* first error wins; fail the layout so I/O reverts to MDS */
		if (!rdata->pnfs_error)
			rdata->pnfs_error = -EIO;
		pnfs_set_lo_fail(rdata->lseg);
	}
	bio_put(bio);
	put_parallel(par);	/* may trigger bl_end_par_io_read */
}
203 | |||
204 | static void bl_read_cleanup(struct work_struct *work) | ||
205 | { | ||
206 | struct rpc_task *task; | ||
207 | struct nfs_read_data *rdata; | ||
208 | dprintk("%s enter\n", __func__); | ||
209 | task = container_of(work, struct rpc_task, u.tk_work); | ||
210 | rdata = container_of(task, struct nfs_read_data, task); | ||
211 | pnfs_ld_read_done(rdata); | ||
212 | } | ||
213 | |||
/* parallel_io completion for reads: defer the final processing to a
 * workqueue (we may be in bio end_io context here).
 */
static void
bl_end_par_io_read(void *data)
{
	struct nfs_read_data *rdata = data;

	INIT_WORK(&rdata->task.u.tk_work, bl_read_cleanup);
	schedule_work(&rdata->task.u.tk_work);
}
222 | |||
/* We don't want normal .rpc_call_done callback used, so we replace it
 * with this stub.
 */
static void bl_rpc_do_nothing(struct rpc_task *task, void *calldata)
{
	return;
}
230 | |||
/* Read rdata's page list through the block layout: walk the pages,
 * looking up the covering extent for each run, zero-filling holes
 * locally and issuing bios (or COW reads) for the rest. Returns
 * PNFS_NOT_ATTEMPTED (fall back to the MDS) only if the parallel_io
 * tracker cannot be allocated; after that point all errors are reported
 * via rdata->pnfs_error and PNFS_ATTEMPTED is returned.
 */
static enum pnfs_try_status
bl_read_pagelist(struct nfs_read_data *rdata)
{
	int i, hole;
	struct bio *bio = NULL;
	struct pnfs_block_extent *be = NULL, *cow_read = NULL;
	sector_t isect, extent_length = 0;
	struct parallel_io *par;
	loff_t f_offset = rdata->args.offset;
	size_t count = rdata->args.count;
	struct page **pages = rdata->args.pages;
	int pg_index = rdata->args.pgbase >> PAGE_CACHE_SHIFT;

	dprintk("%s enter nr_pages %u offset %lld count %Zd\n", __func__,
	       rdata->npages, f_offset, count);

	par = alloc_parallel(rdata);
	if (!par)
		goto use_mds;
	/* Use mds_ops for everything except rpc_call_done */
	par->call_ops = *rdata->mds_ops;
	par->call_ops.rpc_call_done = bl_rpc_do_nothing;
	par->pnfs_callback = bl_end_par_io_read;
	/* At this point, we can no longer jump to use_mds */

	isect = (sector_t) (f_offset >> SECTOR_SHIFT);
	/* Code assumes extents are page-aligned */
	for (i = pg_index; i < rdata->npages; i++) {
		if (!extent_length) {
			/* We've used up the previous extent */
			bl_put_extent(be);
			bl_put_extent(cow_read);
			bio = bl_submit_bio(READ, bio);
			/* Get the next one */
			be = bl_find_get_extent(BLK_LSEG2EXT(rdata->lseg),
					     isect, &cow_read);
			if (!be) {
				rdata->pnfs_error = -EIO;
				goto out;
			}
			extent_length = be->be_length -
				(isect - be->be_f_offset);
			if (cow_read) {
				/* COW source may be shorter; clamp the run */
				sector_t cow_length = cow_read->be_length -
					(isect - cow_read->be_f_offset);
				extent_length = min(extent_length, cow_length);
			}
		}
		hole = is_hole(be, isect);
		if (hole && !cow_read) {
			bio = bl_submit_bio(READ, bio);
			/* Fill hole w/ zeroes w/o accessing device */
			dprintk("%s Zeroing page for hole\n", __func__);
			zero_user_segment(pages[i], 0, PAGE_CACHE_SIZE);
			print_page(pages[i]);
			SetPageUptodate(pages[i]);
		} else {
			struct pnfs_block_extent *be_read;

			/* A hole with a COW source reads from the COW extent */
			be_read = (hole && cow_read) ? cow_read : be;
			bio = bl_add_page_to_bio(bio, rdata->npages - i, READ,
						 isect, pages[i], be_read,
						 bl_end_io_read, par);
			if (IS_ERR(bio)) {
				rdata->pnfs_error = PTR_ERR(bio);
				bio = NULL;
				goto out;
			}
		}
		isect += PAGE_CACHE_SECTORS;
		extent_length -= PAGE_CACHE_SECTORS;
	}
	/* Report EOF / short-read accounting back to the generic layer */
	if ((isect << SECTOR_SHIFT) >= rdata->inode->i_size) {
		rdata->res.eof = 1;
		rdata->res.count = rdata->inode->i_size - f_offset;
	} else {
		rdata->res.count = (isect << SECTOR_SHIFT) - f_offset;
	}
out:
	bl_put_extent(be);
	bl_put_extent(cow_read);
	bl_submit_bio(READ, bio);
	put_parallel(par);	/* drop submitter's ref; completion may fire now */
	return PNFS_ATTEMPTED;

use_mds:
	dprintk("Giving up and using normal NFS\n");
	return PNFS_NOT_ATTEMPTED;
}
319 | |||
/* Mark the page-aligned span [offset, offset+count) as written, so the
 * covered INVALID-state extents get queued for LAYOUTCOMMIT.
 */
static void mark_extents_written(struct pnfs_block_layout *bl,
				 __u64 offset, __u32 count)
{
	sector_t isect, end;
	struct pnfs_block_extent *be;

	dprintk("%s(%llu, %u)\n", __func__, offset, count);
	if (count == 0)
		return;
	/* round start down / end up to page boundaries, in sectors */
	isect = (offset & (long)(PAGE_CACHE_MASK)) >> SECTOR_SHIFT;
	end = (offset + count + PAGE_CACHE_SIZE - 1) & (long)(PAGE_CACHE_MASK);
	end >>= SECTOR_SHIFT;
	while (isect < end) {
		sector_t len;
		be = bl_find_get_extent(bl, isect, NULL);
		BUG_ON(!be); /* FIXME */
		len = min(end, be->be_f_offset + be->be_length) - isect;
		if (be->be_state == PNFS_BLOCK_INVALID_DATA)
			bl_mark_for_commit(be, isect, len); /* What if fails? */
		isect += len;
		bl_put_extent(be);
	}
}
343 | |||
344 | static void bl_end_io_write_zero(struct bio *bio, int err) | ||
345 | { | ||
346 | struct parallel_io *par = bio->bi_private; | ||
347 | const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); | ||
348 | struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; | ||
349 | struct nfs_write_data *wdata = (struct nfs_write_data *)par->data; | ||
350 | |||
351 | do { | ||
352 | struct page *page = bvec->bv_page; | ||
353 | |||
354 | if (--bvec >= bio->bi_io_vec) | ||
355 | prefetchw(&bvec->bv_page->flags); | ||
356 | /* This is the zeroing page we added */ | ||
357 | end_page_writeback(page); | ||
358 | page_cache_release(page); | ||
359 | } while (bvec >= bio->bi_io_vec); | ||
360 | if (!uptodate) { | ||
361 | if (!wdata->pnfs_error) | ||
362 | wdata->pnfs_error = -EIO; | ||
363 | pnfs_set_lo_fail(wdata->lseg); | ||
364 | } | ||
365 | bio_put(bio); | ||
366 | put_parallel(par); | ||
367 | } | ||
368 | |||
/* bio completion for ordinary data writes: no per-page work needed
 * (the generic NFS write path owns those pages); just record any error
 * and drop this bio's parallel_io reference.
 */
static void bl_end_io_write(struct bio *bio, int err)
{
	struct parallel_io *par = bio->bi_private;
	const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
	struct nfs_write_data *wdata = (struct nfs_write_data *)par->data;

	if (!uptodate) {
		/* first error wins; fail the layout so I/O reverts to MDS */
		if (!wdata->pnfs_error)
			wdata->pnfs_error = -EIO;
		pnfs_set_lo_fail(wdata->lseg);
	}
	bio_put(bio);
	put_parallel(par);
}
384 | |||
385 | /* Function scheduled for call during bl_end_par_io_write, | ||
386 | * it marks sectors as written and extends the commitlist. | ||
387 | */ | ||
388 | static void bl_write_cleanup(struct work_struct *work) | ||
389 | { | ||
390 | struct rpc_task *task; | ||
391 | struct nfs_write_data *wdata; | ||
392 | dprintk("%s enter\n", __func__); | ||
393 | task = container_of(work, struct rpc_task, u.tk_work); | ||
394 | wdata = container_of(task, struct nfs_write_data, task); | ||
395 | if (!wdata->pnfs_error) { | ||
396 | /* Marks for LAYOUTCOMMIT */ | ||
397 | mark_extents_written(BLK_LSEG2EXT(wdata->lseg), | ||
398 | wdata->args.offset, wdata->args.count); | ||
399 | } | ||
400 | pnfs_ld_write_done(wdata); | ||
401 | } | ||
402 | |||
/* Called when last of bios associated with a bl_write_pagelist call finishes */
static void bl_end_par_io_write(void *data)
{
	struct nfs_write_data *wdata = data;

	/* All bios done; report a stable (FILE_SYNC) write and defer the
	 * commitlist bookkeeping to a workqueue.
	 */
	wdata->task.tk_status = 0;
	wdata->verf.committed = NFS_FILE_SYNC;
	INIT_WORK(&wdata->task.u.tk_work, bl_write_cleanup);
	schedule_work(&wdata->task.u.tk_work);
}
413 | |||
/* FIXME STUB - mark intersection of layout and page as bad, so is not
 * used again.
 */
static void mark_bad_read(void)
{
	return;
}
421 | |||
422 | /* | ||
423 | * map_block: map a requested I/0 block (isect) into an offset in the LVM | ||
424 | * block_device | ||
425 | */ | ||
426 | static void | ||
427 | map_block(struct buffer_head *bh, sector_t isect, struct pnfs_block_extent *be) | ||
428 | { | ||
429 | dprintk("%s enter be=%p\n", __func__, be); | ||
430 | |||
431 | set_buffer_mapped(bh); | ||
432 | bh->b_bdev = be->be_mdev; | ||
433 | bh->b_blocknr = (isect - be->be_f_offset + be->be_v_offset) >> | ||
434 | (be->be_mdev->bd_inode->i_blkbits - SECTOR_SHIFT); | ||
435 | |||
436 | dprintk("%s isect %llu, bh->b_blocknr %ld, using bsize %Zd\n", | ||
437 | __func__, (unsigned long long)isect, (long)bh->b_blocknr, | ||
438 | bh->b_size); | ||
439 | return; | ||
440 | } | ||
441 | |||
/* Given an unmapped page, zero it or read in page for COW, page is locked
 * by caller. Returns 0 on success or a negative errno.
 */
static int
init_page_for_write(struct page *page, struct pnfs_block_extent *cow_read)
{
	struct buffer_head *bh = NULL;
	int ret = 0;
	sector_t isect;

	dprintk("%s enter, %p\n", __func__, page);
	BUG_ON(PageUptodate(page));
	if (!cow_read) {
		/* No COW source: content is simply zero.
		 * NOTE(review): zeroes PAGE_SIZE here but PAGE_CACHE_SIZE is
		 * used elsewhere in this file - confirm they are equivalent
		 * on the supported configurations.
		 */
		zero_user_segment(page, 0, PAGE_SIZE);
		SetPageUptodate(page);
		goto cleanup;
	}

	bh = alloc_page_buffers(page, PAGE_CACHE_SIZE, 0);
	if (!bh) {
		ret = -ENOMEM;
		goto cleanup;
	}

	isect = (sector_t) page->index << PAGE_CACHE_SECTOR_SHIFT;
	map_block(bh, isect, cow_read);
	if (!bh_uptodate_or_lock(bh))
		ret = bh_submit_read(bh);	/* synchronous COW read */
	if (ret)
		goto cleanup;
	SetPageUptodate(page);

cleanup:
	/* NOTE(review): this drops the caller's cow_read reference on every
	 * call; a caller invoking this repeatedly with the same extent would
	 * over-put it - verify against bl_write_pagelist's zeroing loop.
	 */
	bl_put_extent(cow_read);
	if (bh)
		free_buffer_head(bh);
	if (ret) {
		/* Need to mark layout with bad read...should now
		 * just use nfs4 for reads and writes.
		 */
		mark_bad_read();
	}
	return ret;
}
486 | |||
/* Write wdata's page list through the block layout. Three phases:
 * 1) if the write starts inside an INVALID (never-written) extent, zero
 *    the block's leading pages that precede the write,
 * 2) write the data pages themselves, extent by extent,
 * 3) if the write ends inside an INVALID extent, zero the block's
 *    trailing pages (reusing phase 1's loop with last = 1).
 * Returns PNFS_NOT_ATTEMPTED only if the parallel_io tracker cannot be
 * allocated; afterwards errors are reported via wdata->pnfs_error.
 */
static enum pnfs_try_status
bl_write_pagelist(struct nfs_write_data *wdata, int sync)
{
	int i, ret, npg_zero, pg_index, last = 0;
	struct bio *bio = NULL;
	struct pnfs_block_extent *be = NULL, *cow_read = NULL;
	sector_t isect, last_isect = 0, extent_length = 0;
	struct parallel_io *par;
	loff_t offset = wdata->args.offset;
	size_t count = wdata->args.count;
	struct page **pages = wdata->args.pages;
	struct page *page;
	pgoff_t index;
	u64 temp;
	int npg_per_block =
	    NFS_SERVER(wdata->inode)->pnfs_blksize >> PAGE_CACHE_SHIFT;

	dprintk("%s enter, %Zu@%lld\n", __func__, count, offset);
	/* At this point, wdata->pages is a (sequential) list of nfs_pages.
	 * We want to write each, and if there is an error set pnfs_error
	 * to have it redone using nfs.
	 */
	par = alloc_parallel(wdata);
	if (!par)
		return PNFS_NOT_ATTEMPTED;
	par->call_ops = *wdata->mds_ops;
	par->call_ops.rpc_call_done = bl_rpc_do_nothing;
	par->pnfs_callback = bl_end_par_io_write;
	/* At this point, have to be more careful with error handling */

	isect = (sector_t) ((offset & (long)PAGE_CACHE_MASK) >> SECTOR_SHIFT);
	/* NOTE(review): cow_read's reference is only released inside
	 * init_page_for_write (and that call happens zero or more times in
	 * the zeroing loop below) - verify refcounting of cow_read here.
	 */
	be = bl_find_get_extent(BLK_LSEG2EXT(wdata->lseg), isect, &cow_read);
	if (!be || !is_writable(be, isect)) {
		dprintk("%s no matching extents!\n", __func__);
		wdata->pnfs_error = -EINVAL;
		goto out;
	}

	/* First page inside INVALID extent */
	if (be->be_state == PNFS_BLOCK_INVALID_DATA) {
		/* npg_zero = number of pages from the block boundary up to
		 * the write's first page
		 */
		temp = offset >> PAGE_CACHE_SHIFT;
		npg_zero = do_div(temp, npg_per_block);
		isect = (sector_t) (((offset - npg_zero * PAGE_CACHE_SIZE) &
				     (long)PAGE_CACHE_MASK) >> SECTOR_SHIFT);
		extent_length = be->be_length - (isect - be->be_f_offset);

fill_invalid_ext:
		dprintk("%s need to zero %d pages\n", __func__, npg_zero);
		for (;npg_zero > 0; npg_zero--) {
			if (bl_is_sector_init(be->be_inval, isect)) {
				dprintk("isect %llu already init\n",
					(unsigned long long)isect);
				goto next_page;
			}
			/* page ref released in bl_end_io_write_zero */
			index = isect >> PAGE_CACHE_SECTOR_SHIFT;
			dprintk("%s zero %dth page: index %lu isect %llu\n",
				__func__, npg_zero, index,
				(unsigned long long)isect);
			page =
			    find_or_create_page(wdata->inode->i_mapping, index,
						GFP_NOFS);
			if (!page) {
				dprintk("%s oom\n", __func__);
				wdata->pnfs_error = -ENOMEM;
				goto out;
			}

			/* PageDirty: Other will write this out
			 * PageWriteback: Other is writing this out
			 * PageUptodate: It was read before
			 * sector_initialized: already written out
			 */
			if (PageDirty(page) || PageWriteback(page)) {
				print_page(page);
				unlock_page(page);
				page_cache_release(page);
				goto next_page;
			}
			if (!PageUptodate(page)) {
				/* New page, readin or zero it */
				init_page_for_write(page, cow_read);
			}
			set_page_writeback(page);
			unlock_page(page);

			ret = bl_mark_sectors_init(be->be_inval, isect,
						       PAGE_CACHE_SECTORS,
						       NULL);
			if (unlikely(ret)) {
				dprintk("%s bl_mark_sectors_init fail %d\n",
					__func__, ret);
				end_page_writeback(page);
				page_cache_release(page);
				wdata->pnfs_error = ret;
				goto out;
			}
			bio = bl_add_page_to_bio(bio, npg_zero, WRITE,
						 isect, page, be,
						 bl_end_io_write_zero, par);
			if (IS_ERR(bio)) {
				wdata->pnfs_error = PTR_ERR(bio);
				bio = NULL;
				goto out;
			}
			/* FIXME: This should be done in bi_end_io */
			mark_extents_written(BLK_LSEG2EXT(wdata->lseg),
					     page->index << PAGE_CACHE_SHIFT,
					     PAGE_CACHE_SIZE);
next_page:
			isect += PAGE_CACHE_SECTORS;
			extent_length -= PAGE_CACHE_SECTORS;
		}
		if (last)
			goto write_done;
	}
	bio = bl_submit_bio(WRITE, bio);

	/* Middle pages */
	pg_index = wdata->args.pgbase >> PAGE_CACHE_SHIFT;
	for (i = pg_index; i < wdata->npages; i++) {
		if (!extent_length) {
			/* We've used up the previous extent */
			bl_put_extent(be);
			bio = bl_submit_bio(WRITE, bio);
			/* Get the next one */
			be = bl_find_get_extent(BLK_LSEG2EXT(wdata->lseg),
					     isect, NULL);
			if (!be || !is_writable(be, isect)) {
				wdata->pnfs_error = -EINVAL;
				goto out;
			}
			extent_length = be->be_length -
			    (isect - be->be_f_offset);
		}
		if (be->be_state == PNFS_BLOCK_INVALID_DATA) {
			ret = bl_mark_sectors_init(be->be_inval, isect,
						       PAGE_CACHE_SECTORS,
						       NULL);
			if (unlikely(ret)) {
				dprintk("%s bl_mark_sectors_init fail %d\n",
					__func__, ret);
				wdata->pnfs_error = ret;
				goto out;
			}
		}
		bio = bl_add_page_to_bio(bio, wdata->npages - i, WRITE,
					 isect, pages[i], be,
					 bl_end_io_write, par);
		if (IS_ERR(bio)) {
			wdata->pnfs_error = PTR_ERR(bio);
			bio = NULL;
			goto out;
		}
		isect += PAGE_CACHE_SECTORS;
		last_isect = isect;
		extent_length -= PAGE_CACHE_SECTORS;
	}

	/* Last page inside INVALID extent */
	if (be->be_state == PNFS_BLOCK_INVALID_DATA) {
		bio = bl_submit_bio(WRITE, bio);
		temp = last_isect >> PAGE_CACHE_SECTOR_SHIFT;
		npg_zero = npg_per_block - do_div(temp, npg_per_block);
		if (npg_zero < npg_per_block) {
			last = 1;
			goto fill_invalid_ext;
		}
	}

write_done:
	wdata->res.count = (last_isect << SECTOR_SHIFT) - (offset);
	if (count < wdata->res.count) {
		wdata->res.count = count;
	}
out:
	bl_put_extent(be);
	bl_submit_bio(WRITE, bio);
	put_parallel(par);	/* drop submitter's ref; completion may fire now */
	return PNFS_ATTEMPTED;
}
668 | |||
669 | /* FIXME - range ignored */ | ||
670 | static void | ||
671 | release_extents(struct pnfs_block_layout *bl, struct pnfs_layout_range *range) | ||
672 | { | ||
673 | int i; | ||
674 | struct pnfs_block_extent *be; | ||
675 | |||
676 | spin_lock(&bl->bl_ext_lock); | ||
677 | for (i = 0; i < EXTENT_LISTS; i++) { | ||
678 | while (!list_empty(&bl->bl_extents[i])) { | ||
679 | be = list_first_entry(&bl->bl_extents[i], | ||
680 | struct pnfs_block_extent, | ||
681 | be_node); | ||
682 | list_del(&be->be_node); | ||
683 | bl_put_extent(be); | ||
684 | } | ||
685 | } | ||
686 | spin_unlock(&bl->bl_ext_lock); | ||
687 | } | ||
688 | |||
689 | static void | ||
690 | release_inval_marks(struct pnfs_inval_markings *marks) | ||
691 | { | ||
692 | struct pnfs_inval_tracking *pos, *temp; | ||
693 | |||
694 | list_for_each_entry_safe(pos, temp, &marks->im_tree.mtt_stub, it_link) { | ||
695 | list_del(&pos->it_link); | ||
696 | kfree(pos); | ||
697 | } | ||
698 | return; | ||
699 | } | ||
700 | |||
/* Tear down a block layout header: drop all cached extents and invalid
 * sector marks, then free the structure allocated by bl_alloc_layout_hdr.
 */
static void bl_free_layout_hdr(struct pnfs_layout_hdr *lo)
{
	struct pnfs_block_layout *bl = BLK_LO2EXT(lo);

	dprintk("%s enter\n", __func__);
	release_extents(bl, NULL);	/* NULL range == release everything */
	release_inval_marks(&bl->bl_inval);
	kfree(bl);
}
710 | |||
/* Allocate and initialize a block layout header for @inode. Returns the
 * embedded generic pnfs_layout_hdr, or NULL on allocation failure.
 */
static struct pnfs_layout_hdr *bl_alloc_layout_hdr(struct inode *inode,
						   gfp_t gfp_flags)
{
	struct pnfs_block_layout *bl;

	dprintk("%s enter\n", __func__);
	bl = kzalloc(sizeof(*bl), gfp_flags);
	if (!bl)
		return NULL;
	spin_lock_init(&bl->bl_ext_lock);
	/* NOTE(review): only slots 0 and 1 are initialized while
	 * release_extents iterates EXTENT_LISTS - confirm EXTENT_LISTS == 2.
	 */
	INIT_LIST_HEAD(&bl->bl_extents[0]);
	INIT_LIST_HEAD(&bl->bl_extents[1]);
	INIT_LIST_HEAD(&bl->bl_commit);
	INIT_LIST_HEAD(&bl->bl_committing);
	bl->bl_count = 0;	/* redundant after kzalloc; kept explicit */
	bl->bl_blocksize = NFS_SERVER(inode)->pnfs_blksize >> SECTOR_SHIFT;
	BL_INIT_INVAL_MARKS(&bl->bl_inval, bl->bl_blocksize);
	return &bl->bl_layout;
}
730 | |||
/* Free a layout segment; extents live layout-wide (see bl_alloc_lseg),
 * so there is nothing per-segment to release beyond the struct itself.
 */
static void bl_free_lseg(struct pnfs_layout_segment *lseg)
{
	dprintk("%s enter\n", __func__);
	kfree(lseg);
}
736 | |||
737 | /* We pretty much ignore lseg, and store all data layout wide, so we | ||
738 | * can correctly merge. | ||
739 | */ | ||
740 | static struct pnfs_layout_segment *bl_alloc_lseg(struct pnfs_layout_hdr *lo, | ||
741 | struct nfs4_layoutget_res *lgr, | ||
742 | gfp_t gfp_flags) | ||
743 | { | ||
744 | struct pnfs_layout_segment *lseg; | ||
745 | int status; | ||
746 | |||
747 | dprintk("%s enter\n", __func__); | ||
748 | lseg = kzalloc(sizeof(*lseg), gfp_flags); | ||
749 | if (!lseg) | ||
750 | return ERR_PTR(-ENOMEM); | ||
751 | status = nfs4_blk_process_layoutget(lo, lgr, gfp_flags); | ||
752 | if (status) { | ||
753 | /* We don't want to call the full-blown bl_free_lseg, | ||
754 | * since on error extents were not touched. | ||
755 | */ | ||
756 | kfree(lseg); | ||
757 | return ERR_PTR(status); | ||
758 | } | ||
759 | return lseg; | ||
760 | } | ||
761 | |||
/* XDR-encode the layout-specific portion of a LAYOUTCOMMIT request
 * (the list of written extents) into @xdr.
 */
static void
bl_encode_layoutcommit(struct pnfs_layout_hdr *lo, struct xdr_stream *xdr,
		       const struct nfs4_layoutcommit_args *arg)
{
	dprintk("%s enter\n", __func__);
	encode_pnfs_block_layoutupdate(BLK_LO2EXT(lo), xdr, arg);
}
769 | |||
/* Post-LAYOUTCOMMIT cleanup: retire (or, on failure, requeue) the
 * extents that were sent in the commit encoded above.
 */
static void
bl_cleanup_layoutcommit(struct nfs4_layoutcommit_data *lcdata)
{
	struct pnfs_layout_hdr *lo = NFS_I(lcdata->args.inode)->layout;

	dprintk("%s enter\n", __func__);
	clean_pnfs_block_layoutupdate(BLK_LO2EXT(lo), &lcdata->args, lcdata->res.status);
}
778 | |||
779 | static void free_blk_mountid(struct block_mount_id *mid) | ||
780 | { | ||
781 | if (mid) { | ||
782 | struct pnfs_block_dev *dev; | ||
783 | spin_lock(&mid->bm_lock); | ||
784 | while (!list_empty(&mid->bm_devlist)) { | ||
785 | dev = list_first_entry(&mid->bm_devlist, | ||
786 | struct pnfs_block_dev, | ||
787 | bm_node); | ||
788 | list_del(&dev->bm_node); | ||
789 | bl_free_block_dev(dev); | ||
790 | } | ||
791 | spin_unlock(&mid->bm_lock); | ||
792 | kfree(mid); | ||
793 | } | ||
794 | } | ||
795 | |||
796 | /* This is mostly copied from the filelayout's get_device_info function. | ||
797 | * It seems much of this should be at the generic pnfs level. | ||
798 | */ | ||
799 | static struct pnfs_block_dev * | ||
800 | nfs4_blk_get_deviceinfo(struct nfs_server *server, const struct nfs_fh *fh, | ||
801 | struct nfs4_deviceid *d_id) | ||
802 | { | ||
803 | struct pnfs_device *dev; | ||
804 | struct pnfs_block_dev *rv; | ||
805 | u32 max_resp_sz; | ||
806 | int max_pages; | ||
807 | struct page **pages = NULL; | ||
808 | int i, rc; | ||
809 | |||
810 | /* | ||
811 | * Use the session max response size as the basis for setting | ||
812 | * GETDEVICEINFO's maxcount | ||
813 | */ | ||
814 | max_resp_sz = server->nfs_client->cl_session->fc_attrs.max_resp_sz; | ||
815 | max_pages = max_resp_sz >> PAGE_SHIFT; | ||
816 | dprintk("%s max_resp_sz %u max_pages %d\n", | ||
817 | __func__, max_resp_sz, max_pages); | ||
818 | |||
819 | dev = kmalloc(sizeof(*dev), GFP_NOFS); | ||
820 | if (!dev) { | ||
821 | dprintk("%s kmalloc failed\n", __func__); | ||
822 | return ERR_PTR(-ENOMEM); | ||
823 | } | ||
824 | |||
825 | pages = kzalloc(max_pages * sizeof(struct page *), GFP_NOFS); | ||
826 | if (pages == NULL) { | ||
827 | kfree(dev); | ||
828 | return ERR_PTR(-ENOMEM); | ||
829 | } | ||
830 | for (i = 0; i < max_pages; i++) { | ||
831 | pages[i] = alloc_page(GFP_NOFS); | ||
832 | if (!pages[i]) { | ||
833 | rv = ERR_PTR(-ENOMEM); | ||
834 | goto out_free; | ||
835 | } | ||
836 | } | ||
837 | |||
838 | memcpy(&dev->dev_id, d_id, sizeof(*d_id)); | ||
839 | dev->layout_type = LAYOUT_BLOCK_VOLUME; | ||
840 | dev->pages = pages; | ||
841 | dev->pgbase = 0; | ||
842 | dev->pglen = PAGE_SIZE * max_pages; | ||
843 | dev->mincount = 0; | ||
844 | |||
845 | dprintk("%s: dev_id: %s\n", __func__, dev->dev_id.data); | ||
846 | rc = nfs4_proc_getdeviceinfo(server, dev); | ||
847 | dprintk("%s getdevice info returns %d\n", __func__, rc); | ||
848 | if (rc) { | ||
849 | rv = ERR_PTR(rc); | ||
850 | goto out_free; | ||
851 | } | ||
852 | |||
853 | rv = nfs4_blk_decode_device(server, dev); | ||
854 | out_free: | ||
855 | for (i = 0; i < max_pages; i++) | ||
856 | __free_page(pages[i]); | ||
857 | kfree(pages); | ||
858 | kfree(dev); | ||
859 | return rv; | ||
860 | } | ||
861 | |||
/* Mount-time layout driver setup.
 *
 * Pages through the server's device list (GETDEVICELIST until eof) and
 * resolves every deviceid to a local block device via GETDEVICEINFO.
 * The resulting devices are collected in a block_mount_id hung off
 * server->pnfs_ld_data for later deviceid translation.
 *
 * Returns 0 on success, a negative errno otherwise.  On error all
 * devices gathered so far are released via free_blk_mountid().
 */
static int
bl_set_layoutdriver(struct nfs_server *server, const struct nfs_fh *fh)
{
	struct block_mount_id *b_mt_id = NULL;
	struct pnfs_devicelist *dlist = NULL;
	struct pnfs_block_dev *bdev;
	LIST_HEAD(block_disklist);
	int status, i;

	dprintk("%s enter\n", __func__);

	/* The server blocksize is mandatory for a block layout. */
	if (server->pnfs_blksize == 0) {
		dprintk("%s Server did not return blksize\n", __func__);
		return -EINVAL;
	}
	b_mt_id = kzalloc(sizeof(struct block_mount_id), GFP_NOFS);
	if (!b_mt_id) {
		status = -ENOMEM;
		goto out_error;
	}
	/* Initialize nfs4 block layout mount id */
	spin_lock_init(&b_mt_id->bm_lock);
	INIT_LIST_HEAD(&b_mt_id->bm_devlist);

	dlist = kmalloc(sizeof(struct pnfs_devicelist), GFP_NOFS);
	if (!dlist) {
		status = -ENOMEM;
		goto out_error;
	}
	/* eof=0 guarantees at least one GETDEVICELIST round trip, so
	 * status is always assigned before the fall-through to out_return.
	 */
	dlist->eof = 0;
	while (!dlist->eof) {
		status = nfs4_proc_getdevicelist(server, fh, dlist);
		if (status)
			goto out_error;
		dprintk("%s GETDEVICELIST numdevs=%i, eof=%i\n",
			__func__, dlist->num_devs, dlist->eof);
		for (i = 0; i < dlist->num_devs; i++) {
			bdev = nfs4_blk_get_deviceinfo(server, fh,
						       &dlist->dev_id[i]);
			if (IS_ERR(bdev)) {
				status = PTR_ERR(bdev);
				goto out_error;
			}
			spin_lock(&b_mt_id->bm_lock);
			list_add(&bdev->bm_node, &b_mt_id->bm_devlist);
			spin_unlock(&b_mt_id->bm_lock);
		}
	}
	dprintk("%s SUCCESS\n", __func__);
	server->pnfs_ld_data = b_mt_id;

 out_return:
	kfree(dlist);
	return status;

 out_error:
	free_blk_mountid(b_mt_id);
	goto out_return;
}
921 | |||
922 | static int | ||
923 | bl_clear_layoutdriver(struct nfs_server *server) | ||
924 | { | ||
925 | struct block_mount_id *b_mt_id = server->pnfs_ld_data; | ||
926 | |||
927 | dprintk("%s enter\n", __func__); | ||
928 | free_blk_mountid(b_mt_id); | ||
929 | dprintk("%s RETURNS\n", __func__); | ||
930 | return 0; | ||
931 | } | ||
932 | |||
/* Page-IO ops for reads: use the generic pnfs coalescing and dispatch. */
static const struct nfs_pageio_ops bl_pg_read_ops = {
	.pg_init = pnfs_generic_pg_init_read,
	.pg_test = pnfs_generic_pg_test,
	.pg_doio = pnfs_generic_pg_readpages,
};

/* Page-IO ops for writes: use the generic pnfs coalescing and dispatch. */
static const struct nfs_pageio_ops bl_pg_write_ops = {
	.pg_init = pnfs_generic_pg_init_write,
	.pg_test = pnfs_generic_pg_test,
	.pg_doio = pnfs_generic_pg_writepages,
};

/* Layout driver registration table for LAYOUT_BLOCK_VOLUME. */
static struct pnfs_layoutdriver_type blocklayout_type = {
	.id				= LAYOUT_BLOCK_VOLUME,
	.name				= "LAYOUT_BLOCK_VOLUME",
	.read_pagelist			= bl_read_pagelist,
	.write_pagelist			= bl_write_pagelist,
	.alloc_layout_hdr		= bl_alloc_layout_hdr,
	.free_layout_hdr		= bl_free_layout_hdr,
	.alloc_lseg			= bl_alloc_lseg,
	.free_lseg			= bl_free_lseg,
	.encode_layoutcommit		= bl_encode_layoutcommit,
	.cleanup_layoutcommit		= bl_cleanup_layoutcommit,
	.set_layoutdriver		= bl_set_layoutdriver,
	.clear_layoutdriver		= bl_clear_layoutdriver,
	.pg_read_ops			= &bl_pg_read_ops,
	.pg_write_ops			= &bl_pg_write_ops,
};

/* rpc_pipefs ops for the pipe to the userspace blocklayout daemon. */
static const struct rpc_pipe_ops bl_upcall_ops = {
	.upcall		= rpc_pipe_generic_upcall,
	.downcall	= bl_pipe_downcall,
	.destroy_msg	= bl_pipe_destroy_msg,
};
967 | |||
/* Module init: register the layout driver with the generic pnfs core
 * and create the "blocklayout" rpc_pipefs pipe used to talk to the
 * userspace device daemon.  Unwinds fully on any failure.
 */
static int __init nfs4blocklayout_init(void)
{
	struct vfsmount *mnt;
	struct path path;
	int ret;

	dprintk("%s: NFSv4 Block Layout Driver Registering...\n", __func__);

	ret = pnfs_register_layoutdriver(&blocklayout_type);
	if (ret)
		goto out;

	init_waitqueue_head(&bl_wq);

	mnt = rpc_get_mount();
	if (IS_ERR(mnt)) {
		ret = PTR_ERR(mnt);
		goto out_remove;
	}

	/* Locate the rpc_pipefs directory the pipe should live in. */
	ret = vfs_path_lookup(mnt->mnt_root,
			      mnt,
			      NFS_PIPE_DIRNAME, 0, &path);
	if (ret)
		goto out_putrpc;

	bl_device_pipe = rpc_mkpipe(path.dentry, "blocklayout", NULL,
				    &bl_upcall_ops, 0);
	/* The pipe holds its own reference; drop ours unconditionally. */
	path_put(&path);
	if (IS_ERR(bl_device_pipe)) {
		ret = PTR_ERR(bl_device_pipe);
		goto out_putrpc;
	}
out:
	return ret;

out_putrpc:
	rpc_put_mount();
out_remove:
	pnfs_unregister_layoutdriver(&blocklayout_type);
	return ret;
}
1010 | |||
/* Module exit: unregister the driver and tear down the rpc pipe,
 * reversing nfs4blocklayout_init().
 */
static void __exit nfs4blocklayout_exit(void)
{
	dprintk("%s: NFSv4 Block Layout Driver Unregistering...\n",
	       __func__);

	pnfs_unregister_layoutdriver(&blocklayout_type);
	rpc_unlink(bl_device_pipe);
	rpc_put_mount();
}

/* Auto-load when layouttype4 LAYOUT_BLOCK_VOLUME (3) is encountered. */
MODULE_ALIAS("nfs-layouttype4-3");

module_init(nfs4blocklayout_init);
module_exit(nfs4blocklayout_exit);
diff --git a/fs/nfs/blocklayout/blocklayout.h b/fs/nfs/blocklayout/blocklayout.h new file mode 100644 index 00000000000..42acf7ef599 --- /dev/null +++ b/fs/nfs/blocklayout/blocklayout.h | |||
@@ -0,0 +1,205 @@ | |||
1 | /* | ||
2 | * linux/fs/nfs/blocklayout/blocklayout.h | ||
3 | * | ||
4 | * Module for the NFSv4.1 pNFS block layout driver. | ||
5 | * | ||
6 | * Copyright (c) 2006 The Regents of the University of Michigan. | ||
7 | * All rights reserved. | ||
8 | * | ||
9 | * Andy Adamson <andros@citi.umich.edu> | ||
10 | * Fred Isaman <iisaman@umich.edu> | ||
11 | * | ||
12 | * permission is granted to use, copy, create derivative works and | ||
13 | * redistribute this software and such derivative works for any purpose, | ||
14 | * so long as the name of the university of michigan is not used in | ||
15 | * any advertising or publicity pertaining to the use or distribution | ||
16 | * of this software without specific, written prior authorization. if | ||
17 | * the above copyright notice or any other identification of the | ||
18 | * university of michigan is included in any copy of any portion of | ||
19 | * this software, then the disclaimer below must also be included. | ||
20 | * | ||
21 | * this software is provided as is, without representation from the | ||
22 | * university of michigan as to its fitness for any purpose, and without | ||
23 | * warranty by the university of michigan of any kind, either express | ||
24 | * or implied, including without limitation the implied warranties of | ||
25 | * merchantability and fitness for a particular purpose. the regents | ||
26 | * of the university of michigan shall not be liable for any damages, | ||
27 | * including special, indirect, incidental, or consequential damages, | ||
28 | * with respect to any claim arising out or in connection with the use | ||
29 | * of the software, even if it has been or is hereafter advised of the | ||
30 | * possibility of such damages. | ||
31 | */ | ||
32 | #ifndef FS_NFS_NFS4BLOCKLAYOUT_H | ||
33 | #define FS_NFS_NFS4BLOCKLAYOUT_H | ||
34 | |||
35 | #include <linux/device-mapper.h> | ||
36 | #include <linux/nfs_fs.h> | ||
37 | #include <linux/sunrpc/rpc_pipe_fs.h> | ||
38 | |||
39 | #include "../pnfs.h" | ||
40 | |||
41 | #define PAGE_CACHE_SECTORS (PAGE_CACHE_SIZE >> SECTOR_SHIFT) | ||
42 | #define PAGE_CACHE_SECTOR_SHIFT (PAGE_CACHE_SHIFT - SECTOR_SHIFT) | ||
43 | |||
/* Per-mount state: the list of meta devices resolved at mount time. */
struct block_mount_id {
	spinlock_t			bm_lock;    /* protects list */
	struct list_head		bm_devlist; /* holds pnfs_block_dev */
};

/* One server deviceid resolved to a local block device. */
struct pnfs_block_dev {
	struct list_head		bm_node;
	struct nfs4_deviceid		bm_mdevid;    /* associated devid */
	struct block_device		*bm_mdev;     /* meta device itself */
};

/* Extent states from the pnfs-block protocol. */
enum exstate4 {
	PNFS_BLOCK_READWRITE_DATA	= 0,
	PNFS_BLOCK_READ_DATA		= 1,
	PNFS_BLOCK_INVALID_DATA		= 2, /* mapped, but data is invalid */
	PNFS_BLOCK_NONE_DATA		= 3  /* unmapped, it's a hole */
};

#define MY_MAX_TAGS (15) /* tag bitnums used must be less than this */

/* Sparse per-range tag store, keyed by sector / mtt_step_size. */
struct my_tree {
	sector_t		mtt_step_size;	/* Internal sector alignment */
	struct list_head	mtt_stub; /* Should be a radix tree */
};

/* Tracks which sectors of INVALID_DATA extents have been initialized. */
struct pnfs_inval_markings {
	spinlock_t	im_lock;
	struct my_tree	im_tree;	/* Sectors that need LAYOUTCOMMIT */
	sector_t	im_block_size;	/* Server blocksize in sectors */
};

/* One node of my_tree: tag bits for a single step-sized range.
 * NOTE(review): it_sector is an int while sectors elsewhere are
 * sector_t -- looks like it could truncate for very large files; verify.
 */
struct pnfs_inval_tracking {
	struct list_head it_link;
	int		 it_sector;
	int		 it_tags;
};

/* sector_t fields are all in 512-byte sectors */
struct pnfs_block_extent {
	struct kref	be_refcnt;
	struct list_head be_node;	/* link into lseg list */
	struct nfs4_deviceid be_devid;  /* FIXME: could use device cache instead */
	struct block_device *be_mdev;
	sector_t	be_f_offset;	/* the starting offset in the file */
	sector_t	be_length;	/* the size of the extent */
	sector_t	be_v_offset;	/* the starting offset in the volume */
	enum exstate4	be_state;	/* the state of this extent */
	struct pnfs_inval_markings *be_inval; /* tracks INVAL->RW transition */
};

/* Shortened extent used by LAYOUTCOMMIT */
struct pnfs_block_short_extent {
	struct list_head bse_node;
	struct nfs4_deviceid bse_devid;
	struct block_device *bse_mdev;
	sector_t	bse_f_offset;	/* the starting offset in the file */
	sector_t	bse_length;	/* the size of the extent */
};
102 | |||
103 | static inline void | ||
104 | BL_INIT_INVAL_MARKS(struct pnfs_inval_markings *marks, sector_t blocksize) | ||
105 | { | ||
106 | spin_lock_init(&marks->im_lock); | ||
107 | INIT_LIST_HEAD(&marks->im_tree.mtt_stub); | ||
108 | marks->im_block_size = blocksize; | ||
109 | marks->im_tree.mtt_step_size = min((sector_t)PAGE_CACHE_SECTORS, | ||
110 | blocksize); | ||
111 | } | ||
112 | |||
113 | enum extentclass4 { | ||
114 | RW_EXTENT = 0, /* READWRTE and INVAL */ | ||
115 | RO_EXTENT = 1, /* READ and NONE */ | ||
116 | EXTENT_LISTS = 2, | ||
117 | }; | ||
118 | |||
119 | static inline int bl_choose_list(enum exstate4 state) | ||
120 | { | ||
121 | if (state == PNFS_BLOCK_READ_DATA || state == PNFS_BLOCK_NONE_DATA) | ||
122 | return RO_EXTENT; | ||
123 | else | ||
124 | return RW_EXTENT; | ||
125 | } | ||
126 | |||
/* Block layout header: extends the generic pnfs layout header with the
 * layout-wide extent lists and LAYOUTCOMMIT bookkeeping.
 */
struct pnfs_block_layout {
	struct pnfs_layout_hdr bl_layout;
	struct pnfs_inval_markings bl_inval; /* tracks INVAL->RW transition */
	spinlock_t		bl_ext_lock;   /* Protects list manipulation */
	struct list_head	bl_extents[EXTENT_LISTS]; /* R and RW extents */
	struct list_head	bl_commit;	/* Needs layout commit */
	struct list_head	bl_committing;	/* Layout committing */
	unsigned int		bl_count;	/* entries in bl_commit */
	sector_t		bl_blocksize;  /* Server blocksize in sectors */
};

/* Per-mount block_mount_id of the server owning layout header @lo. */
#define BLK_ID(lo) ((struct block_mount_id *)(NFS_SERVER(lo->plh_inode)->pnfs_ld_data))
139 | |||
/* Convert a generic layout header to the block layout embedding it. */
static inline struct pnfs_block_layout *
BLK_LO2EXT(struct pnfs_layout_hdr *lo)
{
	return container_of(lo, struct pnfs_block_layout, bl_layout);
}

/* Convert a layout segment to its owning block layout. */
static inline struct pnfs_block_layout *
BLK_LSEG2EXT(struct pnfs_layout_segment *lseg)
{
	return BLK_LO2EXT(lseg->pls_layout);
}
151 | |||
/* Reply written back by the userspace daemon over the rpc pipe. */
struct bl_dev_msg {
	int32_t status;
	uint32_t major, minor;
};

/* Header prepended to every upcall message sent to the daemon. */
struct bl_msg_hdr {
	u8  type;
	u16 totallen; /* length of entire message, including hdr itself */
};

extern struct dentry *bl_device_pipe;
extern wait_queue_head_t bl_wq;

/* Upcall message types and daemon reply status codes. */
#define BL_DEVICE_UMOUNT               0x0 /* Umount--delete devices */
#define BL_DEVICE_MOUNT                0x1 /* Mount--create devices*/
#define BL_DEVICE_REQUEST_INIT         0x0 /* Start request */
#define BL_DEVICE_REQUEST_PROC         0x1 /* User level process succeeds */
#define BL_DEVICE_REQUEST_ERR          0x2 /* User level process fails */
171 | /* blocklayoutdev.c */ | ||
172 | ssize_t bl_pipe_downcall(struct file *, const char __user *, size_t); | ||
173 | void bl_pipe_destroy_msg(struct rpc_pipe_msg *); | ||
174 | struct block_device *nfs4_blkdev_get(dev_t dev); | ||
175 | int nfs4_blkdev_put(struct block_device *bdev); | ||
176 | struct pnfs_block_dev *nfs4_blk_decode_device(struct nfs_server *server, | ||
177 | struct pnfs_device *dev); | ||
178 | int nfs4_blk_process_layoutget(struct pnfs_layout_hdr *lo, | ||
179 | struct nfs4_layoutget_res *lgr, gfp_t gfp_flags); | ||
180 | |||
181 | /* blocklayoutdm.c */ | ||
182 | void bl_free_block_dev(struct pnfs_block_dev *bdev); | ||
183 | |||
184 | /* extents.c */ | ||
185 | struct pnfs_block_extent * | ||
186 | bl_find_get_extent(struct pnfs_block_layout *bl, sector_t isect, | ||
187 | struct pnfs_block_extent **cow_read); | ||
188 | int bl_mark_sectors_init(struct pnfs_inval_markings *marks, | ||
189 | sector_t offset, sector_t length, | ||
190 | sector_t **pages); | ||
191 | void bl_put_extent(struct pnfs_block_extent *be); | ||
192 | struct pnfs_block_extent *bl_alloc_extent(void); | ||
193 | int bl_is_sector_init(struct pnfs_inval_markings *marks, sector_t isect); | ||
194 | int encode_pnfs_block_layoutupdate(struct pnfs_block_layout *bl, | ||
195 | struct xdr_stream *xdr, | ||
196 | const struct nfs4_layoutcommit_args *arg); | ||
197 | void clean_pnfs_block_layoutupdate(struct pnfs_block_layout *bl, | ||
198 | const struct nfs4_layoutcommit_args *arg, | ||
199 | int status); | ||
200 | int bl_add_merge_extent(struct pnfs_block_layout *bl, | ||
201 | struct pnfs_block_extent *new); | ||
202 | int bl_mark_for_commit(struct pnfs_block_extent *be, | ||
203 | sector_t offset, sector_t length); | ||
204 | |||
205 | #endif /* FS_NFS_NFS4BLOCKLAYOUT_H */ | ||
diff --git a/fs/nfs/blocklayout/blocklayoutdev.c b/fs/nfs/blocklayout/blocklayoutdev.c new file mode 100644 index 00000000000..d08ba9107fd --- /dev/null +++ b/fs/nfs/blocklayout/blocklayoutdev.c | |||
@@ -0,0 +1,391 @@ | |||
1 | /* | ||
2 | * linux/fs/nfs/blocklayout/blocklayoutdev.c | ||
3 | * | ||
4 | * Device operations for the pnfs nfs4 file layout driver. | ||
5 | * | ||
6 | * Copyright (c) 2006 The Regents of the University of Michigan. | ||
7 | * All rights reserved. | ||
8 | * | ||
9 | * Andy Adamson <andros@citi.umich.edu> | ||
10 | * Fred Isaman <iisaman@umich.edu> | ||
11 | * | ||
12 | * permission is granted to use, copy, create derivative works and | ||
13 | * redistribute this software and such derivative works for any purpose, | ||
14 | * so long as the name of the university of michigan is not used in | ||
15 | * any advertising or publicity pertaining to the use or distribution | ||
16 | * of this software without specific, written prior authorization. if | ||
17 | * the above copyright notice or any other identification of the | ||
18 | * university of michigan is included in any copy of any portion of | ||
19 | * this software, then the disclaimer below must also be included. | ||
20 | * | ||
21 | * this software is provided as is, without representation from the | ||
22 | * university of michigan as to its fitness for any purpose, and without | ||
23 | * warranty by the university of michigan of any kind, either express | ||
24 | * or implied, including without limitation the implied warranties of | ||
25 | * merchantability and fitness for a particular purpose. the regents | ||
26 | * of the university of michigan shall not be liable for any damages, | ||
27 | * including special, indirect, incidental, or consequential damages, | ||
28 | * with respect to any claim arising out or in connection with the use | ||
29 | * of the software, even if it has been or is hereafter advised of the | ||
30 | * possibility of such damages. | ||
31 | */ | ||
32 | #include <linux/module.h> | ||
33 | #include <linux/buffer_head.h> /* __bread */ | ||
34 | |||
35 | #include <linux/genhd.h> | ||
36 | #include <linux/blkdev.h> | ||
37 | #include <linux/hash.h> | ||
38 | |||
39 | #include "blocklayout.h" | ||
40 | |||
41 | #define NFSDBG_FACILITY NFSDBG_PNFS_LD | ||
42 | |||
43 | static int decode_sector_number(__be32 **rp, sector_t *sp) | ||
44 | { | ||
45 | uint64_t s; | ||
46 | |||
47 | *rp = xdr_decode_hyper(*rp, &s); | ||
48 | if (s & 0x1ff) { | ||
49 | printk(KERN_WARNING "%s: sector not aligned\n", __func__); | ||
50 | return -1; | ||
51 | } | ||
52 | *sp = s >> SECTOR_SHIFT; | ||
53 | return 0; | ||
54 | } | ||
55 | |||
56 | /* Open a block_device by device number. */ | ||
57 | struct block_device *nfs4_blkdev_get(dev_t dev) | ||
58 | { | ||
59 | struct block_device *bd; | ||
60 | |||
61 | dprintk("%s enter\n", __func__); | ||
62 | bd = blkdev_get_by_dev(dev, FMODE_READ, NULL); | ||
63 | if (IS_ERR(bd)) | ||
64 | goto fail; | ||
65 | return bd; | ||
66 | fail: | ||
67 | dprintk("%s failed to open device : %ld\n", | ||
68 | __func__, PTR_ERR(bd)); | ||
69 | return NULL; | ||
70 | } | ||
71 | |||
72 | /* | ||
73 | * Release the block device | ||
74 | */ | ||
75 | int nfs4_blkdev_put(struct block_device *bdev) | ||
76 | { | ||
77 | dprintk("%s for device %d:%d\n", __func__, MAJOR(bdev->bd_dev), | ||
78 | MINOR(bdev->bd_dev)); | ||
79 | return blkdev_put(bdev, FMODE_READ); | ||
80 | } | ||
81 | |||
82 | static struct bl_dev_msg bl_mount_reply; | ||
83 | |||
84 | ssize_t bl_pipe_downcall(struct file *filp, const char __user *src, | ||
85 | size_t mlen) | ||
86 | { | ||
87 | if (mlen != sizeof (struct bl_dev_msg)) | ||
88 | return -EINVAL; | ||
89 | |||
90 | if (copy_from_user(&bl_mount_reply, src, mlen) != 0) | ||
91 | return -EFAULT; | ||
92 | |||
93 | wake_up(&bl_wq); | ||
94 | |||
95 | return mlen; | ||
96 | } | ||
97 | |||
98 | void bl_pipe_destroy_msg(struct rpc_pipe_msg *msg) | ||
99 | { | ||
100 | if (msg->errno >= 0) | ||
101 | return; | ||
102 | wake_up(&bl_wq); | ||
103 | } | ||
104 | |||
105 | /* | ||
106 | * Decodes pnfs_block_deviceaddr4 which is XDR encoded in dev->dev_addr_buf. | ||
107 | */ | ||
108 | struct pnfs_block_dev * | ||
109 | nfs4_blk_decode_device(struct nfs_server *server, | ||
110 | struct pnfs_device *dev) | ||
111 | { | ||
112 | struct pnfs_block_dev *rv; | ||
113 | struct block_device *bd = NULL; | ||
114 | struct rpc_pipe_msg msg; | ||
115 | struct bl_msg_hdr bl_msg = { | ||
116 | .type = BL_DEVICE_MOUNT, | ||
117 | .totallen = dev->mincount, | ||
118 | }; | ||
119 | uint8_t *dataptr; | ||
120 | DECLARE_WAITQUEUE(wq, current); | ||
121 | struct bl_dev_msg *reply = &bl_mount_reply; | ||
122 | int offset, len, i, rc; | ||
123 | |||
124 | dprintk("%s CREATING PIPEFS MESSAGE\n", __func__); | ||
125 | dprintk("%s: deviceid: %s, mincount: %d\n", __func__, dev->dev_id.data, | ||
126 | dev->mincount); | ||
127 | |||
128 | memset(&msg, 0, sizeof(msg)); | ||
129 | msg.data = kzalloc(sizeof(bl_msg) + dev->mincount, GFP_NOFS); | ||
130 | if (!msg.data) { | ||
131 | rv = ERR_PTR(-ENOMEM); | ||
132 | goto out; | ||
133 | } | ||
134 | |||
135 | memcpy(msg.data, &bl_msg, sizeof(bl_msg)); | ||
136 | dataptr = (uint8_t *) msg.data; | ||
137 | len = dev->mincount; | ||
138 | offset = sizeof(bl_msg); | ||
139 | for (i = 0; len > 0; i++) { | ||
140 | memcpy(&dataptr[offset], page_address(dev->pages[i]), | ||
141 | len < PAGE_CACHE_SIZE ? len : PAGE_CACHE_SIZE); | ||
142 | len -= PAGE_CACHE_SIZE; | ||
143 | offset += PAGE_CACHE_SIZE; | ||
144 | } | ||
145 | msg.len = sizeof(bl_msg) + dev->mincount; | ||
146 | |||
147 | dprintk("%s CALLING USERSPACE DAEMON\n", __func__); | ||
148 | add_wait_queue(&bl_wq, &wq); | ||
149 | rc = rpc_queue_upcall(bl_device_pipe->d_inode, &msg); | ||
150 | if (rc < 0) { | ||
151 | remove_wait_queue(&bl_wq, &wq); | ||
152 | rv = ERR_PTR(rc); | ||
153 | goto out; | ||
154 | } | ||
155 | |||
156 | set_current_state(TASK_UNINTERRUPTIBLE); | ||
157 | schedule(); | ||
158 | __set_current_state(TASK_RUNNING); | ||
159 | remove_wait_queue(&bl_wq, &wq); | ||
160 | |||
161 | if (reply->status != BL_DEVICE_REQUEST_PROC) { | ||
162 | dprintk("%s failed to open device: %d\n", | ||
163 | __func__, reply->status); | ||
164 | rv = ERR_PTR(-EINVAL); | ||
165 | goto out; | ||
166 | } | ||
167 | |||
168 | bd = nfs4_blkdev_get(MKDEV(reply->major, reply->minor)); | ||
169 | if (IS_ERR(bd)) { | ||
170 | rc = PTR_ERR(bd); | ||
171 | dprintk("%s failed to open device : %d\n", __func__, rc); | ||
172 | rv = ERR_PTR(rc); | ||
173 | goto out; | ||
174 | } | ||
175 | |||
176 | rv = kzalloc(sizeof(*rv), GFP_NOFS); | ||
177 | if (!rv) { | ||
178 | rv = ERR_PTR(-ENOMEM); | ||
179 | goto out; | ||
180 | } | ||
181 | |||
182 | rv->bm_mdev = bd; | ||
183 | memcpy(&rv->bm_mdevid, &dev->dev_id, sizeof(struct nfs4_deviceid)); | ||
184 | dprintk("%s Created device %s with bd_block_size %u\n", | ||
185 | __func__, | ||
186 | bd->bd_disk->disk_name, | ||
187 | bd->bd_block_size); | ||
188 | |||
189 | out: | ||
190 | kfree(msg.data); | ||
191 | return rv; | ||
192 | } | ||
193 | |||
194 | /* Map deviceid returned by the server to constructed block_device */ | ||
195 | static struct block_device *translate_devid(struct pnfs_layout_hdr *lo, | ||
196 | struct nfs4_deviceid *id) | ||
197 | { | ||
198 | struct block_device *rv = NULL; | ||
199 | struct block_mount_id *mid; | ||
200 | struct pnfs_block_dev *dev; | ||
201 | |||
202 | dprintk("%s enter, lo=%p, id=%p\n", __func__, lo, id); | ||
203 | mid = BLK_ID(lo); | ||
204 | spin_lock(&mid->bm_lock); | ||
205 | list_for_each_entry(dev, &mid->bm_devlist, bm_node) { | ||
206 | if (memcmp(id->data, dev->bm_mdevid.data, | ||
207 | NFS4_DEVICEID4_SIZE) == 0) { | ||
208 | rv = dev->bm_mdev; | ||
209 | goto out; | ||
210 | } | ||
211 | } | ||
212 | out: | ||
213 | spin_unlock(&mid->bm_lock); | ||
214 | dprintk("%s returning %p\n", __func__, rv); | ||
215 | return rv; | ||
216 | } | ||
217 | |||
/* Tracks info needed to ensure extents in layout obey constraints of spec.
 * Updated incrementally by verify_extent() as each extent is decoded.
 */
struct layout_verification {
	u32 mode;	/* R or RW */
	u64 start;	/* Expected start of next non-COW extent */
	u64 inval;	/* Start of INVAL coverage */
	u64 cowread;	/* End of COW read coverage */
};
225 | |||
/* Verify the extent meets the layout requirements of the pnfs-block draft,
 * section 2.3.1:
 *  - a READ layout may contain only READ/NONE extents, back to back;
 *  - an RW layout holds back-to-back RW/INVAL extents, optionally
 *    overlaid by contiguous READ (copy-on-write) extents that must not
 *    extend past the RW coverage already seen.
 * Updates @lv in place; returns 0 if @be legally extends the layout,
 * -EIO otherwise.
 */
static int verify_extent(struct pnfs_block_extent *be,
			 struct layout_verification *lv)
{
	if (lv->mode == IOMODE_READ) {
		if (be->be_state == PNFS_BLOCK_READWRITE_DATA ||
		    be->be_state == PNFS_BLOCK_INVALID_DATA)
			return -EIO;
		if (be->be_f_offset != lv->start)
			return -EIO;
		lv->start += be->be_length;
		return 0;
	}
	/* lv->mode == IOMODE_RW */
	if (be->be_state == PNFS_BLOCK_READWRITE_DATA) {
		if (be->be_f_offset != lv->start)
			return -EIO;
		/* Any pending COW read coverage must be fully closed out. */
		if (lv->cowread > lv->start)
			return -EIO;
		lv->start += be->be_length;
		lv->inval = lv->start;
		return 0;
	} else if (be->be_state == PNFS_BLOCK_INVALID_DATA) {
		if (be->be_f_offset != lv->start)
			return -EIO;
		lv->start += be->be_length;
		return 0;
	} else if (be->be_state == PNFS_BLOCK_READ_DATA) {
		/* COW read extent: may lag behind start, but must follow
		 * both the INVAL coverage and any previous COW extent.
		 */
		if (be->be_f_offset > lv->start)
			return -EIO;
		if (be->be_f_offset < lv->inval)
			return -EIO;
		if (be->be_f_offset < lv->cowread)
			return -EIO;
		/* It looks like you might want to min this with lv->start,
		 * but you really don't.
		 */
		lv->inval = lv->inval + be->be_length;
		lv->cowread = be->be_f_offset + be->be_length;
		return 0;
	} else
		return -EIO;
}
271 | |||
/* XDR decode pnfs_block_layout4 structure.
 *
 * Decodes the extent list of a LAYOUTGET reply into a temporary staging
 * list, verifies every extent against the layout rules (verify_extent),
 * and only then merges them into the layout-wide extent lists.
 * Returns 0 on success, a negative errno otherwise.
 */
int
nfs4_blk_process_layoutget(struct pnfs_layout_hdr *lo,
			   struct nfs4_layoutget_res *lgr, gfp_t gfp_flags)
{
	struct pnfs_block_layout *bl = BLK_LO2EXT(lo);
	int i, status = -EIO;
	uint32_t count;
	struct pnfs_block_extent *be = NULL, *save;
	struct xdr_stream stream;
	struct xdr_buf buf;
	struct page *scratch;
	__be32 *p;
	/* Verification starts at the requested range's offset. */
	struct layout_verification lv = {
		.mode = lgr->range.iomode,
		.start = lgr->range.offset >> SECTOR_SHIFT,
		.inval = lgr->range.offset >> SECTOR_SHIFT,
		.cowread = lgr->range.offset >> SECTOR_SHIFT,
	};
	LIST_HEAD(extents);

	dprintk("---> %s\n", __func__);

	scratch = alloc_page(gfp_flags);
	if (!scratch)
		return -ENOMEM;

	xdr_init_decode_pages(&stream, &buf, lgr->layoutp->pages, lgr->layoutp->len);
	xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE);

	p = xdr_inline_decode(&stream, 4);
	if (unlikely(!p))
		goto out_err;

	count = be32_to_cpup(p++);

	dprintk("%s enter, number of extents %i\n", __func__, count);
	/* Each on-the-wire extent is deviceid + 3 hypers + state word. */
	p = xdr_inline_decode(&stream, (28 + NFS4_DEVICEID4_SIZE) * count);
	if (unlikely(!p))
		goto out_err;

	/* Decode individual extents, putting them in temporary
	 * staging area until whole layout is decoded to make error
	 * recovery easier.
	 */
	for (i = 0; i < count; i++) {
		be = bl_alloc_extent();
		if (!be) {
			status = -ENOMEM;
			goto out_err;
		}
		memcpy(&be->be_devid, p, NFS4_DEVICEID4_SIZE);
		p += XDR_QUADLEN(NFS4_DEVICEID4_SIZE);
		be->be_mdev = translate_devid(lo, &be->be_devid);
		if (!be->be_mdev)
			goto out_err;

		/* The next three values are read in as bytes,
		 * but stored as 512-byte sector lengths
		 */
		if (decode_sector_number(&p, &be->be_f_offset) < 0)
			goto out_err;
		if (decode_sector_number(&p, &be->be_length) < 0)
			goto out_err;
		if (decode_sector_number(&p, &be->be_v_offset) < 0)
			goto out_err;
		be->be_state = be32_to_cpup(p++);
		if (be->be_state == PNFS_BLOCK_INVALID_DATA)
			be->be_inval = &bl->bl_inval;
		if (verify_extent(be, &lv)) {
			dprintk("%s verify failed\n", __func__);
			goto out_err;
		}
		list_add_tail(&be->be_node, &extents);
	}
	/* The decoded extents must exactly cover the requested range. */
	if (lgr->range.offset + lgr->range.length !=
			lv.start << SECTOR_SHIFT) {
		dprintk("%s Final length mismatch\n", __func__);
		be = NULL;
		goto out_err;
	}
	if (lv.start < lv.cowread) {
		dprintk("%s Final uncovered COW extent\n", __func__);
		be = NULL;
		goto out_err;
	}
	/* Extents decoded properly, now try to merge them in to
	 * existing layout extents.
	 */
	spin_lock(&bl->bl_ext_lock);
	list_for_each_entry_safe(be, save, &extents, be_node) {
		list_del(&be->be_node);
		status = bl_add_merge_extent(bl, be);
		if (status) {
			spin_unlock(&bl->bl_ext_lock);
			/* This is a fairly catastrophic error, as the
			 * entire layout extent lists are now corrupted.
			 * We should have some way to distinguish this.
			 */
			be = NULL;
			goto out_err;
		}
	}
	spin_unlock(&bl->bl_ext_lock);
	status = 0;
 out:
	__free_page(scratch);
	dprintk("%s returns %i\n", __func__, status);
	return status;

 out_err:
	/* be holds the extent being processed when the error hit (or
	 * NULL); drop it, then drain whatever was already staged.
	 */
	bl_put_extent(be);
	while (!list_empty(&extents)) {
		be = list_first_entry(&extents, struct pnfs_block_extent,
				      be_node);
		list_del(&be->be_node);
		bl_put_extent(be);
	}
	goto out;
}
diff --git a/fs/nfs/blocklayout/blocklayoutdm.c b/fs/nfs/blocklayout/blocklayoutdm.c new file mode 100644 index 00000000000..d055c755807 --- /dev/null +++ b/fs/nfs/blocklayout/blocklayoutdm.c | |||
@@ -0,0 +1,111 @@ | |||
1 | /* | ||
2 | * linux/fs/nfs/blocklayout/blocklayoutdm.c | ||
3 | * | ||
4 | * Module for the NFSv4.1 pNFS block layout driver. | ||
5 | * | ||
6 | * Copyright (c) 2007 The Regents of the University of Michigan. | ||
7 | * All rights reserved. | ||
8 | * | ||
9 | * Fred Isaman <iisaman@umich.edu> | ||
10 | * Andy Adamson <andros@citi.umich.edu> | ||
11 | * | ||
12 | * permission is granted to use, copy, create derivative works and | ||
13 | * redistribute this software and such derivative works for any purpose, | ||
14 | * so long as the name of the university of michigan is not used in | ||
15 | * any advertising or publicity pertaining to the use or distribution | ||
16 | * of this software without specific, written prior authorization. if | ||
17 | * the above copyright notice or any other identification of the | ||
18 | * university of michigan is included in any copy of any portion of | ||
19 | * this software, then the disclaimer below must also be included. | ||
20 | * | ||
21 | * this software is provided as is, without representation from the | ||
22 | * university of michigan as to its fitness for any purpose, and without | ||
23 | * warranty by the university of michigan of any kind, either express | ||
24 | * or implied, including without limitation the implied warranties of | ||
25 | * merchantability and fitness for a particular purpose. the regents | ||
26 | * of the university of michigan shall not be liable for any damages, | ||
27 | * including special, indirect, incidental, or consequential damages, | ||
28 | * with respect to any claim arising out or in connection with the use | ||
29 | * of the software, even if it has been or is hereafter advised of the | ||
30 | * possibility of such damages. | ||
31 | */ | ||
32 | |||
33 | #include <linux/genhd.h> /* gendisk - used in a dprintk*/ | ||
34 | #include <linux/sched.h> | ||
35 | #include <linux/hash.h> | ||
36 | |||
37 | #include "blocklayout.h" | ||
38 | |||
39 | #define NFSDBG_FACILITY NFSDBG_PNFS_LD | ||
40 | |||
41 | static void dev_remove(dev_t dev) | ||
42 | { | ||
43 | struct rpc_pipe_msg msg; | ||
44 | struct bl_dev_msg bl_umount_request; | ||
45 | struct bl_msg_hdr bl_msg = { | ||
46 | .type = BL_DEVICE_UMOUNT, | ||
47 | .totallen = sizeof(bl_umount_request), | ||
48 | }; | ||
49 | uint8_t *dataptr; | ||
50 | DECLARE_WAITQUEUE(wq, current); | ||
51 | |||
52 | dprintk("Entering %s\n", __func__); | ||
53 | |||
54 | memset(&msg, 0, sizeof(msg)); | ||
55 | msg.data = kzalloc(1 + sizeof(bl_umount_request), GFP_NOFS); | ||
56 | if (!msg.data) | ||
57 | goto out; | ||
58 | |||
59 | memset(&bl_umount_request, 0, sizeof(bl_umount_request)); | ||
60 | bl_umount_request.major = MAJOR(dev); | ||
61 | bl_umount_request.minor = MINOR(dev); | ||
62 | |||
63 | memcpy(msg.data, &bl_msg, sizeof(bl_msg)); | ||
64 | dataptr = (uint8_t *) msg.data; | ||
65 | memcpy(&dataptr[sizeof(bl_msg)], &bl_umount_request, sizeof(bl_umount_request)); | ||
66 | msg.len = sizeof(bl_msg) + bl_msg.totallen; | ||
67 | |||
68 | add_wait_queue(&bl_wq, &wq); | ||
69 | if (rpc_queue_upcall(bl_device_pipe->d_inode, &msg) < 0) { | ||
70 | remove_wait_queue(&bl_wq, &wq); | ||
71 | goto out; | ||
72 | } | ||
73 | |||
74 | set_current_state(TASK_UNINTERRUPTIBLE); | ||
75 | schedule(); | ||
76 | __set_current_state(TASK_RUNNING); | ||
77 | remove_wait_queue(&bl_wq, &wq); | ||
78 | |||
79 | out: | ||
80 | kfree(msg.data); | ||
81 | } | ||
82 | |||
83 | /* | ||
84 | * Release meta device | ||
85 | */ | ||
86 | static void nfs4_blk_metadev_release(struct pnfs_block_dev *bdev) | ||
87 | { | ||
88 | int rv; | ||
89 | |||
90 | dprintk("%s Releasing\n", __func__); | ||
91 | rv = nfs4_blkdev_put(bdev->bm_mdev); | ||
92 | if (rv) | ||
93 | printk(KERN_ERR "%s nfs4_blkdev_put returns %d\n", | ||
94 | __func__, rv); | ||
95 | |||
96 | dev_remove(bdev->bm_mdev->bd_dev); | ||
97 | } | ||
98 | |||
99 | void bl_free_block_dev(struct pnfs_block_dev *bdev) | ||
100 | { | ||
101 | if (bdev) { | ||
102 | if (bdev->bm_mdev) { | ||
103 | dprintk("%s Removing DM device: %d:%d\n", | ||
104 | __func__, | ||
105 | MAJOR(bdev->bm_mdev->bd_dev), | ||
106 | MINOR(bdev->bm_mdev->bd_dev)); | ||
107 | nfs4_blk_metadev_release(bdev); | ||
108 | } | ||
109 | kfree(bdev); | ||
110 | } | ||
111 | } | ||
diff --git a/fs/nfs/blocklayout/extents.c b/fs/nfs/blocklayout/extents.c new file mode 100644 index 00000000000..19fa7b0b8c0 --- /dev/null +++ b/fs/nfs/blocklayout/extents.c | |||
@@ -0,0 +1,935 @@ | |||
1 | /* | ||
2 | * linux/fs/nfs/blocklayout/blocklayout.h | ||
3 | * | ||
4 | * Module for the NFSv4.1 pNFS block layout driver. | ||
5 | * | ||
6 | * Copyright (c) 2006 The Regents of the University of Michigan. | ||
7 | * All rights reserved. | ||
8 | * | ||
9 | * Andy Adamson <andros@citi.umich.edu> | ||
10 | * Fred Isaman <iisaman@umich.edu> | ||
11 | * | ||
12 | * permission is granted to use, copy, create derivative works and | ||
13 | * redistribute this software and such derivative works for any purpose, | ||
14 | * so long as the name of the university of michigan is not used in | ||
15 | * any advertising or publicity pertaining to the use or distribution | ||
16 | * of this software without specific, written prior authorization. if | ||
17 | * the above copyright notice or any other identification of the | ||
18 | * university of michigan is included in any copy of any portion of | ||
19 | * this software, then the disclaimer below must also be included. | ||
20 | * | ||
21 | * this software is provided as is, without representation from the | ||
22 | * university of michigan as to its fitness for any purpose, and without | ||
23 | * warranty by the university of michigan of any kind, either express | ||
24 | * or implied, including without limitation the implied warranties of | ||
25 | * merchantability and fitness for a particular purpose. the regents | ||
26 | * of the university of michigan shall not be liable for any damages, | ||
27 | * including special, indirect, incidental, or consequential damages, | ||
28 | * with respect to any claim arising out or in connection with the use | ||
29 | * of the software, even if it has been or is hereafter advised of the | ||
30 | * possibility of such damages. | ||
31 | */ | ||
32 | |||
33 | #include "blocklayout.h" | ||
34 | #define NFSDBG_FACILITY NFSDBG_PNFS_LD | ||
35 | |||
36 | /* Bit numbers */ | ||
37 | #define EXTENT_INITIALIZED 0 | ||
38 | #define EXTENT_WRITTEN 1 | ||
39 | #define EXTENT_IN_COMMIT 2 | ||
40 | #define INTERNAL_EXISTS MY_MAX_TAGS | ||
41 | #define INTERNAL_MASK ((1 << INTERNAL_EXISTS) - 1) | ||
42 | |||
/* Returns largest t<=s s.t. t%base==0 */
static inline sector_t normalize(sector_t s, int base)
{
	/* do_div() modifies its first argument in place and returns the
	 * remainder, hence the copy: we want s minus (s % base).
	 */
	sector_t tmp = s; /* Since do_div modifies its argument */
	return s - do_div(tmp, base);
}
49 | |||
/* Returns smallest t>=s s.t. t%base==0, i.e. s rounded up to a
 * multiple of base.
 */
static inline sector_t normalize_up(sector_t s, int base)
{
	return normalize(s + base - 1, base);
}
54 | |||
55 | /* Complete stub using list while determine API wanted */ | ||
56 | |||
57 | /* Returns tags, or negative */ | ||
58 | static int32_t _find_entry(struct my_tree *tree, u64 s) | ||
59 | { | ||
60 | struct pnfs_inval_tracking *pos; | ||
61 | |||
62 | dprintk("%s(%llu) enter\n", __func__, s); | ||
63 | list_for_each_entry_reverse(pos, &tree->mtt_stub, it_link) { | ||
64 | if (pos->it_sector > s) | ||
65 | continue; | ||
66 | else if (pos->it_sector == s) | ||
67 | return pos->it_tags & INTERNAL_MASK; | ||
68 | else | ||
69 | break; | ||
70 | } | ||
71 | return -ENOENT; | ||
72 | } | ||
73 | |||
74 | static inline | ||
75 | int _has_tag(struct my_tree *tree, u64 s, int32_t tag) | ||
76 | { | ||
77 | int32_t tags; | ||
78 | |||
79 | dprintk("%s(%llu, %i) enter\n", __func__, s, tag); | ||
80 | s = normalize(s, tree->mtt_step_size); | ||
81 | tags = _find_entry(tree, s); | ||
82 | if ((tags < 0) || !(tags & (1 << tag))) | ||
83 | return 0; | ||
84 | else | ||
85 | return 1; | ||
86 | } | ||
87 | |||
88 | /* Creates entry with tag, or if entry already exists, unions tag to it. | ||
89 | * If storage is not NULL, newly created entry will use it. | ||
90 | * Returns number of entries added, or negative on error. | ||
91 | */ | ||
92 | static int _add_entry(struct my_tree *tree, u64 s, int32_t tag, | ||
93 | struct pnfs_inval_tracking *storage) | ||
94 | { | ||
95 | int found = 0; | ||
96 | struct pnfs_inval_tracking *pos; | ||
97 | |||
98 | dprintk("%s(%llu, %i, %p) enter\n", __func__, s, tag, storage); | ||
99 | list_for_each_entry_reverse(pos, &tree->mtt_stub, it_link) { | ||
100 | if (pos->it_sector > s) | ||
101 | continue; | ||
102 | else if (pos->it_sector == s) { | ||
103 | found = 1; | ||
104 | break; | ||
105 | } else | ||
106 | break; | ||
107 | } | ||
108 | if (found) { | ||
109 | pos->it_tags |= (1 << tag); | ||
110 | return 0; | ||
111 | } else { | ||
112 | struct pnfs_inval_tracking *new; | ||
113 | if (storage) | ||
114 | new = storage; | ||
115 | else { | ||
116 | new = kmalloc(sizeof(*new), GFP_NOFS); | ||
117 | if (!new) | ||
118 | return -ENOMEM; | ||
119 | } | ||
120 | new->it_sector = s; | ||
121 | new->it_tags = (1 << tag); | ||
122 | list_add(&new->it_link, &pos->it_link); | ||
123 | return 1; | ||
124 | } | ||
125 | } | ||
126 | |||
/* XXXX Really want option to not create */
/* Over range, unions tag with existing entries, else creates entry with tag */
static int _set_range(struct my_tree *tree, int32_t tag, u64 s, u64 length)
{
	u64 i;

	dprintk("%s(%i, %llu, %llu) enter\n", __func__, tag, s, length);
	for (i = normalize(s, tree->mtt_step_size); i < s + length;
	     i += tree->mtt_step_size)
		/* NOTE(review): _add_entry() returns 1 when it had to
		 * allocate a brand-new node, and that is treated as
		 * failure here.  This relies on callers having run
		 * _preload_range() over this range first so every slot
		 * already exists - verify that invariant at call sites.
		 */
		if (_add_entry(tree, i, tag, NULL))
			return -ENOMEM;
	return 0;
}
140 | |||
141 | /* Ensure that future operations on given range of tree will not malloc */ | ||
142 | static int _preload_range(struct my_tree *tree, u64 offset, u64 length) | ||
143 | { | ||
144 | u64 start, end, s; | ||
145 | int count, i, used = 0, status = -ENOMEM; | ||
146 | struct pnfs_inval_tracking **storage; | ||
147 | |||
148 | dprintk("%s(%llu, %llu) enter\n", __func__, offset, length); | ||
149 | start = normalize(offset, tree->mtt_step_size); | ||
150 | end = normalize_up(offset + length, tree->mtt_step_size); | ||
151 | count = (int)(end - start) / (int)tree->mtt_step_size; | ||
152 | |||
153 | /* Pre-malloc what memory we might need */ | ||
154 | storage = kmalloc(sizeof(*storage) * count, GFP_NOFS); | ||
155 | if (!storage) | ||
156 | return -ENOMEM; | ||
157 | for (i = 0; i < count; i++) { | ||
158 | storage[i] = kmalloc(sizeof(struct pnfs_inval_tracking), | ||
159 | GFP_NOFS); | ||
160 | if (!storage[i]) | ||
161 | goto out_cleanup; | ||
162 | } | ||
163 | |||
164 | /* Now need lock - HOW??? */ | ||
165 | |||
166 | for (s = start; s < end; s += tree->mtt_step_size) | ||
167 | used += _add_entry(tree, s, INTERNAL_EXISTS, storage[used]); | ||
168 | |||
169 | /* Unlock - HOW??? */ | ||
170 | status = 0; | ||
171 | |||
172 | out_cleanup: | ||
173 | for (i = used; i < count; i++) { | ||
174 | if (!storage[i]) | ||
175 | break; | ||
176 | kfree(storage[i]); | ||
177 | } | ||
178 | kfree(storage); | ||
179 | return status; | ||
180 | } | ||
181 | |||
/* Insert offset into the sorted, ~0-terminated sector array, keeping it
 * sorted; no-op when array is NULL or offset already present.  The
 * caller (bl_mark_sectors_init) sizes the array so the shift below has
 * room for one more element - assumes that bound holds; TODO confirm.
 */
static void set_needs_init(sector_t *array, sector_t offset)
{
	sector_t *p = array;

	dprintk("%s enter\n", __func__);
	if (!p)
		return;
	/* Scan to the first slot >= offset (terminator ~0 stops us) */
	while (*p < offset)
		p++;
	if (*p == offset)
		return;
	else if (*p == ~0) {
		/* Hit the terminator: append and re-terminate */
		*p++ = offset;
		*p = ~0;
		return;
	} else {
		/* Insert in the middle: shift the tail, including the
		 * ~0 terminator, one slot to the right.
		 */
		sector_t *save = p;
		dprintk("%s Adding %llu\n", __func__, (u64)offset);
		while (*p != ~0)
			p++;
		p++;
		memmove(save + 1, save, (char *)p - (char *)save);
		*save = offset;
		return;
	}
}
208 | |||
209 | /* We are relying on page lock to serialize this */ | ||
210 | int bl_is_sector_init(struct pnfs_inval_markings *marks, sector_t isect) | ||
211 | { | ||
212 | int rv; | ||
213 | |||
214 | spin_lock(&marks->im_lock); | ||
215 | rv = _has_tag(&marks->im_tree, isect, EXTENT_INITIALIZED); | ||
216 | spin_unlock(&marks->im_lock); | ||
217 | return rv; | ||
218 | } | ||
219 | |||
/* Assume start, end already sector aligned */
/* Returns 1 iff every step-sized slot in [start, end) carries `tag`.
 * Walks the sorted list from the back, so in-range entries must appear
 * at exactly step_size intervals with no gaps for the range to pass.
 */
static int
_range_has_tag(struct my_tree *tree, u64 start, u64 end, int32_t tag)
{
	struct pnfs_inval_tracking *pos;
	u64 expect = 0;	/* next sector we require; 0 == not started */

	dprintk("%s(%llu, %llu, %i) enter\n", __func__, start, end, tag);
	list_for_each_entry_reverse(pos, &tree->mtt_stub, it_link) {
		if (pos->it_sector >= end)
			continue;	/* not inside the range yet */
		if (!expect) {
			/* First in-range entry must be the final slot
			 * of the range and must carry the tag.
			 */
			if ((pos->it_sector == end - tree->mtt_step_size) &&
			    (pos->it_tags & (1 << tag))) {
				expect = pos->it_sector - tree->mtt_step_size;
				/* Guard the u64 underflow when the last
				 * slot is also the first one.
				 */
				if (pos->it_sector < tree->mtt_step_size || expect < start)
					return 1;
				continue;
			} else {
				return 0;
			}
		}
		/* Subsequent entries must be contiguous and tagged */
		if (pos->it_sector != expect || !(pos->it_tags & (1 << tag)))
			return 0;
		expect -= tree->mtt_step_size;
		if (expect < start)
			return 1;	/* covered the whole range */
	}
	return 0;
}
250 | |||
251 | static int is_range_written(struct pnfs_inval_markings *marks, | ||
252 | sector_t start, sector_t end) | ||
253 | { | ||
254 | int rv; | ||
255 | |||
256 | spin_lock(&marks->im_lock); | ||
257 | rv = _range_has_tag(&marks->im_tree, start, end, EXTENT_WRITTEN); | ||
258 | spin_unlock(&marks->im_lock); | ||
259 | return rv; | ||
260 | } | ||
261 | |||
/* Marks sectors in [offset, offset+length) as having been initialized.
 * All lengths are step-aligned, where step is min(pagesize, blocksize).
 * Notes where partial block is initialized, and helps prepare it for
 * complete initialization later.
 *
 * On success, if pages was supplied and any page-sized chunks inside
 * the covering blocks remain uninitialized, *pages gets a kmalloc'ed,
 * sorted, ~0-terminated array of those sectors (caller frees);
 * otherwise *pages is set to NULL.  Returns 0 or -ENOMEM.
 */
/* Currently assumes offset is page-aligned */
int bl_mark_sectors_init(struct pnfs_inval_markings *marks,
			     sector_t offset, sector_t length,
			     sector_t **pages)
{
	sector_t s, start, end;
	sector_t *array = NULL; /* Pages to mark */

	dprintk("%s(offset=%llu,len=%llu) enter\n",
		__func__, (u64)offset, (u64)length);
	/* Worst-case entries we might record plus terminator; presumably
	 * two partially-covered blocks' worth of pages - TODO confirm
	 * this bound against set_needs_init()'s shifting.
	 */
	s = max((sector_t) 3,
		2 * (marks->im_block_size / (PAGE_CACHE_SECTORS)));
	dprintk("%s set max=%llu\n", __func__, (u64)s);
	if (pages) {
		array = kmalloc(s * sizeof(sector_t), GFP_NOFS);
		if (!array)
			goto outerr;
		array[0] = ~0;	/* terminator expected by set_needs_init */
	}

	/* Expand to full block coverage, and pre-allocate tree nodes so
	 * tagging below cannot fail while the spinlock is held.
	 */
	start = normalize(offset, marks->im_block_size);
	end = normalize_up(offset + length, marks->im_block_size);
	if (_preload_range(&marks->im_tree, start, end - start))
		goto outerr;

	spin_lock(&marks->im_lock);

	/* Record still-uninitialized pages preceding the range inside
	 * its first covering block.
	 */
	for (s = normalize_up(start, PAGE_CACHE_SECTORS);
	     s < offset; s += PAGE_CACHE_SECTORS) {
		dprintk("%s pre-area pages\n", __func__);
		/* Portion of used block is not initialized */
		if (!_has_tag(&marks->im_tree, s, EXTENT_INITIALIZED))
			set_needs_init(array, s);
	}
	if (_set_range(&marks->im_tree, EXTENT_INITIALIZED, offset, length))
		goto out_unlock;
	/* Same for trailing pages of the last covering block */
	for (s = normalize_up(offset + length, PAGE_CACHE_SECTORS);
	     s < end; s += PAGE_CACHE_SECTORS) {
		dprintk("%s post-area pages\n", __func__);
		if (!_has_tag(&marks->im_tree, s, EXTENT_INITIALIZED))
			set_needs_init(array, s);
	}

	spin_unlock(&marks->im_lock);

	if (pages) {
		if (array[0] == ~0) {
			/* nothing recorded: hand back NULL instead */
			kfree(array);
			*pages = NULL;
		} else
			*pages = array;
	}
	return 0;

out_unlock:
	spin_unlock(&marks->im_lock);
outerr:
	if (pages) {
		kfree(array);
		*pages = NULL;
	}
	return -ENOMEM;
}
330 | |||
331 | /* Marks sectors in [offest, offset+length) as having been written to disk. | ||
332 | * All lengths should be block aligned. | ||
333 | */ | ||
334 | static int mark_written_sectors(struct pnfs_inval_markings *marks, | ||
335 | sector_t offset, sector_t length) | ||
336 | { | ||
337 | int status; | ||
338 | |||
339 | dprintk("%s(offset=%llu,len=%llu) enter\n", __func__, | ||
340 | (u64)offset, (u64)length); | ||
341 | spin_lock(&marks->im_lock); | ||
342 | status = _set_range(&marks->im_tree, EXTENT_WRITTEN, offset, length); | ||
343 | spin_unlock(&marks->im_lock); | ||
344 | return status; | ||
345 | } | ||
346 | |||
347 | static void print_short_extent(struct pnfs_block_short_extent *be) | ||
348 | { | ||
349 | dprintk("PRINT SHORT EXTENT extent %p\n", be); | ||
350 | if (be) { | ||
351 | dprintk(" be_f_offset %llu\n", (u64)be->bse_f_offset); | ||
352 | dprintk(" be_length %llu\n", (u64)be->bse_length); | ||
353 | } | ||
354 | } | ||
355 | |||
356 | static void print_clist(struct list_head *list, unsigned int count) | ||
357 | { | ||
358 | struct pnfs_block_short_extent *be; | ||
359 | unsigned int i = 0; | ||
360 | |||
361 | ifdebug(FACILITY) { | ||
362 | printk(KERN_DEBUG "****************\n"); | ||
363 | printk(KERN_DEBUG "Extent list looks like:\n"); | ||
364 | list_for_each_entry(be, list, bse_node) { | ||
365 | i++; | ||
366 | print_short_extent(be); | ||
367 | } | ||
368 | if (i != count) | ||
369 | printk(KERN_DEBUG "\n\nExpected %u entries\n\n\n", count); | ||
370 | printk(KERN_DEBUG "****************\n"); | ||
371 | } | ||
372 | } | ||
373 | |||
/* Note: In theory, we should do more checking that devid's match between
 * old and new, but if they don't, the lists are too corrupt to salvage anyway.
 */
/* Note this is very similar to bl_add_merge_extent */
/* Insert `new` into the sorted commit list, merging it with overlapping
 * or abutting extents on the same meta device.  Caller holds
 * bl->bl_ext_lock and transfers ownership of `new`; it is kfree'd here
 * if the range turns out to be fully covered already.
 */
static void add_to_commitlist(struct pnfs_block_layout *bl,
			      struct pnfs_block_short_extent *new)
{
	struct list_head *clist = &bl->bl_commit;
	struct pnfs_block_short_extent *old, *save;
	sector_t end = new->bse_f_offset + new->bse_length;

	dprintk("%s enter\n", __func__);
	print_short_extent(new);
	print_clist(clist, bl->bl_count);
	bl->bl_count++;
	/* Scan for proper place to insert, extending new to the left
	 * as much as possible.
	 */
	list_for_each_entry_safe(old, save, clist, bse_node) {
		if (new->bse_f_offset < old->bse_f_offset)
			break;
		if (end <= old->bse_f_offset + old->bse_length) {
			/* Range is already in list */
			bl->bl_count--;
			kfree(new);
			return;
		} else if (new->bse_f_offset <=
				old->bse_f_offset + old->bse_length) {
			/* new overlaps or abuts existing be */
			if (new->bse_mdev == old->bse_mdev) {
				/* extend new to fully replace old */
				new->bse_length += new->bse_f_offset -
						old->bse_f_offset;
				new->bse_f_offset = old->bse_f_offset;
				list_del(&old->bse_node);
				bl->bl_count--;
				kfree(old);
			}
		}
	}
	/* Note that if we never hit the above break, old will not point to a
	 * valid extent. However, in that case &old->bse_node==list.
	 */
	list_add_tail(&new->bse_node, &old->bse_node);
	/* Scan forward for overlaps. If we find any, extend new and
	 * remove the overlapped extent.
	 */
	old = list_prepare_entry(new, clist, bse_node);
	list_for_each_entry_safe_continue(old, save, clist, bse_node) {
		if (end < old->bse_f_offset)
			break;
		/* new overlaps or abuts old */
		if (new->bse_mdev == old->bse_mdev) {
			if (end < old->bse_f_offset + old->bse_length) {
				/* extend new to fully cover old */
				end = old->bse_f_offset + old->bse_length;
				new->bse_length = end - new->bse_f_offset;
			}
			list_del(&old->bse_node);
			bl->bl_count--;
			kfree(old);
		}
	}
	dprintk("%s: after merging\n", __func__);
	print_clist(clist, bl->bl_count);
}
440 | |||
/* Note the range described by offset, length is guaranteed to be contained
 * within be.
 */
/* Record that [offset, offset+length) of extent `be` was written, and
 * add the block-aligned, fully-written portion of that range to the
 * layout's LAYOUTCOMMIT list.  Returns 0 or -ENOMEM.
 */
int bl_mark_for_commit(struct pnfs_block_extent *be,
		    sector_t offset, sector_t length)
{
	sector_t new_end, end = offset + length;
	struct pnfs_block_short_extent *new;
	struct pnfs_block_layout *bl = container_of(be->be_inval,
						    struct pnfs_block_layout,
						    bl_inval);

	new = kmalloc(sizeof(*new), GFP_NOFS);
	if (!new)
		return -ENOMEM;

	/* NOTE(review): an -ENOMEM from mark_written_sectors() is
	 * ignored; missing WRITTEN tags make is_range_written() below
	 * return false, shrinking (not corrupting) the committed range -
	 * confirm this best-effort behavior is intended.
	 */
	mark_written_sectors(be->be_inval, offset, length);
	/* We want to add the range to commit list, but it must be
	 * block-normalized, and verified that the normalized range has
	 * been entirely written to disk.
	 */
	new->bse_f_offset = offset;
	/* Round the start down to a block boundary; keep the rounded
	 * start only if that whole leading block has been written.
	 */
	offset = normalize(offset, bl->bl_blocksize);
	if (offset < new->bse_f_offset) {
		if (is_range_written(be->be_inval, offset, new->bse_f_offset))
			new->bse_f_offset = offset;
		else
			new->bse_f_offset = offset + bl->bl_blocksize;
	}
	/* Likewise round the end up, keeping it only if fully written */
	new_end = normalize_up(end, bl->bl_blocksize);
	if (end < new_end) {
		if (is_range_written(be->be_inval, end, new_end))
			end = new_end;
		else
			end = new_end - bl->bl_blocksize;
	}
	if (end <= new->bse_f_offset) {
		/* No block-aligned portion is fully written yet */
		kfree(new);
		return 0;
	}
	new->bse_length = end - new->bse_f_offset;
	new->bse_devid = be->be_devid;
	new->bse_mdev = be->be_mdev;

	spin_lock(&bl->bl_ext_lock);
	/* new will be freed, either by add_to_commitlist if it decides not
	 * to use it, or after LAYOUTCOMMIT uses it in the commitlist.
	 */
	add_to_commitlist(bl, new);
	spin_unlock(&bl->bl_ext_lock);
	return 0;
}
493 | |||
494 | static void print_bl_extent(struct pnfs_block_extent *be) | ||
495 | { | ||
496 | dprintk("PRINT EXTENT extent %p\n", be); | ||
497 | if (be) { | ||
498 | dprintk(" be_f_offset %llu\n", (u64)be->be_f_offset); | ||
499 | dprintk(" be_length %llu\n", (u64)be->be_length); | ||
500 | dprintk(" be_v_offset %llu\n", (u64)be->be_v_offset); | ||
501 | dprintk(" be_state %d\n", be->be_state); | ||
502 | } | ||
503 | } | ||
504 | |||
505 | static void | ||
506 | destroy_extent(struct kref *kref) | ||
507 | { | ||
508 | struct pnfs_block_extent *be; | ||
509 | |||
510 | be = container_of(kref, struct pnfs_block_extent, be_refcnt); | ||
511 | dprintk("%s be=%p\n", __func__, be); | ||
512 | kfree(be); | ||
513 | } | ||
514 | |||
515 | void | ||
516 | bl_put_extent(struct pnfs_block_extent *be) | ||
517 | { | ||
518 | if (be) { | ||
519 | dprintk("%s enter %p (%i)\n", __func__, be, | ||
520 | atomic_read(&be->be_refcnt.refcount)); | ||
521 | kref_put(&be->be_refcnt, destroy_extent); | ||
522 | } | ||
523 | } | ||
524 | |||
525 | struct pnfs_block_extent *bl_alloc_extent(void) | ||
526 | { | ||
527 | struct pnfs_block_extent *be; | ||
528 | |||
529 | be = kmalloc(sizeof(struct pnfs_block_extent), GFP_NOFS); | ||
530 | if (!be) | ||
531 | return NULL; | ||
532 | INIT_LIST_HEAD(&be->be_node); | ||
533 | kref_init(&be->be_refcnt); | ||
534 | be->be_inval = NULL; | ||
535 | return be; | ||
536 | } | ||
537 | |||
538 | static void print_elist(struct list_head *list) | ||
539 | { | ||
540 | struct pnfs_block_extent *be; | ||
541 | dprintk("****************\n"); | ||
542 | dprintk("Extent list looks like:\n"); | ||
543 | list_for_each_entry(be, list, be_node) { | ||
544 | print_bl_extent(be); | ||
545 | } | ||
546 | dprintk("****************\n"); | ||
547 | } | ||
548 | |||
549 | static inline int | ||
550 | extents_consistent(struct pnfs_block_extent *old, struct pnfs_block_extent *new) | ||
551 | { | ||
552 | /* Note this assumes new->be_f_offset >= old->be_f_offset */ | ||
553 | return (new->be_state == old->be_state) && | ||
554 | ((new->be_state == PNFS_BLOCK_NONE_DATA) || | ||
555 | ((new->be_v_offset - old->be_v_offset == | ||
556 | new->be_f_offset - old->be_f_offset) && | ||
557 | new->be_mdev == old->be_mdev)); | ||
558 | } | ||
559 | |||
/* Adds new to appropriate list in bl, modifying new and removing existing
 * extents as appropriate to deal with overlaps.
 *
 * See bl_find_get_extent for list constraints.
 *
 * Refcount on new is already set. If end up not using it, or error out,
 * need to put the reference.
 *
 * bl->bl_ext_lock is held by caller.
 *
 * Returns 0 on success, -EIO when an overlapping extent disagrees with
 * new (inconsistent state/mapping); new's reference is dropped on both
 * the subset and the error paths.
 */
int
bl_add_merge_extent(struct pnfs_block_layout *bl,
		     struct pnfs_block_extent *new)
{
	struct pnfs_block_extent *be, *tmp;
	sector_t end = new->be_f_offset + new->be_length;
	struct list_head *list;

	dprintk("%s enter with be=%p\n", __func__, new);
	print_bl_extent(new);
	list = &bl->bl_extents[bl_choose_list(new->be_state)];
	print_elist(list);

	/* Scan for proper place to insert, extending new to the left
	 * as much as possible.
	 */
	list_for_each_entry_safe_reverse(be, tmp, list, be_node) {
		if (new->be_f_offset >= be->be_f_offset + be->be_length)
			break;	/* be is entirely before new */
		if (new->be_f_offset >= be->be_f_offset) {
			if (end <= be->be_f_offset + be->be_length) {
				/* new is a subset of existing be*/
				if (extents_consistent(be, new)) {
					dprintk("%s: new is subset, ignoring\n",
						__func__);
					bl_put_extent(new);
					return 0;
				} else {
					goto out_err;
				}
			} else {
				/* |<--   be   -->|
				 *          |<--   new   -->| */
				if (extents_consistent(be, new)) {
					/* extend new to fully replace be */
					new->be_length += new->be_f_offset -
						be->be_f_offset;
					new->be_f_offset = be->be_f_offset;
					new->be_v_offset = be->be_v_offset;
					dprintk("%s: removing %p\n", __func__, be);
					list_del(&be->be_node);
					bl_put_extent(be);
				} else {
					goto out_err;
				}
			}
		} else if (end >= be->be_f_offset + be->be_length) {
			/* new extent overlap existing be */
			if (extents_consistent(be, new)) {
				/* extend new to fully replace be */
				dprintk("%s: removing %p\n", __func__, be);
				list_del(&be->be_node);
				bl_put_extent(be);
			} else {
				goto out_err;
			}
		} else if (end > be->be_f_offset) {
			/*          |<--   be   -->|
			 *|<--   new   -->| */
			if (extents_consistent(new, be)) {
				/* extend new to fully replace be */
				new->be_length += be->be_f_offset + be->be_length -
					new->be_f_offset - new->be_length;
				dprintk("%s: removing %p\n", __func__, be);
				list_del(&be->be_node);
				bl_put_extent(be);
			} else {
				goto out_err;
			}
		}
	}
	/* Note that if we never hit the above break, be will not point to a
	 * valid extent. However, in that case &be->be_node==list.
	 */
	list_add(&new->be_node, &be->be_node);
	dprintk("%s: inserting new\n", __func__);
	print_elist(list);
	/* FIXME - The per-list consistency checks have all been done,
	 * should now check cross-list consistency.
	 */
	return 0;

 out_err:
	bl_put_extent(new);
	return -EIO;
}
656 | |||
/* Returns extent, or NULL. If a second READ extent exists, it is returned
 * in cow_read, if given.
 *
 * The extents are kept in two seperate ordered lists, one for READ and NONE,
 * one for READWRITE and INVALID. Within each list, we assume:
 * 1. Extents are ordered by file offset.
 * 2. For any given isect, there is at most one extents that matches.
 *
 * Each returned extent (including *cow_read) carries a reference the
 * caller must drop with bl_put_extent().
 */
struct pnfs_block_extent *
bl_find_get_extent(struct pnfs_block_layout *bl, sector_t isect,
	    struct pnfs_block_extent **cow_read)
{
	struct pnfs_block_extent *be, *cow, *ret;
	int i;

	dprintk("%s enter with isect %llu\n", __func__, (u64)isect);
	cow = ret = NULL;
	spin_lock(&bl->bl_ext_lock);
	for (i = 0; i < EXTENT_LISTS; i++) {
		list_for_each_entry_reverse(be, &bl->bl_extents[i], be_node) {
			if (isect >= be->be_f_offset + be->be_length)
				break;	/* rest of list is before isect */
			if (isect >= be->be_f_offset) {
				/* We have found an extent */
				dprintk("%s Get %p (%i)\n", __func__, be,
					atomic_read(&be->be_refcnt.refcount));
				kref_get(&be->be_refcnt);
				if (!ret)
					ret = be;
				else if (be->be_state != PNFS_BLOCK_READ_DATA)
					bl_put_extent(be);
				else
					cow = be;	/* READ extent backing COW */
				break;
			}
		}
		/* Keep searching the second list only when the primary
		 * match is INVALID and the caller wants a COW source.
		 */
		if (ret &&
		    (!cow_read || ret->be_state != PNFS_BLOCK_INVALID_DATA))
			break;
	}
	spin_unlock(&bl->bl_ext_lock);
	if (cow_read)
		*cow_read = cow;
	print_bl_extent(ret);
	return ret;
}
703 | |||
704 | /* Similar to bl_find_get_extent, but called with lock held, and ignores cow */ | ||
705 | static struct pnfs_block_extent * | ||
706 | bl_find_get_extent_locked(struct pnfs_block_layout *bl, sector_t isect) | ||
707 | { | ||
708 | struct pnfs_block_extent *be, *ret = NULL; | ||
709 | int i; | ||
710 | |||
711 | dprintk("%s enter with isect %llu\n", __func__, (u64)isect); | ||
712 | for (i = 0; i < EXTENT_LISTS; i++) { | ||
713 | if (ret) | ||
714 | break; | ||
715 | list_for_each_entry_reverse(be, &bl->bl_extents[i], be_node) { | ||
716 | if (isect >= be->be_f_offset + be->be_length) | ||
717 | break; | ||
718 | if (isect >= be->be_f_offset) { | ||
719 | /* We have found an extent */ | ||
720 | dprintk("%s Get %p (%i)\n", __func__, be, | ||
721 | atomic_read(&be->be_refcnt.refcount)); | ||
722 | kref_get(&be->be_refcnt); | ||
723 | ret = be; | ||
724 | break; | ||
725 | } | ||
726 | } | ||
727 | } | ||
728 | print_bl_extent(ret); | ||
729 | return ret; | ||
730 | } | ||
731 | |||
/* XDR-encode the pending commit ranges for LAYOUTCOMMIT.  Each encoded
 * extent is moved from bl_commit to bl_committing; `arg` is currently
 * unused.  Always returns 0 - NOTE(review): a failed xdr_reserve_space()
 * silently truncates the encoding; confirm callers handle a short or
 * empty extent count.
 */
int
encode_pnfs_block_layoutupdate(struct pnfs_block_layout *bl,
			       struct xdr_stream *xdr,
			       const struct nfs4_layoutcommit_args *arg)
{
	struct pnfs_block_short_extent *lce, *save;
	unsigned int count = 0;
	__be32 *p, *xdr_start;

	dprintk("%s enter\n", __func__);
	/* BUG - creation of bl_commit is buggy - need to wait for
	 * entire block to be marked WRITTEN before it can be added.
	 */
	spin_lock(&bl->bl_ext_lock);
	/* Want to adjust for possible truncate */
	/* We now want to adjust argument range */

	/* XDR encode the ranges found */
	/* Reserve two words up front: total opaque length and extent
	 * count, backfilled below once we know how much was written.
	 */
	xdr_start = xdr_reserve_space(xdr, 8);
	if (!xdr_start)
		goto out;
	list_for_each_entry_safe(lce, save, &bl->bl_commit, bse_node) {
		p = xdr_reserve_space(xdr, 7 * 4 + sizeof(lce->bse_devid.data));
		if (!p)
			break;
		p = xdr_encode_opaque_fixed(p, lce->bse_devid.data, NFS4_DEVICEID4_SIZE);
		/* Extents are stored in 512-byte sectors; the wire
		 * format wants bytes.
		 */
		p = xdr_encode_hyper(p, lce->bse_f_offset << SECTOR_SHIFT);
		p = xdr_encode_hyper(p, lce->bse_length << SECTOR_SHIFT);
		p = xdr_encode_hyper(p, 0LL);
		*p++ = cpu_to_be32(PNFS_BLOCK_READWRITE_DATA);
		list_del(&lce->bse_node);
		list_add_tail(&lce->bse_node, &bl->bl_committing);
		bl->bl_count--;
		count++;
	}
	xdr_start[0] = cpu_to_be32((xdr->p - xdr_start - 1) * 4);
	xdr_start[1] = cpu_to_be32(count);
out:
	spin_unlock(&bl->bl_ext_lock);
	dprintk("%s found %i ranges\n", __func__, count);
	return 0;
}
774 | |||
775 | /* Helper function to set_to_rw that initialize a new extent */ | ||
776 | static void | ||
777 | _prep_new_extent(struct pnfs_block_extent *new, | ||
778 | struct pnfs_block_extent *orig, | ||
779 | sector_t offset, sector_t length, int state) | ||
780 | { | ||
781 | kref_init(&new->be_refcnt); | ||
782 | /* don't need to INIT_LIST_HEAD(&new->be_node) */ | ||
783 | memcpy(&new->be_devid, &orig->be_devid, sizeof(struct nfs4_deviceid)); | ||
784 | new->be_mdev = orig->be_mdev; | ||
785 | new->be_f_offset = offset; | ||
786 | new->be_length = length; | ||
787 | new->be_v_offset = orig->be_v_offset - orig->be_f_offset + offset; | ||
788 | new->be_state = state; | ||
789 | new->be_inval = orig->be_inval; | ||
790 | } | ||
791 | |||
792 | /* Tries to merge be with extent in front of it in list. | ||
793 | * Frees storage if not used. | ||
794 | */ | ||
795 | static struct pnfs_block_extent * | ||
796 | _front_merge(struct pnfs_block_extent *be, struct list_head *head, | ||
797 | struct pnfs_block_extent *storage) | ||
798 | { | ||
799 | struct pnfs_block_extent *prev; | ||
800 | |||
801 | if (!storage) | ||
802 | goto no_merge; | ||
803 | if (&be->be_node == head || be->be_node.prev == head) | ||
804 | goto no_merge; | ||
805 | prev = list_entry(be->be_node.prev, struct pnfs_block_extent, be_node); | ||
806 | if ((prev->be_f_offset + prev->be_length != be->be_f_offset) || | ||
807 | !extents_consistent(prev, be)) | ||
808 | goto no_merge; | ||
809 | _prep_new_extent(storage, prev, prev->be_f_offset, | ||
810 | prev->be_length + be->be_length, prev->be_state); | ||
811 | list_replace(&prev->be_node, &storage->be_node); | ||
812 | bl_put_extent(prev); | ||
813 | list_del(&be->be_node); | ||
814 | bl_put_extent(be); | ||
815 | return storage; | ||
816 | |||
817 | no_merge: | ||
818 | kfree(storage); | ||
819 | return be; | ||
820 | } | ||
821 | |||
822 | static u64 | ||
823 | set_to_rw(struct pnfs_block_layout *bl, u64 offset, u64 length) | ||
824 | { | ||
825 | u64 rv = offset + length; | ||
826 | struct pnfs_block_extent *be, *e1, *e2, *e3, *new, *old; | ||
827 | struct pnfs_block_extent *children[3]; | ||
828 | struct pnfs_block_extent *merge1 = NULL, *merge2 = NULL; | ||
829 | int i = 0, j; | ||
830 | |||
831 | dprintk("%s(%llu, %llu)\n", __func__, offset, length); | ||
832 | /* Create storage for up to three new extents e1, e2, e3 */ | ||
833 | e1 = kmalloc(sizeof(*e1), GFP_ATOMIC); | ||
834 | e2 = kmalloc(sizeof(*e2), GFP_ATOMIC); | ||
835 | e3 = kmalloc(sizeof(*e3), GFP_ATOMIC); | ||
836 | /* BUG - we are ignoring any failure */ | ||
837 | if (!e1 || !e2 || !e3) | ||
838 | goto out_nosplit; | ||
839 | |||
840 | spin_lock(&bl->bl_ext_lock); | ||
841 | be = bl_find_get_extent_locked(bl, offset); | ||
842 | rv = be->be_f_offset + be->be_length; | ||
843 | if (be->be_state != PNFS_BLOCK_INVALID_DATA) { | ||
844 | spin_unlock(&bl->bl_ext_lock); | ||
845 | goto out_nosplit; | ||
846 | } | ||
847 | /* Add e* to children, bumping e*'s krefs */ | ||
848 | if (be->be_f_offset != offset) { | ||
849 | _prep_new_extent(e1, be, be->be_f_offset, | ||
850 | offset - be->be_f_offset, | ||
851 | PNFS_BLOCK_INVALID_DATA); | ||
852 | children[i++] = e1; | ||
853 | print_bl_extent(e1); | ||
854 | } else | ||
855 | merge1 = e1; | ||
856 | _prep_new_extent(e2, be, offset, | ||
857 | min(length, be->be_f_offset + be->be_length - offset), | ||
858 | PNFS_BLOCK_READWRITE_DATA); | ||
859 | children[i++] = e2; | ||
860 | print_bl_extent(e2); | ||
861 | if (offset + length < be->be_f_offset + be->be_length) { | ||
862 | _prep_new_extent(e3, be, e2->be_f_offset + e2->be_length, | ||
863 | be->be_f_offset + be->be_length - | ||
864 | offset - length, | ||
865 | PNFS_BLOCK_INVALID_DATA); | ||
866 | children[i++] = e3; | ||
867 | print_bl_extent(e3); | ||
868 | } else | ||
869 | merge2 = e3; | ||
870 | |||
871 | /* Remove be from list, and insert the e* */ | ||
872 | /* We don't get refs on e*, since this list is the base reference | ||
873 | * set when init'ed. | ||
874 | */ | ||
875 | if (i < 3) | ||
876 | children[i] = NULL; | ||
877 | new = children[0]; | ||
878 | list_replace(&be->be_node, &new->be_node); | ||
879 | bl_put_extent(be); | ||
880 | new = _front_merge(new, &bl->bl_extents[RW_EXTENT], merge1); | ||
881 | for (j = 1; j < i; j++) { | ||
882 | old = new; | ||
883 | new = children[j]; | ||
884 | list_add(&new->be_node, &old->be_node); | ||
885 | } | ||
886 | if (merge2) { | ||
887 | /* This is a HACK, should just create a _back_merge function */ | ||
888 | new = list_entry(new->be_node.next, | ||
889 | struct pnfs_block_extent, be_node); | ||
890 | new = _front_merge(new, &bl->bl_extents[RW_EXTENT], merge2); | ||
891 | } | ||
892 | spin_unlock(&bl->bl_ext_lock); | ||
893 | |||
894 | /* Since we removed the base reference above, be is now scheduled for | ||
895 | * destruction. | ||
896 | */ | ||
897 | bl_put_extent(be); | ||
898 | dprintk("%s returns %llu after split\n", __func__, rv); | ||
899 | return rv; | ||
900 | |||
901 | out_nosplit: | ||
902 | kfree(e1); | ||
903 | kfree(e2); | ||
904 | kfree(e3); | ||
905 | dprintk("%s returns %llu without splitting\n", __func__, rv); | ||
906 | return rv; | ||
907 | } | ||
908 | |||
909 | void | ||
910 | clean_pnfs_block_layoutupdate(struct pnfs_block_layout *bl, | ||
911 | const struct nfs4_layoutcommit_args *arg, | ||
912 | int status) | ||
913 | { | ||
914 | struct pnfs_block_short_extent *lce, *save; | ||
915 | |||
916 | dprintk("%s status %d\n", __func__, status); | ||
917 | list_for_each_entry_safe(lce, save, &bl->bl_committing, bse_node) { | ||
918 | if (likely(!status)) { | ||
919 | u64 offset = lce->bse_f_offset; | ||
920 | u64 end = offset + lce->bse_length; | ||
921 | |||
922 | do { | ||
923 | offset = set_to_rw(bl, offset, end - offset); | ||
924 | } while (offset < end); | ||
925 | list_del(&lce->bse_node); | ||
926 | |||
927 | kfree(lce); | ||
928 | } else { | ||
929 | list_del(&lce->bse_node); | ||
930 | spin_lock(&bl->bl_ext_lock); | ||
931 | add_to_commitlist(bl, lce); | ||
932 | spin_unlock(&bl->bl_ext_lock); | ||
933 | } | ||
934 | } | ||
935 | } | ||
diff --git a/fs/nfs/cache_lib.c b/fs/nfs/cache_lib.c index 84690319e62..c98b439332f 100644 --- a/fs/nfs/cache_lib.c +++ b/fs/nfs/cache_lib.c | |||
@@ -113,19 +113,18 @@ int nfs_cache_wait_for_upcall(struct nfs_cache_defer_req *dreq) | |||
113 | 113 | ||
114 | int nfs_cache_register(struct cache_detail *cd) | 114 | int nfs_cache_register(struct cache_detail *cd) |
115 | { | 115 | { |
116 | struct nameidata nd; | ||
117 | struct vfsmount *mnt; | 116 | struct vfsmount *mnt; |
117 | struct path path; | ||
118 | int ret; | 118 | int ret; |
119 | 119 | ||
120 | mnt = rpc_get_mount(); | 120 | mnt = rpc_get_mount(); |
121 | if (IS_ERR(mnt)) | 121 | if (IS_ERR(mnt)) |
122 | return PTR_ERR(mnt); | 122 | return PTR_ERR(mnt); |
123 | ret = vfs_path_lookup(mnt->mnt_root, mnt, "/cache", 0, &nd); | 123 | ret = vfs_path_lookup(mnt->mnt_root, mnt, "/cache", 0, &path); |
124 | if (ret) | 124 | if (ret) |
125 | goto err; | 125 | goto err; |
126 | ret = sunrpc_cache_register_pipefs(nd.path.dentry, | 126 | ret = sunrpc_cache_register_pipefs(path.dentry, cd->name, 0600, cd); |
127 | cd->name, 0600, cd); | 127 | path_put(&path); |
128 | path_put(&nd.path); | ||
129 | if (!ret) | 128 | if (!ret) |
130 | return ret; | 129 | return ret; |
131 | err: | 130 | err: |
diff --git a/fs/nfs/cache_lib.h b/fs/nfs/cache_lib.h index 76f856e284e..7cf6cafcc00 100644 --- a/fs/nfs/cache_lib.h +++ b/fs/nfs/cache_lib.h | |||
@@ -6,7 +6,7 @@ | |||
6 | 6 | ||
7 | #include <linux/completion.h> | 7 | #include <linux/completion.h> |
8 | #include <linux/sunrpc/cache.h> | 8 | #include <linux/sunrpc/cache.h> |
9 | #include <asm/atomic.h> | 9 | #include <linux/atomic.h> |
10 | 10 | ||
11 | /* | 11 | /* |
12 | * Deferred request handling | 12 | * Deferred request handling |
diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h index b257383bb56..07df5f1d85e 100644 --- a/fs/nfs/callback.h +++ b/fs/nfs/callback.h | |||
@@ -38,6 +38,7 @@ enum nfs4_callback_opnum { | |||
38 | struct cb_process_state { | 38 | struct cb_process_state { |
39 | __be32 drc_status; | 39 | __be32 drc_status; |
40 | struct nfs_client *clp; | 40 | struct nfs_client *clp; |
41 | int slotid; | ||
41 | }; | 42 | }; |
42 | 43 | ||
43 | struct cb_compound_hdr_arg { | 44 | struct cb_compound_hdr_arg { |
@@ -166,7 +167,6 @@ extern unsigned nfs4_callback_layoutrecall( | |||
166 | void *dummy, struct cb_process_state *cps); | 167 | void *dummy, struct cb_process_state *cps); |
167 | 168 | ||
168 | extern void nfs4_check_drain_bc_complete(struct nfs4_session *ses); | 169 | extern void nfs4_check_drain_bc_complete(struct nfs4_session *ses); |
169 | extern void nfs4_cb_take_slot(struct nfs_client *clp); | ||
170 | 170 | ||
171 | struct cb_devicenotifyitem { | 171 | struct cb_devicenotifyitem { |
172 | uint32_t cbd_notify_type; | 172 | uint32_t cbd_notify_type; |
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index d4d1954e9bb..54cea8ad5a7 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c | |||
@@ -111,6 +111,7 @@ int nfs4_validate_delegation_stateid(struct nfs_delegation *delegation, const nf | |||
111 | static u32 initiate_file_draining(struct nfs_client *clp, | 111 | static u32 initiate_file_draining(struct nfs_client *clp, |
112 | struct cb_layoutrecallargs *args) | 112 | struct cb_layoutrecallargs *args) |
113 | { | 113 | { |
114 | struct nfs_server *server; | ||
114 | struct pnfs_layout_hdr *lo; | 115 | struct pnfs_layout_hdr *lo; |
115 | struct inode *ino; | 116 | struct inode *ino; |
116 | bool found = false; | 117 | bool found = false; |
@@ -118,21 +119,28 @@ static u32 initiate_file_draining(struct nfs_client *clp, | |||
118 | LIST_HEAD(free_me_list); | 119 | LIST_HEAD(free_me_list); |
119 | 120 | ||
120 | spin_lock(&clp->cl_lock); | 121 | spin_lock(&clp->cl_lock); |
121 | list_for_each_entry(lo, &clp->cl_layouts, plh_layouts) { | 122 | rcu_read_lock(); |
122 | if (nfs_compare_fh(&args->cbl_fh, | 123 | list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) { |
123 | &NFS_I(lo->plh_inode)->fh)) | 124 | list_for_each_entry(lo, &server->layouts, plh_layouts) { |
124 | continue; | 125 | if (nfs_compare_fh(&args->cbl_fh, |
125 | ino = igrab(lo->plh_inode); | 126 | &NFS_I(lo->plh_inode)->fh)) |
126 | if (!ino) | 127 | continue; |
127 | continue; | 128 | ino = igrab(lo->plh_inode); |
128 | found = true; | 129 | if (!ino) |
129 | /* Without this, layout can be freed as soon | 130 | continue; |
130 | * as we release cl_lock. | 131 | found = true; |
131 | */ | 132 | /* Without this, layout can be freed as soon |
132 | get_layout_hdr(lo); | 133 | * as we release cl_lock. |
133 | break; | 134 | */ |
135 | get_layout_hdr(lo); | ||
136 | break; | ||
137 | } | ||
138 | if (found) | ||
139 | break; | ||
134 | } | 140 | } |
141 | rcu_read_unlock(); | ||
135 | spin_unlock(&clp->cl_lock); | 142 | spin_unlock(&clp->cl_lock); |
143 | |||
136 | if (!found) | 144 | if (!found) |
137 | return NFS4ERR_NOMATCHING_LAYOUT; | 145 | return NFS4ERR_NOMATCHING_LAYOUT; |
138 | 146 | ||
@@ -154,6 +162,7 @@ static u32 initiate_file_draining(struct nfs_client *clp, | |||
154 | static u32 initiate_bulk_draining(struct nfs_client *clp, | 162 | static u32 initiate_bulk_draining(struct nfs_client *clp, |
155 | struct cb_layoutrecallargs *args) | 163 | struct cb_layoutrecallargs *args) |
156 | { | 164 | { |
165 | struct nfs_server *server; | ||
157 | struct pnfs_layout_hdr *lo; | 166 | struct pnfs_layout_hdr *lo; |
158 | struct inode *ino; | 167 | struct inode *ino; |
159 | u32 rv = NFS4ERR_NOMATCHING_LAYOUT; | 168 | u32 rv = NFS4ERR_NOMATCHING_LAYOUT; |
@@ -167,18 +176,24 @@ static u32 initiate_bulk_draining(struct nfs_client *clp, | |||
167 | }; | 176 | }; |
168 | 177 | ||
169 | spin_lock(&clp->cl_lock); | 178 | spin_lock(&clp->cl_lock); |
170 | list_for_each_entry(lo, &clp->cl_layouts, plh_layouts) { | 179 | rcu_read_lock(); |
180 | list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) { | ||
171 | if ((args->cbl_recall_type == RETURN_FSID) && | 181 | if ((args->cbl_recall_type == RETURN_FSID) && |
172 | memcmp(&NFS_SERVER(lo->plh_inode)->fsid, | 182 | memcmp(&server->fsid, &args->cbl_fsid, |
173 | &args->cbl_fsid, sizeof(struct nfs_fsid))) | 183 | sizeof(struct nfs_fsid))) |
174 | continue; | ||
175 | if (!igrab(lo->plh_inode)) | ||
176 | continue; | 184 | continue; |
177 | get_layout_hdr(lo); | 185 | |
178 | BUG_ON(!list_empty(&lo->plh_bulk_recall)); | 186 | list_for_each_entry(lo, &server->layouts, plh_layouts) { |
179 | list_add(&lo->plh_bulk_recall, &recall_list); | 187 | if (!igrab(lo->plh_inode)) |
188 | continue; | ||
189 | get_layout_hdr(lo); | ||
190 | BUG_ON(!list_empty(&lo->plh_bulk_recall)); | ||
191 | list_add(&lo->plh_bulk_recall, &recall_list); | ||
192 | } | ||
180 | } | 193 | } |
194 | rcu_read_unlock(); | ||
181 | spin_unlock(&clp->cl_lock); | 195 | spin_unlock(&clp->cl_lock); |
196 | |||
182 | list_for_each_entry_safe(lo, tmp, | 197 | list_for_each_entry_safe(lo, tmp, |
183 | &recall_list, plh_bulk_recall) { | 198 | &recall_list, plh_bulk_recall) { |
184 | ino = lo->plh_inode; | 199 | ino = lo->plh_inode; |
@@ -324,7 +339,7 @@ validate_seqid(struct nfs4_slot_table *tbl, struct cb_sequenceargs * args) | |||
324 | dprintk("%s enter. slotid %d seqid %d\n", | 339 | dprintk("%s enter. slotid %d seqid %d\n", |
325 | __func__, args->csa_slotid, args->csa_sequenceid); | 340 | __func__, args->csa_slotid, args->csa_sequenceid); |
326 | 341 | ||
327 | if (args->csa_slotid > NFS41_BC_MAX_CALLBACKS) | 342 | if (args->csa_slotid >= NFS41_BC_MAX_CALLBACKS) |
328 | return htonl(NFS4ERR_BADSLOT); | 343 | return htonl(NFS4ERR_BADSLOT); |
329 | 344 | ||
330 | slot = tbl->slots + args->csa_slotid; | 345 | slot = tbl->slots + args->csa_slotid; |
@@ -333,7 +348,7 @@ validate_seqid(struct nfs4_slot_table *tbl, struct cb_sequenceargs * args) | |||
333 | /* Normal */ | 348 | /* Normal */ |
334 | if (likely(args->csa_sequenceid == slot->seq_nr + 1)) { | 349 | if (likely(args->csa_sequenceid == slot->seq_nr + 1)) { |
335 | slot->seq_nr++; | 350 | slot->seq_nr++; |
336 | return htonl(NFS4_OK); | 351 | goto out_ok; |
337 | } | 352 | } |
338 | 353 | ||
339 | /* Replay */ | 354 | /* Replay */ |
@@ -352,11 +367,14 @@ validate_seqid(struct nfs4_slot_table *tbl, struct cb_sequenceargs * args) | |||
352 | /* Wraparound */ | 367 | /* Wraparound */ |
353 | if (args->csa_sequenceid == 1 && (slot->seq_nr + 1) == 0) { | 368 | if (args->csa_sequenceid == 1 && (slot->seq_nr + 1) == 0) { |
354 | slot->seq_nr = 1; | 369 | slot->seq_nr = 1; |
355 | return htonl(NFS4_OK); | 370 | goto out_ok; |
356 | } | 371 | } |
357 | 372 | ||
358 | /* Misordered request */ | 373 | /* Misordered request */ |
359 | return htonl(NFS4ERR_SEQ_MISORDERED); | 374 | return htonl(NFS4ERR_SEQ_MISORDERED); |
375 | out_ok: | ||
376 | tbl->highest_used_slotid = args->csa_slotid; | ||
377 | return htonl(NFS4_OK); | ||
360 | } | 378 | } |
361 | 379 | ||
362 | /* | 380 | /* |
@@ -418,26 +436,37 @@ __be32 nfs4_callback_sequence(struct cb_sequenceargs *args, | |||
418 | struct cb_sequenceres *res, | 436 | struct cb_sequenceres *res, |
419 | struct cb_process_state *cps) | 437 | struct cb_process_state *cps) |
420 | { | 438 | { |
439 | struct nfs4_slot_table *tbl; | ||
421 | struct nfs_client *clp; | 440 | struct nfs_client *clp; |
422 | int i; | 441 | int i; |
423 | __be32 status = htonl(NFS4ERR_BADSESSION); | 442 | __be32 status = htonl(NFS4ERR_BADSESSION); |
424 | 443 | ||
425 | cps->clp = NULL; | ||
426 | |||
427 | clp = nfs4_find_client_sessionid(args->csa_addr, &args->csa_sessionid); | 444 | clp = nfs4_find_client_sessionid(args->csa_addr, &args->csa_sessionid); |
428 | if (clp == NULL) | 445 | if (clp == NULL) |
429 | goto out; | 446 | goto out; |
430 | 447 | ||
448 | tbl = &clp->cl_session->bc_slot_table; | ||
449 | |||
450 | spin_lock(&tbl->slot_tbl_lock); | ||
431 | /* state manager is resetting the session */ | 451 | /* state manager is resetting the session */ |
432 | if (test_bit(NFS4_SESSION_DRAINING, &clp->cl_session->session_state)) { | 452 | if (test_bit(NFS4_SESSION_DRAINING, &clp->cl_session->session_state)) { |
433 | status = NFS4ERR_DELAY; | 453 | spin_unlock(&tbl->slot_tbl_lock); |
454 | status = htonl(NFS4ERR_DELAY); | ||
455 | /* Return NFS4ERR_BADSESSION if we're draining the session | ||
456 | * in order to reset it. | ||
457 | */ | ||
458 | if (test_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state)) | ||
459 | status = htonl(NFS4ERR_BADSESSION); | ||
434 | goto out; | 460 | goto out; |
435 | } | 461 | } |
436 | 462 | ||
437 | status = validate_seqid(&clp->cl_session->bc_slot_table, args); | 463 | status = validate_seqid(&clp->cl_session->bc_slot_table, args); |
464 | spin_unlock(&tbl->slot_tbl_lock); | ||
438 | if (status) | 465 | if (status) |
439 | goto out; | 466 | goto out; |
440 | 467 | ||
468 | cps->slotid = args->csa_slotid; | ||
469 | |||
441 | /* | 470 | /* |
442 | * Check for pending referring calls. If a match is found, a | 471 | * Check for pending referring calls. If a match is found, a |
443 | * related callback was received before the response to the original | 472 | * related callback was received before the response to the original |
@@ -454,7 +483,6 @@ __be32 nfs4_callback_sequence(struct cb_sequenceargs *args, | |||
454 | res->csr_slotid = args->csa_slotid; | 483 | res->csr_slotid = args->csa_slotid; |
455 | res->csr_highestslotid = NFS41_BC_MAX_CALLBACKS - 1; | 484 | res->csr_highestslotid = NFS41_BC_MAX_CALLBACKS - 1; |
456 | res->csr_target_highestslotid = NFS41_BC_MAX_CALLBACKS - 1; | 485 | res->csr_target_highestslotid = NFS41_BC_MAX_CALLBACKS - 1; |
457 | nfs4_cb_take_slot(clp); | ||
458 | 486 | ||
459 | out: | 487 | out: |
460 | cps->clp = clp; /* put in nfs4_callback_compound */ | 488 | cps->clp = clp; /* put in nfs4_callback_compound */ |
diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c index c6c86a77e04..918ad647afe 100644 --- a/fs/nfs/callback_xdr.c +++ b/fs/nfs/callback_xdr.c | |||
@@ -754,26 +754,15 @@ static void nfs4_callback_free_slot(struct nfs4_session *session) | |||
754 | * Let the state manager know callback processing done. | 754 | * Let the state manager know callback processing done. |
755 | * A single slot, so highest used slotid is either 0 or -1 | 755 | * A single slot, so highest used slotid is either 0 or -1 |
756 | */ | 756 | */ |
757 | tbl->highest_used_slotid--; | 757 | tbl->highest_used_slotid = -1; |
758 | nfs4_check_drain_bc_complete(session); | 758 | nfs4_check_drain_bc_complete(session); |
759 | spin_unlock(&tbl->slot_tbl_lock); | 759 | spin_unlock(&tbl->slot_tbl_lock); |
760 | } | 760 | } |
761 | 761 | ||
762 | static void nfs4_cb_free_slot(struct nfs_client *clp) | 762 | static void nfs4_cb_free_slot(struct cb_process_state *cps) |
763 | { | 763 | { |
764 | if (clp && clp->cl_session) | 764 | if (cps->slotid != -1) |
765 | nfs4_callback_free_slot(clp->cl_session); | 765 | nfs4_callback_free_slot(cps->clp->cl_session); |
766 | } | ||
767 | |||
768 | /* A single slot, so highest used slotid is either 0 or -1 */ | ||
769 | void nfs4_cb_take_slot(struct nfs_client *clp) | ||
770 | { | ||
771 | struct nfs4_slot_table *tbl = &clp->cl_session->bc_slot_table; | ||
772 | |||
773 | spin_lock(&tbl->slot_tbl_lock); | ||
774 | tbl->highest_used_slotid++; | ||
775 | BUG_ON(tbl->highest_used_slotid != 0); | ||
776 | spin_unlock(&tbl->slot_tbl_lock); | ||
777 | } | 766 | } |
778 | 767 | ||
779 | #else /* CONFIG_NFS_V4_1 */ | 768 | #else /* CONFIG_NFS_V4_1 */ |
@@ -784,7 +773,7 @@ preprocess_nfs41_op(int nop, unsigned int op_nr, struct callback_op **op) | |||
784 | return htonl(NFS4ERR_MINOR_VERS_MISMATCH); | 773 | return htonl(NFS4ERR_MINOR_VERS_MISMATCH); |
785 | } | 774 | } |
786 | 775 | ||
787 | static void nfs4_cb_free_slot(struct nfs_client *clp) | 776 | static void nfs4_cb_free_slot(struct cb_process_state *cps) |
788 | { | 777 | { |
789 | } | 778 | } |
790 | #endif /* CONFIG_NFS_V4_1 */ | 779 | #endif /* CONFIG_NFS_V4_1 */ |
@@ -866,6 +855,7 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *r | |||
866 | struct cb_process_state cps = { | 855 | struct cb_process_state cps = { |
867 | .drc_status = 0, | 856 | .drc_status = 0, |
868 | .clp = NULL, | 857 | .clp = NULL, |
858 | .slotid = -1, | ||
869 | }; | 859 | }; |
870 | unsigned int nops = 0; | 860 | unsigned int nops = 0; |
871 | 861 | ||
@@ -906,7 +896,7 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *r | |||
906 | 896 | ||
907 | *hdr_res.status = status; | 897 | *hdr_res.status = status; |
908 | *hdr_res.nops = htonl(nops); | 898 | *hdr_res.nops = htonl(nops); |
909 | nfs4_cb_free_slot(cps.clp); | 899 | nfs4_cb_free_slot(&cps); |
910 | nfs_put_client(cps.clp); | 900 | nfs_put_client(cps.clp); |
911 | dprintk("%s: done, status = %u\n", __func__, ntohl(status)); | 901 | dprintk("%s: done, status = %u\n", __func__, ntohl(status)); |
912 | return rpc_success; | 902 | return rpc_success; |
diff --git a/fs/nfs/client.c b/fs/nfs/client.c index b3dc2b88b65..5833fbbf59b 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c | |||
@@ -105,7 +105,7 @@ struct rpc_program nfs_program = { | |||
105 | .nrvers = ARRAY_SIZE(nfs_version), | 105 | .nrvers = ARRAY_SIZE(nfs_version), |
106 | .version = nfs_version, | 106 | .version = nfs_version, |
107 | .stats = &nfs_rpcstat, | 107 | .stats = &nfs_rpcstat, |
108 | .pipe_dir_name = "/nfs", | 108 | .pipe_dir_name = NFS_PIPE_DIRNAME, |
109 | }; | 109 | }; |
110 | 110 | ||
111 | struct rpc_stat nfs_rpcstat = { | 111 | struct rpc_stat nfs_rpcstat = { |
@@ -188,9 +188,6 @@ static struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_ | |||
188 | cred = rpc_lookup_machine_cred(); | 188 | cred = rpc_lookup_machine_cred(); |
189 | if (!IS_ERR(cred)) | 189 | if (!IS_ERR(cred)) |
190 | clp->cl_machine_cred = cred; | 190 | clp->cl_machine_cred = cred; |
191 | #if defined(CONFIG_NFS_V4_1) | ||
192 | INIT_LIST_HEAD(&clp->cl_layouts); | ||
193 | #endif | ||
194 | nfs_fscache_get_client_cookie(clp); | 191 | nfs_fscache_get_client_cookie(clp); |
195 | 192 | ||
196 | return clp; | 193 | return clp; |
@@ -293,6 +290,7 @@ static void nfs_free_client(struct nfs_client *clp) | |||
293 | nfs4_deviceid_purge_client(clp); | 290 | nfs4_deviceid_purge_client(clp); |
294 | 291 | ||
295 | kfree(clp->cl_hostname); | 292 | kfree(clp->cl_hostname); |
293 | kfree(clp->server_scope); | ||
296 | kfree(clp); | 294 | kfree(clp); |
297 | 295 | ||
298 | dprintk("<-- nfs_free_client()\n"); | 296 | dprintk("<-- nfs_free_client()\n"); |
@@ -906,7 +904,9 @@ error: | |||
906 | /* | 904 | /* |
907 | * Load up the server record from information gained in an fsinfo record | 905 | * Load up the server record from information gained in an fsinfo record |
908 | */ | 906 | */ |
909 | static void nfs_server_set_fsinfo(struct nfs_server *server, struct nfs_fsinfo *fsinfo) | 907 | static void nfs_server_set_fsinfo(struct nfs_server *server, |
908 | struct nfs_fh *mntfh, | ||
909 | struct nfs_fsinfo *fsinfo) | ||
910 | { | 910 | { |
911 | unsigned long max_rpc_payload; | 911 | unsigned long max_rpc_payload; |
912 | 912 | ||
@@ -936,7 +936,8 @@ static void nfs_server_set_fsinfo(struct nfs_server *server, struct nfs_fsinfo * | |||
936 | if (server->wsize > NFS_MAX_FILE_IO_SIZE) | 936 | if (server->wsize > NFS_MAX_FILE_IO_SIZE) |
937 | server->wsize = NFS_MAX_FILE_IO_SIZE; | 937 | server->wsize = NFS_MAX_FILE_IO_SIZE; |
938 | server->wpages = (server->wsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; | 938 | server->wpages = (server->wsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; |
939 | set_pnfs_layoutdriver(server, fsinfo->layouttype); | 939 | server->pnfs_blksize = fsinfo->blksize; |
940 | set_pnfs_layoutdriver(server, mntfh, fsinfo->layouttype); | ||
940 | 941 | ||
941 | server->wtmult = nfs_block_bits(fsinfo->wtmult, NULL); | 942 | server->wtmult = nfs_block_bits(fsinfo->wtmult, NULL); |
942 | 943 | ||
@@ -982,7 +983,7 @@ static int nfs_probe_fsinfo(struct nfs_server *server, struct nfs_fh *mntfh, str | |||
982 | if (error < 0) | 983 | if (error < 0) |
983 | goto out_error; | 984 | goto out_error; |
984 | 985 | ||
985 | nfs_server_set_fsinfo(server, &fsinfo); | 986 | nfs_server_set_fsinfo(server, mntfh, &fsinfo); |
986 | 987 | ||
987 | /* Get some general file system info */ | 988 | /* Get some general file system info */ |
988 | if (server->namelen == 0) { | 989 | if (server->namelen == 0) { |
@@ -1062,6 +1063,7 @@ static struct nfs_server *nfs_alloc_server(void) | |||
1062 | INIT_LIST_HEAD(&server->client_link); | 1063 | INIT_LIST_HEAD(&server->client_link); |
1063 | INIT_LIST_HEAD(&server->master_link); | 1064 | INIT_LIST_HEAD(&server->master_link); |
1064 | INIT_LIST_HEAD(&server->delegations); | 1065 | INIT_LIST_HEAD(&server->delegations); |
1066 | INIT_LIST_HEAD(&server->layouts); | ||
1065 | 1067 | ||
1066 | atomic_set(&server->active, 0); | 1068 | atomic_set(&server->active, 0); |
1067 | 1069 | ||
@@ -1464,7 +1466,7 @@ struct nfs_client *nfs4_set_ds_client(struct nfs_client* mds_clp, | |||
1464 | dprintk("<-- %s %p\n", __func__, clp); | 1466 | dprintk("<-- %s %p\n", __func__, clp); |
1465 | return clp; | 1467 | return clp; |
1466 | } | 1468 | } |
1467 | EXPORT_SYMBOL(nfs4_set_ds_client); | 1469 | EXPORT_SYMBOL_GPL(nfs4_set_ds_client); |
1468 | 1470 | ||
1469 | /* | 1471 | /* |
1470 | * Session has been established, and the client marked ready. | 1472 | * Session has been established, and the client marked ready. |
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index dd25c2aec37..321a66bc384 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c | |||
@@ -398,12 +398,11 @@ int nfs_inode_return_delegation(struct inode *inode) | |||
398 | return err; | 398 | return err; |
399 | } | 399 | } |
400 | 400 | ||
401 | static void nfs_mark_return_delegation(struct nfs_delegation *delegation) | 401 | static void nfs_mark_return_delegation(struct nfs_server *server, |
402 | struct nfs_delegation *delegation) | ||
402 | { | 403 | { |
403 | struct nfs_client *clp = NFS_SERVER(delegation->inode)->nfs_client; | ||
404 | |||
405 | set_bit(NFS_DELEGATION_RETURN, &delegation->flags); | 404 | set_bit(NFS_DELEGATION_RETURN, &delegation->flags); |
406 | set_bit(NFS4CLNT_DELEGRETURN, &clp->cl_state); | 405 | set_bit(NFS4CLNT_DELEGRETURN, &server->nfs_client->cl_state); |
407 | } | 406 | } |
408 | 407 | ||
409 | /** | 408 | /** |
@@ -441,7 +440,7 @@ static void nfs_mark_return_all_delegation_types(struct nfs_server *server, | |||
441 | if ((delegation->type == (FMODE_READ|FMODE_WRITE)) && !(flags & FMODE_WRITE)) | 440 | if ((delegation->type == (FMODE_READ|FMODE_WRITE)) && !(flags & FMODE_WRITE)) |
442 | continue; | 441 | continue; |
443 | if (delegation->type & flags) | 442 | if (delegation->type & flags) |
444 | nfs_mark_return_delegation(delegation); | 443 | nfs_mark_return_delegation(server, delegation); |
445 | } | 444 | } |
446 | } | 445 | } |
447 | 446 | ||
@@ -508,7 +507,7 @@ static void nfs_mark_return_unreferenced_delegations(struct nfs_server *server) | |||
508 | list_for_each_entry_rcu(delegation, &server->delegations, super_list) { | 507 | list_for_each_entry_rcu(delegation, &server->delegations, super_list) { |
509 | if (test_and_clear_bit(NFS_DELEGATION_REFERENCED, &delegation->flags)) | 508 | if (test_and_clear_bit(NFS_DELEGATION_REFERENCED, &delegation->flags)) |
510 | continue; | 509 | continue; |
511 | nfs_mark_return_delegation(delegation); | 510 | nfs_mark_return_delegation(server, delegation); |
512 | } | 511 | } |
513 | } | 512 | } |
514 | 513 | ||
@@ -539,7 +538,8 @@ void nfs_expire_unreferenced_delegations(struct nfs_client *clp) | |||
539 | int nfs_async_inode_return_delegation(struct inode *inode, | 538 | int nfs_async_inode_return_delegation(struct inode *inode, |
540 | const nfs4_stateid *stateid) | 539 | const nfs4_stateid *stateid) |
541 | { | 540 | { |
542 | struct nfs_client *clp = NFS_SERVER(inode)->nfs_client; | 541 | struct nfs_server *server = NFS_SERVER(inode); |
542 | struct nfs_client *clp = server->nfs_client; | ||
543 | struct nfs_delegation *delegation; | 543 | struct nfs_delegation *delegation; |
544 | 544 | ||
545 | rcu_read_lock(); | 545 | rcu_read_lock(); |
@@ -549,7 +549,7 @@ int nfs_async_inode_return_delegation(struct inode *inode, | |||
549 | rcu_read_unlock(); | 549 | rcu_read_unlock(); |
550 | return -ENOENT; | 550 | return -ENOENT; |
551 | } | 551 | } |
552 | nfs_mark_return_delegation(delegation); | 552 | nfs_mark_return_delegation(server, delegation); |
553 | rcu_read_unlock(); | 553 | rcu_read_unlock(); |
554 | 554 | ||
555 | nfs_delegation_run_state_manager(clp); | 555 | nfs_delegation_run_state_manager(clp); |
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index ededdbd0db3..ac289909814 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c | |||
@@ -56,7 +56,7 @@ static int nfs_link(struct dentry *, struct inode *, struct dentry *); | |||
56 | static int nfs_mknod(struct inode *, struct dentry *, int, dev_t); | 56 | static int nfs_mknod(struct inode *, struct dentry *, int, dev_t); |
57 | static int nfs_rename(struct inode *, struct dentry *, | 57 | static int nfs_rename(struct inode *, struct dentry *, |
58 | struct inode *, struct dentry *); | 58 | struct inode *, struct dentry *); |
59 | static int nfs_fsync_dir(struct file *, int); | 59 | static int nfs_fsync_dir(struct file *, loff_t, loff_t, int); |
60 | static loff_t nfs_llseek_dir(struct file *, loff_t, int); | 60 | static loff_t nfs_llseek_dir(struct file *, loff_t, int); |
61 | static void nfs_readdir_clear_array(struct page*); | 61 | static void nfs_readdir_clear_array(struct page*); |
62 | 62 | ||
@@ -134,18 +134,19 @@ const struct inode_operations nfs4_dir_inode_operations = { | |||
134 | 134 | ||
135 | #endif /* CONFIG_NFS_V4 */ | 135 | #endif /* CONFIG_NFS_V4 */ |
136 | 136 | ||
137 | static struct nfs_open_dir_context *alloc_nfs_open_dir_context(struct rpc_cred *cred) | 137 | static struct nfs_open_dir_context *alloc_nfs_open_dir_context(struct inode *dir, struct rpc_cred *cred) |
138 | { | 138 | { |
139 | struct nfs_open_dir_context *ctx; | 139 | struct nfs_open_dir_context *ctx; |
140 | ctx = kmalloc(sizeof(*ctx), GFP_KERNEL); | 140 | ctx = kmalloc(sizeof(*ctx), GFP_KERNEL); |
141 | if (ctx != NULL) { | 141 | if (ctx != NULL) { |
142 | ctx->duped = 0; | 142 | ctx->duped = 0; |
143 | ctx->attr_gencount = NFS_I(dir)->attr_gencount; | ||
143 | ctx->dir_cookie = 0; | 144 | ctx->dir_cookie = 0; |
144 | ctx->dup_cookie = 0; | 145 | ctx->dup_cookie = 0; |
145 | ctx->cred = get_rpccred(cred); | 146 | ctx->cred = get_rpccred(cred); |
146 | } else | 147 | return ctx; |
147 | ctx = ERR_PTR(-ENOMEM); | 148 | } |
148 | return ctx; | 149 | return ERR_PTR(-ENOMEM); |
149 | } | 150 | } |
150 | 151 | ||
151 | static void put_nfs_open_dir_context(struct nfs_open_dir_context *ctx) | 152 | static void put_nfs_open_dir_context(struct nfs_open_dir_context *ctx) |
@@ -173,7 +174,7 @@ nfs_opendir(struct inode *inode, struct file *filp) | |||
173 | cred = rpc_lookup_cred(); | 174 | cred = rpc_lookup_cred(); |
174 | if (IS_ERR(cred)) | 175 | if (IS_ERR(cred)) |
175 | return PTR_ERR(cred); | 176 | return PTR_ERR(cred); |
176 | ctx = alloc_nfs_open_dir_context(cred); | 177 | ctx = alloc_nfs_open_dir_context(inode, cred); |
177 | if (IS_ERR(ctx)) { | 178 | if (IS_ERR(ctx)) { |
178 | res = PTR_ERR(ctx); | 179 | res = PTR_ERR(ctx); |
179 | goto out; | 180 | goto out; |
@@ -323,7 +324,6 @@ int nfs_readdir_search_for_pos(struct nfs_cache_array *array, nfs_readdir_descri | |||
323 | { | 324 | { |
324 | loff_t diff = desc->file->f_pos - desc->current_index; | 325 | loff_t diff = desc->file->f_pos - desc->current_index; |
325 | unsigned int index; | 326 | unsigned int index; |
326 | struct nfs_open_dir_context *ctx = desc->file->private_data; | ||
327 | 327 | ||
328 | if (diff < 0) | 328 | if (diff < 0) |
329 | goto out_eof; | 329 | goto out_eof; |
@@ -336,7 +336,6 @@ int nfs_readdir_search_for_pos(struct nfs_cache_array *array, nfs_readdir_descri | |||
336 | index = (unsigned int)diff; | 336 | index = (unsigned int)diff; |
337 | *desc->dir_cookie = array->array[index].cookie; | 337 | *desc->dir_cookie = array->array[index].cookie; |
338 | desc->cache_entry_index = index; | 338 | desc->cache_entry_index = index; |
339 | ctx->duped = 0; | ||
340 | return 0; | 339 | return 0; |
341 | out_eof: | 340 | out_eof: |
342 | desc->eof = 1; | 341 | desc->eof = 1; |
@@ -349,14 +348,34 @@ int nfs_readdir_search_for_cookie(struct nfs_cache_array *array, nfs_readdir_des | |||
349 | int i; | 348 | int i; |
350 | loff_t new_pos; | 349 | loff_t new_pos; |
351 | int status = -EAGAIN; | 350 | int status = -EAGAIN; |
352 | struct nfs_open_dir_context *ctx = desc->file->private_data; | ||
353 | 351 | ||
354 | for (i = 0; i < array->size; i++) { | 352 | for (i = 0; i < array->size; i++) { |
355 | if (array->array[i].cookie == *desc->dir_cookie) { | 353 | if (array->array[i].cookie == *desc->dir_cookie) { |
354 | struct nfs_inode *nfsi = NFS_I(desc->file->f_path.dentry->d_inode); | ||
355 | struct nfs_open_dir_context *ctx = desc->file->private_data; | ||
356 | |||
356 | new_pos = desc->current_index + i; | 357 | new_pos = desc->current_index + i; |
357 | if (new_pos < desc->file->f_pos) { | 358 | if (ctx->attr_gencount != nfsi->attr_gencount |
359 | || (nfsi->cache_validity & (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA))) { | ||
360 | ctx->duped = 0; | ||
361 | ctx->attr_gencount = nfsi->attr_gencount; | ||
362 | } else if (new_pos < desc->file->f_pos) { | ||
363 | if (ctx->duped > 0 | ||
364 | && ctx->dup_cookie == *desc->dir_cookie) { | ||
365 | if (printk_ratelimit()) { | ||
366 | pr_notice("NFS: directory %s/%s contains a readdir loop." | ||
367 | "Please contact your server vendor. " | ||
368 | "The file: %s has duplicate cookie %llu\n", | ||
369 | desc->file->f_dentry->d_parent->d_name.name, | ||
370 | desc->file->f_dentry->d_name.name, | ||
371 | array->array[i].string.name, | ||
372 | *desc->dir_cookie); | ||
373 | } | ||
374 | status = -ELOOP; | ||
375 | goto out; | ||
376 | } | ||
358 | ctx->dup_cookie = *desc->dir_cookie; | 377 | ctx->dup_cookie = *desc->dir_cookie; |
359 | ctx->duped = 1; | 378 | ctx->duped = -1; |
360 | } | 379 | } |
361 | desc->file->f_pos = new_pos; | 380 | desc->file->f_pos = new_pos; |
362 | desc->cache_entry_index = i; | 381 | desc->cache_entry_index = i; |
@@ -368,6 +387,7 @@ int nfs_readdir_search_for_cookie(struct nfs_cache_array *array, nfs_readdir_des | |||
368 | if (*desc->dir_cookie == array->last_cookie) | 387 | if (*desc->dir_cookie == array->last_cookie) |
369 | desc->eof = 1; | 388 | desc->eof = 1; |
370 | } | 389 | } |
390 | out: | ||
371 | return status; | 391 | return status; |
372 | } | 392 | } |
373 | 393 | ||
@@ -740,19 +760,6 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc, void *dirent, | |||
740 | struct nfs_cache_array *array = NULL; | 760 | struct nfs_cache_array *array = NULL; |
741 | struct nfs_open_dir_context *ctx = file->private_data; | 761 | struct nfs_open_dir_context *ctx = file->private_data; |
742 | 762 | ||
743 | if (ctx->duped != 0 && ctx->dup_cookie == *desc->dir_cookie) { | ||
744 | if (printk_ratelimit()) { | ||
745 | pr_notice("NFS: directory %s/%s contains a readdir loop. " | ||
746 | "Please contact your server vendor. " | ||
747 | "Offending cookie: %llu\n", | ||
748 | file->f_dentry->d_parent->d_name.name, | ||
749 | file->f_dentry->d_name.name, | ||
750 | *desc->dir_cookie); | ||
751 | } | ||
752 | res = -ELOOP; | ||
753 | goto out; | ||
754 | } | ||
755 | |||
756 | array = nfs_readdir_get_array(desc->page); | 763 | array = nfs_readdir_get_array(desc->page); |
757 | if (IS_ERR(array)) { | 764 | if (IS_ERR(array)) { |
758 | res = PTR_ERR(array); | 765 | res = PTR_ERR(array); |
@@ -774,6 +781,8 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc, void *dirent, | |||
774 | *desc->dir_cookie = array->array[i+1].cookie; | 781 | *desc->dir_cookie = array->array[i+1].cookie; |
775 | else | 782 | else |
776 | *desc->dir_cookie = array->last_cookie; | 783 | *desc->dir_cookie = array->last_cookie; |
784 | if (ctx->duped != 0) | ||
785 | ctx->duped = 1; | ||
777 | } | 786 | } |
778 | if (array->eof_index >= 0) | 787 | if (array->eof_index >= 0) |
779 | desc->eof = 1; | 788 | desc->eof = 1; |
@@ -805,6 +814,7 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent, | |||
805 | struct page *page = NULL; | 814 | struct page *page = NULL; |
806 | int status; | 815 | int status; |
807 | struct inode *inode = desc->file->f_path.dentry->d_inode; | 816 | struct inode *inode = desc->file->f_path.dentry->d_inode; |
817 | struct nfs_open_dir_context *ctx = desc->file->private_data; | ||
808 | 818 | ||
809 | dfprintk(DIRCACHE, "NFS: uncached_readdir() searching for cookie %Lu\n", | 819 | dfprintk(DIRCACHE, "NFS: uncached_readdir() searching for cookie %Lu\n", |
810 | (unsigned long long)*desc->dir_cookie); | 820 | (unsigned long long)*desc->dir_cookie); |
@@ -818,6 +828,7 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent, | |||
818 | desc->page_index = 0; | 828 | desc->page_index = 0; |
819 | desc->last_cookie = *desc->dir_cookie; | 829 | desc->last_cookie = *desc->dir_cookie; |
820 | desc->page = page; | 830 | desc->page = page; |
831 | ctx->duped = 0; | ||
821 | 832 | ||
822 | status = nfs_readdir_xdr_to_array(desc, page, inode); | 833 | status = nfs_readdir_xdr_to_array(desc, page, inode); |
823 | if (status < 0) | 834 | if (status < 0) |
@@ -945,15 +956,19 @@ out: | |||
945 | * All directory operations under NFS are synchronous, so fsync() | 956 | * All directory operations under NFS are synchronous, so fsync() |
946 | * is a dummy operation. | 957 | * is a dummy operation. |
947 | */ | 958 | */ |
948 | static int nfs_fsync_dir(struct file *filp, int datasync) | 959 | static int nfs_fsync_dir(struct file *filp, loff_t start, loff_t end, |
960 | int datasync) | ||
949 | { | 961 | { |
950 | struct dentry *dentry = filp->f_path.dentry; | 962 | struct dentry *dentry = filp->f_path.dentry; |
963 | struct inode *inode = dentry->d_inode; | ||
951 | 964 | ||
952 | dfprintk(FILE, "NFS: fsync dir(%s/%s) datasync %d\n", | 965 | dfprintk(FILE, "NFS: fsync dir(%s/%s) datasync %d\n", |
953 | dentry->d_parent->d_name.name, dentry->d_name.name, | 966 | dentry->d_parent->d_name.name, dentry->d_name.name, |
954 | datasync); | 967 | datasync); |
955 | 968 | ||
969 | mutex_lock(&inode->i_mutex); | ||
956 | nfs_inc_stats(dentry->d_inode, NFSIOS_VFSFSYNC); | 970 | nfs_inc_stats(dentry->d_inode, NFSIOS_VFSFSYNC); |
971 | mutex_unlock(&inode->i_mutex); | ||
957 | return 0; | 972 | return 0; |
958 | } | 973 | } |
959 | 974 | ||
@@ -997,14 +1012,12 @@ static int nfs_check_verifier(struct inode *dir, struct dentry *dentry) | |||
997 | * Return the intent data that applies to this particular path component | 1012 | * Return the intent data that applies to this particular path component |
998 | * | 1013 | * |
999 | * Note that the current set of intents only apply to the very last | 1014 | * Note that the current set of intents only apply to the very last |
1000 | * component of the path. | 1015 | * component of the path and none of them is set before that last |
1001 | * We check for this using LOOKUP_CONTINUE and LOOKUP_PARENT. | 1016 | * component. |
1002 | */ | 1017 | */ |
1003 | static inline unsigned int nfs_lookup_check_intent(struct nameidata *nd, | 1018 | static inline unsigned int nfs_lookup_check_intent(struct nameidata *nd, |
1004 | unsigned int mask) | 1019 | unsigned int mask) |
1005 | { | 1020 | { |
1006 | if (nd->flags & (LOOKUP_CONTINUE|LOOKUP_PARENT)) | ||
1007 | return 0; | ||
1008 | return nd->flags & mask; | 1021 | return nd->flags & mask; |
1009 | } | 1022 | } |
1010 | 1023 | ||
@@ -1338,25 +1351,31 @@ static int is_atomic_open(struct nameidata *nd) | |||
1338 | return 0; | 1351 | return 0; |
1339 | /* Are we trying to write to a read only partition? */ | 1352 | /* Are we trying to write to a read only partition? */ |
1340 | if (__mnt_is_readonly(nd->path.mnt) && | 1353 | if (__mnt_is_readonly(nd->path.mnt) && |
1341 | (nd->intent.open.flags & (O_CREAT|O_TRUNC|FMODE_WRITE))) | 1354 | (nd->intent.open.flags & (O_CREAT|O_TRUNC|O_ACCMODE))) |
1342 | return 0; | 1355 | return 0; |
1343 | return 1; | 1356 | return 1; |
1344 | } | 1357 | } |
1345 | 1358 | ||
1346 | static struct nfs_open_context *nameidata_to_nfs_open_context(struct dentry *dentry, struct nameidata *nd) | 1359 | static fmode_t flags_to_mode(int flags) |
1360 | { | ||
1361 | fmode_t res = (__force fmode_t)flags & FMODE_EXEC; | ||
1362 | if ((flags & O_ACCMODE) != O_WRONLY) | ||
1363 | res |= FMODE_READ; | ||
1364 | if ((flags & O_ACCMODE) != O_RDONLY) | ||
1365 | res |= FMODE_WRITE; | ||
1366 | return res; | ||
1367 | } | ||
1368 | |||
1369 | static struct nfs_open_context *create_nfs_open_context(struct dentry *dentry, int open_flags) | ||
1347 | { | 1370 | { |
1348 | struct path path = { | ||
1349 | .mnt = nd->path.mnt, | ||
1350 | .dentry = dentry, | ||
1351 | }; | ||
1352 | struct nfs_open_context *ctx; | 1371 | struct nfs_open_context *ctx; |
1353 | struct rpc_cred *cred; | 1372 | struct rpc_cred *cred; |
1354 | fmode_t fmode = nd->intent.open.flags & (FMODE_READ | FMODE_WRITE | FMODE_EXEC); | 1373 | fmode_t fmode = flags_to_mode(open_flags); |
1355 | 1374 | ||
1356 | cred = rpc_lookup_cred(); | 1375 | cred = rpc_lookup_cred(); |
1357 | if (IS_ERR(cred)) | 1376 | if (IS_ERR(cred)) |
1358 | return ERR_CAST(cred); | 1377 | return ERR_CAST(cred); |
1359 | ctx = alloc_nfs_open_context(&path, cred, fmode); | 1378 | ctx = alloc_nfs_open_context(dentry, cred, fmode); |
1360 | put_rpccred(cred); | 1379 | put_rpccred(cred); |
1361 | if (ctx == NULL) | 1380 | if (ctx == NULL) |
1362 | return ERR_PTR(-ENOMEM); | 1381 | return ERR_PTR(-ENOMEM); |
@@ -1376,13 +1395,13 @@ static int nfs_intent_set_file(struct nameidata *nd, struct nfs_open_context *ct | |||
1376 | 1395 | ||
1377 | /* If the open_intent is for execute, we have an extra check to make */ | 1396 | /* If the open_intent is for execute, we have an extra check to make */ |
1378 | if (ctx->mode & FMODE_EXEC) { | 1397 | if (ctx->mode & FMODE_EXEC) { |
1379 | ret = nfs_may_open(ctx->path.dentry->d_inode, | 1398 | ret = nfs_may_open(ctx->dentry->d_inode, |
1380 | ctx->cred, | 1399 | ctx->cred, |
1381 | nd->intent.open.flags); | 1400 | nd->intent.open.flags); |
1382 | if (ret < 0) | 1401 | if (ret < 0) |
1383 | goto out; | 1402 | goto out; |
1384 | } | 1403 | } |
1385 | filp = lookup_instantiate_filp(nd, ctx->path.dentry, do_open); | 1404 | filp = lookup_instantiate_filp(nd, ctx->dentry, do_open); |
1386 | if (IS_ERR(filp)) | 1405 | if (IS_ERR(filp)) |
1387 | ret = PTR_ERR(filp); | 1406 | ret = PTR_ERR(filp); |
1388 | else | 1407 | else |
@@ -1420,12 +1439,13 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry | |||
1420 | goto out; | 1439 | goto out; |
1421 | } | 1440 | } |
1422 | 1441 | ||
1423 | ctx = nameidata_to_nfs_open_context(dentry, nd); | 1442 | open_flags = nd->intent.open.flags; |
1443 | |||
1444 | ctx = create_nfs_open_context(dentry, open_flags); | ||
1424 | res = ERR_CAST(ctx); | 1445 | res = ERR_CAST(ctx); |
1425 | if (IS_ERR(ctx)) | 1446 | if (IS_ERR(ctx)) |
1426 | goto out; | 1447 | goto out; |
1427 | 1448 | ||
1428 | open_flags = nd->intent.open.flags; | ||
1429 | if (nd->flags & LOOKUP_CREATE) { | 1449 | if (nd->flags & LOOKUP_CREATE) { |
1430 | attr.ia_mode = nd->intent.open.create_mode; | 1450 | attr.ia_mode = nd->intent.open.create_mode; |
1431 | attr.ia_valid = ATTR_MODE; | 1451 | attr.ia_valid = ATTR_MODE; |
@@ -1448,12 +1468,12 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry | |||
1448 | res = NULL; | 1468 | res = NULL; |
1449 | goto out; | 1469 | goto out; |
1450 | /* This turned out not to be a regular file */ | 1470 | /* This turned out not to be a regular file */ |
1471 | case -EISDIR: | ||
1451 | case -ENOTDIR: | 1472 | case -ENOTDIR: |
1452 | goto no_open; | 1473 | goto no_open; |
1453 | case -ELOOP: | 1474 | case -ELOOP: |
1454 | if (!(nd->intent.open.flags & O_NOFOLLOW)) | 1475 | if (!(nd->intent.open.flags & O_NOFOLLOW)) |
1455 | goto no_open; | 1476 | goto no_open; |
1456 | /* case -EISDIR: */ | ||
1457 | /* case -EINVAL: */ | 1477 | /* case -EINVAL: */ |
1458 | default: | 1478 | default: |
1459 | res = ERR_CAST(inode); | 1479 | res = ERR_CAST(inode); |
@@ -1463,8 +1483,8 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry | |||
1463 | res = d_add_unique(dentry, inode); | 1483 | res = d_add_unique(dentry, inode); |
1464 | nfs_unblock_sillyrename(dentry->d_parent); | 1484 | nfs_unblock_sillyrename(dentry->d_parent); |
1465 | if (res != NULL) { | 1485 | if (res != NULL) { |
1466 | dput(ctx->path.dentry); | 1486 | dput(ctx->dentry); |
1467 | ctx->path.dentry = dget(res); | 1487 | ctx->dentry = dget(res); |
1468 | dentry = res; | 1488 | dentry = res; |
1469 | } | 1489 | } |
1470 | err = nfs_intent_set_file(nd, ctx); | 1490 | err = nfs_intent_set_file(nd, ctx); |
@@ -1517,7 +1537,7 @@ static int nfs_open_revalidate(struct dentry *dentry, struct nameidata *nd) | |||
1517 | /* We can't create new files, or truncate existing ones here */ | 1537 | /* We can't create new files, or truncate existing ones here */ |
1518 | openflags &= ~(O_CREAT|O_EXCL|O_TRUNC); | 1538 | openflags &= ~(O_CREAT|O_EXCL|O_TRUNC); |
1519 | 1539 | ||
1520 | ctx = nameidata_to_nfs_open_context(dentry, nd); | 1540 | ctx = create_nfs_open_context(dentry, openflags); |
1521 | ret = PTR_ERR(ctx); | 1541 | ret = PTR_ERR(ctx); |
1522 | if (IS_ERR(ctx)) | 1542 | if (IS_ERR(ctx)) |
1523 | goto out; | 1543 | goto out; |
@@ -1570,7 +1590,7 @@ static int nfs_open_create(struct inode *dir, struct dentry *dentry, int mode, | |||
1570 | struct nfs_open_context *ctx = NULL; | 1590 | struct nfs_open_context *ctx = NULL; |
1571 | struct iattr attr; | 1591 | struct iattr attr; |
1572 | int error; | 1592 | int error; |
1573 | int open_flags = 0; | 1593 | int open_flags = O_CREAT|O_EXCL; |
1574 | 1594 | ||
1575 | dfprintk(VFS, "NFS: create(%s/%ld), %s\n", | 1595 | dfprintk(VFS, "NFS: create(%s/%ld), %s\n", |
1576 | dir->i_sb->s_id, dir->i_ino, dentry->d_name.name); | 1596 | dir->i_sb->s_id, dir->i_ino, dentry->d_name.name); |
@@ -1578,27 +1598,27 @@ static int nfs_open_create(struct inode *dir, struct dentry *dentry, int mode, | |||
1578 | attr.ia_mode = mode; | 1598 | attr.ia_mode = mode; |
1579 | attr.ia_valid = ATTR_MODE; | 1599 | attr.ia_valid = ATTR_MODE; |
1580 | 1600 | ||
1581 | if ((nd->flags & LOOKUP_CREATE) != 0) { | 1601 | if (nd) |
1582 | open_flags = nd->intent.open.flags; | 1602 | open_flags = nd->intent.open.flags; |
1583 | 1603 | ||
1584 | ctx = nameidata_to_nfs_open_context(dentry, nd); | 1604 | ctx = create_nfs_open_context(dentry, open_flags); |
1585 | error = PTR_ERR(ctx); | 1605 | error = PTR_ERR(ctx); |
1586 | if (IS_ERR(ctx)) | 1606 | if (IS_ERR(ctx)) |
1587 | goto out_err_drop; | 1607 | goto out_err_drop; |
1588 | } | ||
1589 | 1608 | ||
1590 | error = NFS_PROTO(dir)->create(dir, dentry, &attr, open_flags, ctx); | 1609 | error = NFS_PROTO(dir)->create(dir, dentry, &attr, open_flags, ctx); |
1591 | if (error != 0) | 1610 | if (error != 0) |
1592 | goto out_put_ctx; | 1611 | goto out_put_ctx; |
1593 | if (ctx != NULL) { | 1612 | if (nd) { |
1594 | error = nfs_intent_set_file(nd, ctx); | 1613 | error = nfs_intent_set_file(nd, ctx); |
1595 | if (error < 0) | 1614 | if (error < 0) |
1596 | goto out_err; | 1615 | goto out_err; |
1616 | } else { | ||
1617 | put_nfs_open_context(ctx); | ||
1597 | } | 1618 | } |
1598 | return 0; | 1619 | return 0; |
1599 | out_put_ctx: | 1620 | out_put_ctx: |
1600 | if (ctx != NULL) | 1621 | put_nfs_open_context(ctx); |
1601 | put_nfs_open_context(ctx); | ||
1602 | out_err_drop: | 1622 | out_err_drop: |
1603 | d_drop(dentry); | 1623 | d_drop(dentry); |
1604 | out_err: | 1624 | out_err: |
@@ -1660,7 +1680,7 @@ static int nfs_create(struct inode *dir, struct dentry *dentry, int mode, | |||
1660 | { | 1680 | { |
1661 | struct iattr attr; | 1681 | struct iattr attr; |
1662 | int error; | 1682 | int error; |
1663 | int open_flags = 0; | 1683 | int open_flags = O_CREAT|O_EXCL; |
1664 | 1684 | ||
1665 | dfprintk(VFS, "NFS: create(%s/%ld), %s\n", | 1685 | dfprintk(VFS, "NFS: create(%s/%ld), %s\n", |
1666 | dir->i_sb->s_id, dir->i_ino, dentry->d_name.name); | 1686 | dir->i_sb->s_id, dir->i_ino, dentry->d_name.name); |
@@ -1668,7 +1688,7 @@ static int nfs_create(struct inode *dir, struct dentry *dentry, int mode, | |||
1668 | attr.ia_mode = mode; | 1688 | attr.ia_mode = mode; |
1669 | attr.ia_valid = ATTR_MODE; | 1689 | attr.ia_valid = ATTR_MODE; |
1670 | 1690 | ||
1671 | if ((nd->flags & LOOKUP_CREATE) != 0) | 1691 | if (nd) |
1672 | open_flags = nd->intent.open.flags; | 1692 | open_flags = nd->intent.open.flags; |
1673 | 1693 | ||
1674 | error = NFS_PROTO(dir)->create(dir, dentry, &attr, open_flags, NULL); | 1694 | error = NFS_PROTO(dir)->create(dir, dentry, &attr, open_flags, NULL); |
@@ -2259,11 +2279,11 @@ static int nfs_open_permission_mask(int openflags) | |||
2259 | { | 2279 | { |
2260 | int mask = 0; | 2280 | int mask = 0; |
2261 | 2281 | ||
2262 | if (openflags & FMODE_READ) | 2282 | if ((openflags & O_ACCMODE) != O_WRONLY) |
2263 | mask |= MAY_READ; | 2283 | mask |= MAY_READ; |
2264 | if (openflags & FMODE_WRITE) | 2284 | if ((openflags & O_ACCMODE) != O_RDONLY) |
2265 | mask |= MAY_WRITE; | 2285 | mask |= MAY_WRITE; |
2266 | if (openflags & FMODE_EXEC) | 2286 | if (openflags & __FMODE_EXEC) |
2267 | mask |= MAY_EXEC; | 2287 | mask |= MAY_EXEC; |
2268 | return mask; | 2288 | return mask; |
2269 | } | 2289 | } |
@@ -2273,12 +2293,12 @@ int nfs_may_open(struct inode *inode, struct rpc_cred *cred, int openflags) | |||
2273 | return nfs_do_access(inode, cred, nfs_open_permission_mask(openflags)); | 2293 | return nfs_do_access(inode, cred, nfs_open_permission_mask(openflags)); |
2274 | } | 2294 | } |
2275 | 2295 | ||
2276 | int nfs_permission(struct inode *inode, int mask, unsigned int flags) | 2296 | int nfs_permission(struct inode *inode, int mask) |
2277 | { | 2297 | { |
2278 | struct rpc_cred *cred; | 2298 | struct rpc_cred *cred; |
2279 | int res = 0; | 2299 | int res = 0; |
2280 | 2300 | ||
2281 | if (flags & IPERM_FLAG_RCU) | 2301 | if (mask & MAY_NOT_BLOCK) |
2282 | return -ECHILD; | 2302 | return -ECHILD; |
2283 | 2303 | ||
2284 | nfs_inc_stats(inode, NFSIOS_VFSACCESS); | 2304 | nfs_inc_stats(inode, NFSIOS_VFSACCESS); |
@@ -2328,7 +2348,7 @@ out: | |||
2328 | out_notsup: | 2348 | out_notsup: |
2329 | res = nfs_revalidate_inode(NFS_SERVER(inode), inode); | 2349 | res = nfs_revalidate_inode(NFS_SERVER(inode), inode); |
2330 | if (res == 0) | 2350 | if (res == 0) |
2331 | res = generic_permission(inode, mask, flags, NULL); | 2351 | res = generic_permission(inode, mask); |
2332 | goto out; | 2352 | goto out; |
2333 | } | 2353 | } |
2334 | 2354 | ||
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 8eea2536671..1940f1a56a5 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c | |||
@@ -53,7 +53,7 @@ | |||
53 | 53 | ||
54 | #include <asm/system.h> | 54 | #include <asm/system.h> |
55 | #include <asm/uaccess.h> | 55 | #include <asm/uaccess.h> |
56 | #include <asm/atomic.h> | 56 | #include <linux/atomic.h> |
57 | 57 | ||
58 | #include "internal.h" | 58 | #include "internal.h" |
59 | #include "iostat.h" | 59 | #include "iostat.h" |
@@ -284,7 +284,7 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_direct_req *dreq, | |||
284 | loff_t pos) | 284 | loff_t pos) |
285 | { | 285 | { |
286 | struct nfs_open_context *ctx = dreq->ctx; | 286 | struct nfs_open_context *ctx = dreq->ctx; |
287 | struct inode *inode = ctx->path.dentry->d_inode; | 287 | struct inode *inode = ctx->dentry->d_inode; |
288 | unsigned long user_addr = (unsigned long)iov->iov_base; | 288 | unsigned long user_addr = (unsigned long)iov->iov_base; |
289 | size_t count = iov->iov_len; | 289 | size_t count = iov->iov_len; |
290 | size_t rsize = NFS_SERVER(inode)->rsize; | 290 | size_t rsize = NFS_SERVER(inode)->rsize; |
@@ -715,7 +715,7 @@ static ssize_t nfs_direct_write_schedule_segment(struct nfs_direct_req *dreq, | |||
715 | loff_t pos, int sync) | 715 | loff_t pos, int sync) |
716 | { | 716 | { |
717 | struct nfs_open_context *ctx = dreq->ctx; | 717 | struct nfs_open_context *ctx = dreq->ctx; |
718 | struct inode *inode = ctx->path.dentry->d_inode; | 718 | struct inode *inode = ctx->dentry->d_inode; |
719 | unsigned long user_addr = (unsigned long)iov->iov_base; | 719 | unsigned long user_addr = (unsigned long)iov->iov_base; |
720 | size_t count = iov->iov_len; | 720 | size_t count = iov->iov_len; |
721 | struct rpc_task *task; | 721 | struct rpc_task *task; |
diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 2f093ed1698..b76be2fb573 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c | |||
@@ -55,7 +55,7 @@ static ssize_t nfs_file_splice_write(struct pipe_inode_info *pipe, | |||
55 | static ssize_t nfs_file_write(struct kiocb *, const struct iovec *iov, | 55 | static ssize_t nfs_file_write(struct kiocb *, const struct iovec *iov, |
56 | unsigned long nr_segs, loff_t pos); | 56 | unsigned long nr_segs, loff_t pos); |
57 | static int nfs_file_flush(struct file *, fl_owner_t id); | 57 | static int nfs_file_flush(struct file *, fl_owner_t id); |
58 | static int nfs_file_fsync(struct file *, int datasync); | 58 | static int nfs_file_fsync(struct file *, loff_t, loff_t, int datasync); |
59 | static int nfs_check_flags(int flags); | 59 | static int nfs_check_flags(int flags); |
60 | static int nfs_lock(struct file *filp, int cmd, struct file_lock *fl); | 60 | static int nfs_lock(struct file *filp, int cmd, struct file_lock *fl); |
61 | static int nfs_flock(struct file *filp, int cmd, struct file_lock *fl); | 61 | static int nfs_flock(struct file *filp, int cmd, struct file_lock *fl); |
@@ -187,8 +187,11 @@ static loff_t nfs_file_llseek(struct file *filp, loff_t offset, int origin) | |||
187 | filp->f_path.dentry->d_name.name, | 187 | filp->f_path.dentry->d_name.name, |
188 | offset, origin); | 188 | offset, origin); |
189 | 189 | ||
190 | /* origin == SEEK_END => we must revalidate the cached file length */ | 190 | /* |
191 | if (origin == SEEK_END) { | 191 | * origin == SEEK_END || SEEK_DATA || SEEK_HOLE => we must revalidate |
192 | * the cached file length | ||
193 | */ | ||
194 | if (origin != SEEK_SET && origin != SEEK_CUR) { | ||
192 | struct inode *inode = filp->f_mapping->host; | 195 | struct inode *inode = filp->f_mapping->host; |
193 | 196 | ||
194 | int retval = nfs_revalidate_file_size(inode, filp); | 197 | int retval = nfs_revalidate_file_size(inode, filp); |
@@ -305,7 +308,7 @@ nfs_file_mmap(struct file * file, struct vm_area_struct * vma) | |||
305 | * fall back to doing a synchronous write. | 308 | * fall back to doing a synchronous write. |
306 | */ | 309 | */ |
307 | static int | 310 | static int |
308 | nfs_file_fsync(struct file *file, int datasync) | 311 | nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync) |
309 | { | 312 | { |
310 | struct dentry *dentry = file->f_path.dentry; | 313 | struct dentry *dentry = file->f_path.dentry; |
311 | struct nfs_open_context *ctx = nfs_file_open_context(file); | 314 | struct nfs_open_context *ctx = nfs_file_open_context(file); |
@@ -313,14 +316,18 @@ nfs_file_fsync(struct file *file, int datasync) | |||
313 | int have_error, status; | 316 | int have_error, status; |
314 | int ret = 0; | 317 | int ret = 0; |
315 | 318 | ||
316 | |||
317 | dprintk("NFS: fsync file(%s/%s) datasync %d\n", | 319 | dprintk("NFS: fsync file(%s/%s) datasync %d\n", |
318 | dentry->d_parent->d_name.name, dentry->d_name.name, | 320 | dentry->d_parent->d_name.name, dentry->d_name.name, |
319 | datasync); | 321 | datasync); |
320 | 322 | ||
323 | ret = filemap_write_and_wait_range(inode->i_mapping, start, end); | ||
324 | mutex_lock(&inode->i_mutex); | ||
325 | |||
321 | nfs_inc_stats(inode, NFSIOS_VFSFSYNC); | 326 | nfs_inc_stats(inode, NFSIOS_VFSFSYNC); |
322 | have_error = test_and_clear_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags); | 327 | have_error = test_and_clear_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags); |
323 | status = nfs_commit_inode(inode, FLUSH_SYNC); | 328 | status = nfs_commit_inode(inode, FLUSH_SYNC); |
329 | if (status >= 0 && ret < 0) | ||
330 | status = ret; | ||
324 | have_error |= test_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags); | 331 | have_error |= test_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags); |
325 | if (have_error) | 332 | if (have_error) |
326 | ret = xchg(&ctx->error, 0); | 333 | ret = xchg(&ctx->error, 0); |
@@ -329,6 +336,7 @@ nfs_file_fsync(struct file *file, int datasync) | |||
329 | if (!ret && !datasync) | 336 | if (!ret && !datasync) |
330 | /* application has asked for meta-data sync */ | 337 | /* application has asked for meta-data sync */ |
331 | ret = pnfs_layoutcommit_inode(inode, true); | 338 | ret = pnfs_layoutcommit_inode(inode, true); |
339 | mutex_unlock(&inode->i_mutex); | ||
332 | return ret; | 340 | return ret; |
333 | } | 341 | } |
334 | 342 | ||
@@ -887,3 +895,35 @@ static int nfs_setlease(struct file *file, long arg, struct file_lock **fl) | |||
887 | file->f_path.dentry->d_name.name, arg); | 895 | file->f_path.dentry->d_name.name, arg); |
888 | return -EINVAL; | 896 | return -EINVAL; |
889 | } | 897 | } |
898 | |||
899 | #ifdef CONFIG_NFS_V4 | ||
900 | static int | ||
901 | nfs4_file_open(struct inode *inode, struct file *filp) | ||
902 | { | ||
903 | /* | ||
904 | * NFSv4 opens are handled in d_lookup and d_revalidate. If we get to | ||
905 | * this point, then something is very wrong | ||
906 | */ | ||
907 | dprintk("NFS: %s called! inode=%p filp=%p\n", __func__, inode, filp); | ||
908 | return -ENOTDIR; | ||
909 | } | ||
910 | |||
911 | const struct file_operations nfs4_file_operations = { | ||
912 | .llseek = nfs_file_llseek, | ||
913 | .read = do_sync_read, | ||
914 | .write = do_sync_write, | ||
915 | .aio_read = nfs_file_read, | ||
916 | .aio_write = nfs_file_write, | ||
917 | .mmap = nfs_file_mmap, | ||
918 | .open = nfs4_file_open, | ||
919 | .flush = nfs_file_flush, | ||
920 | .release = nfs_file_release, | ||
921 | .fsync = nfs_file_fsync, | ||
922 | .lock = nfs_lock, | ||
923 | .flock = nfs_flock, | ||
924 | .splice_read = nfs_file_splice_read, | ||
925 | .splice_write = nfs_file_splice_write, | ||
926 | .check_flags = nfs_check_flags, | ||
927 | .setlease = nfs_setlease, | ||
928 | }; | ||
929 | #endif /* CONFIG_NFS_V4 */ | ||
diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c index 79664a1025a..47d1c6ff2d8 100644 --- a/fs/nfs/idmap.c +++ b/fs/nfs/idmap.c | |||
@@ -36,6 +36,8 @@ | |||
36 | #include <linux/types.h> | 36 | #include <linux/types.h> |
37 | #include <linux/string.h> | 37 | #include <linux/string.h> |
38 | #include <linux/kernel.h> | 38 | #include <linux/kernel.h> |
39 | #include <linux/slab.h> | ||
40 | #include <linux/nfs_idmap.h> | ||
39 | 41 | ||
40 | static int nfs_map_string_to_numeric(const char *name, size_t namelen, __u32 *res) | 42 | static int nfs_map_string_to_numeric(const char *name, size_t namelen, __u32 *res) |
41 | { | 43 | { |
@@ -59,12 +61,10 @@ static int nfs_map_numeric_to_string(__u32 id, char *buf, size_t buflen) | |||
59 | 61 | ||
60 | #ifdef CONFIG_NFS_USE_NEW_IDMAPPER | 62 | #ifdef CONFIG_NFS_USE_NEW_IDMAPPER |
61 | 63 | ||
62 | #include <linux/slab.h> | ||
63 | #include <linux/cred.h> | 64 | #include <linux/cred.h> |
64 | #include <linux/sunrpc/sched.h> | 65 | #include <linux/sunrpc/sched.h> |
65 | #include <linux/nfs4.h> | 66 | #include <linux/nfs4.h> |
66 | #include <linux/nfs_fs_sb.h> | 67 | #include <linux/nfs_fs_sb.h> |
67 | #include <linux/nfs_idmap.h> | ||
68 | #include <linux/keyctl.h> | 68 | #include <linux/keyctl.h> |
69 | #include <linux/key-type.h> | 69 | #include <linux/key-type.h> |
70 | #include <linux/rcupdate.h> | 70 | #include <linux/rcupdate.h> |
@@ -284,18 +284,15 @@ int nfs_map_gid_to_group(const struct nfs_server *server, __u32 gid, char *buf, | |||
284 | #include <linux/module.h> | 284 | #include <linux/module.h> |
285 | #include <linux/mutex.h> | 285 | #include <linux/mutex.h> |
286 | #include <linux/init.h> | 286 | #include <linux/init.h> |
287 | #include <linux/slab.h> | ||
288 | #include <linux/socket.h> | 287 | #include <linux/socket.h> |
289 | #include <linux/in.h> | 288 | #include <linux/in.h> |
290 | #include <linux/sched.h> | 289 | #include <linux/sched.h> |
291 | |||
292 | #include <linux/sunrpc/clnt.h> | 290 | #include <linux/sunrpc/clnt.h> |
293 | #include <linux/workqueue.h> | 291 | #include <linux/workqueue.h> |
294 | #include <linux/sunrpc/rpc_pipe_fs.h> | 292 | #include <linux/sunrpc/rpc_pipe_fs.h> |
295 | 293 | ||
296 | #include <linux/nfs_fs.h> | 294 | #include <linux/nfs_fs.h> |
297 | 295 | ||
298 | #include <linux/nfs_idmap.h> | ||
299 | #include "nfs4_fs.h" | 296 | #include "nfs4_fs.h" |
300 | 297 | ||
301 | #define IDMAP_HASH_SZ 128 | 298 | #define IDMAP_HASH_SZ 128 |
@@ -339,8 +336,6 @@ struct idmap { | |||
339 | struct idmap_hashtable idmap_group_hash; | 336 | struct idmap_hashtable idmap_group_hash; |
340 | }; | 337 | }; |
341 | 338 | ||
342 | static ssize_t idmap_pipe_upcall(struct file *, struct rpc_pipe_msg *, | ||
343 | char __user *, size_t); | ||
344 | static ssize_t idmap_pipe_downcall(struct file *, const char __user *, | 339 | static ssize_t idmap_pipe_downcall(struct file *, const char __user *, |
345 | size_t); | 340 | size_t); |
346 | static void idmap_pipe_destroy_msg(struct rpc_pipe_msg *); | 341 | static void idmap_pipe_destroy_msg(struct rpc_pipe_msg *); |
@@ -348,7 +343,7 @@ static void idmap_pipe_destroy_msg(struct rpc_pipe_msg *); | |||
348 | static unsigned int fnvhash32(const void *, size_t); | 343 | static unsigned int fnvhash32(const void *, size_t); |
349 | 344 | ||
350 | static const struct rpc_pipe_ops idmap_upcall_ops = { | 345 | static const struct rpc_pipe_ops idmap_upcall_ops = { |
351 | .upcall = idmap_pipe_upcall, | 346 | .upcall = rpc_pipe_generic_upcall, |
352 | .downcall = idmap_pipe_downcall, | 347 | .downcall = idmap_pipe_downcall, |
353 | .destroy_msg = idmap_pipe_destroy_msg, | 348 | .destroy_msg = idmap_pipe_destroy_msg, |
354 | }; | 349 | }; |
@@ -598,27 +593,6 @@ nfs_idmap_name(struct idmap *idmap, struct idmap_hashtable *h, | |||
598 | return ret; | 593 | return ret; |
599 | } | 594 | } |
600 | 595 | ||
601 | /* RPC pipefs upcall/downcall routines */ | ||
602 | static ssize_t | ||
603 | idmap_pipe_upcall(struct file *filp, struct rpc_pipe_msg *msg, | ||
604 | char __user *dst, size_t buflen) | ||
605 | { | ||
606 | char *data = (char *)msg->data + msg->copied; | ||
607 | size_t mlen = min(msg->len, buflen); | ||
608 | unsigned long left; | ||
609 | |||
610 | left = copy_to_user(dst, data, mlen); | ||
611 | if (left == mlen) { | ||
612 | msg->errno = -EFAULT; | ||
613 | return -EFAULT; | ||
614 | } | ||
615 | |||
616 | mlen -= left; | ||
617 | msg->copied += mlen; | ||
618 | msg->errno = 0; | ||
619 | return mlen; | ||
620 | } | ||
621 | |||
622 | static ssize_t | 596 | static ssize_t |
623 | idmap_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) | 597 | idmap_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) |
624 | { | 598 | { |
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 6f4850deb27..679d2f50b14 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c | |||
@@ -291,7 +291,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr) | |||
291 | */ | 291 | */ |
292 | inode->i_op = NFS_SB(sb)->nfs_client->rpc_ops->file_inode_ops; | 292 | inode->i_op = NFS_SB(sb)->nfs_client->rpc_ops->file_inode_ops; |
293 | if (S_ISREG(inode->i_mode)) { | 293 | if (S_ISREG(inode->i_mode)) { |
294 | inode->i_fop = &nfs_file_operations; | 294 | inode->i_fop = NFS_SB(sb)->nfs_client->rpc_ops->file_ops; |
295 | inode->i_data.a_ops = &nfs_file_aops; | 295 | inode->i_data.a_ops = &nfs_file_aops; |
296 | inode->i_data.backing_dev_info = &NFS_SB(sb)->backing_dev_info; | 296 | inode->i_data.backing_dev_info = &NFS_SB(sb)->backing_dev_info; |
297 | } else if (S_ISDIR(inode->i_mode)) { | 297 | } else if (S_ISDIR(inode->i_mode)) { |
@@ -567,7 +567,7 @@ static struct nfs_lock_context *__nfs_find_lock_context(struct nfs_open_context | |||
567 | struct nfs_lock_context *nfs_get_lock_context(struct nfs_open_context *ctx) | 567 | struct nfs_lock_context *nfs_get_lock_context(struct nfs_open_context *ctx) |
568 | { | 568 | { |
569 | struct nfs_lock_context *res, *new = NULL; | 569 | struct nfs_lock_context *res, *new = NULL; |
570 | struct inode *inode = ctx->path.dentry->d_inode; | 570 | struct inode *inode = ctx->dentry->d_inode; |
571 | 571 | ||
572 | spin_lock(&inode->i_lock); | 572 | spin_lock(&inode->i_lock); |
573 | res = __nfs_find_lock_context(ctx); | 573 | res = __nfs_find_lock_context(ctx); |
@@ -594,7 +594,7 @@ struct nfs_lock_context *nfs_get_lock_context(struct nfs_open_context *ctx) | |||
594 | void nfs_put_lock_context(struct nfs_lock_context *l_ctx) | 594 | void nfs_put_lock_context(struct nfs_lock_context *l_ctx) |
595 | { | 595 | { |
596 | struct nfs_open_context *ctx = l_ctx->open_context; | 596 | struct nfs_open_context *ctx = l_ctx->open_context; |
597 | struct inode *inode = ctx->path.dentry->d_inode; | 597 | struct inode *inode = ctx->dentry->d_inode; |
598 | 598 | ||
599 | if (!atomic_dec_and_lock(&l_ctx->count, &inode->i_lock)) | 599 | if (!atomic_dec_and_lock(&l_ctx->count, &inode->i_lock)) |
600 | return; | 600 | return; |
@@ -620,7 +620,7 @@ void nfs_close_context(struct nfs_open_context *ctx, int is_sync) | |||
620 | return; | 620 | return; |
621 | if (!is_sync) | 621 | if (!is_sync) |
622 | return; | 622 | return; |
623 | inode = ctx->path.dentry->d_inode; | 623 | inode = ctx->dentry->d_inode; |
624 | if (!list_empty(&NFS_I(inode)->open_files)) | 624 | if (!list_empty(&NFS_I(inode)->open_files)) |
625 | return; | 625 | return; |
626 | server = NFS_SERVER(inode); | 626 | server = NFS_SERVER(inode); |
@@ -629,14 +629,14 @@ void nfs_close_context(struct nfs_open_context *ctx, int is_sync) | |||
629 | nfs_revalidate_inode(server, inode); | 629 | nfs_revalidate_inode(server, inode); |
630 | } | 630 | } |
631 | 631 | ||
632 | struct nfs_open_context *alloc_nfs_open_context(struct path *path, struct rpc_cred *cred, fmode_t f_mode) | 632 | struct nfs_open_context *alloc_nfs_open_context(struct dentry *dentry, struct rpc_cred *cred, fmode_t f_mode) |
633 | { | 633 | { |
634 | struct nfs_open_context *ctx; | 634 | struct nfs_open_context *ctx; |
635 | 635 | ||
636 | ctx = kmalloc(sizeof(*ctx), GFP_KERNEL); | 636 | ctx = kmalloc(sizeof(*ctx), GFP_KERNEL); |
637 | if (ctx != NULL) { | 637 | if (ctx != NULL) { |
638 | ctx->path = *path; | 638 | nfs_sb_active(dentry->d_sb); |
639 | path_get(&ctx->path); | 639 | ctx->dentry = dget(dentry); |
640 | ctx->cred = get_rpccred(cred); | 640 | ctx->cred = get_rpccred(cred); |
641 | ctx->state = NULL; | 641 | ctx->state = NULL; |
642 | ctx->mode = f_mode; | 642 | ctx->mode = f_mode; |
@@ -658,7 +658,8 @@ struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ctx) | |||
658 | 658 | ||
659 | static void __put_nfs_open_context(struct nfs_open_context *ctx, int is_sync) | 659 | static void __put_nfs_open_context(struct nfs_open_context *ctx, int is_sync) |
660 | { | 660 | { |
661 | struct inode *inode = ctx->path.dentry->d_inode; | 661 | struct inode *inode = ctx->dentry->d_inode; |
662 | struct super_block *sb = ctx->dentry->d_sb; | ||
662 | 663 | ||
663 | if (!list_empty(&ctx->list)) { | 664 | if (!list_empty(&ctx->list)) { |
664 | if (!atomic_dec_and_lock(&ctx->lock_context.count, &inode->i_lock)) | 665 | if (!atomic_dec_and_lock(&ctx->lock_context.count, &inode->i_lock)) |
@@ -671,7 +672,8 @@ static void __put_nfs_open_context(struct nfs_open_context *ctx, int is_sync) | |||
671 | NFS_PROTO(inode)->close_context(ctx, is_sync); | 672 | NFS_PROTO(inode)->close_context(ctx, is_sync); |
672 | if (ctx->cred != NULL) | 673 | if (ctx->cred != NULL) |
673 | put_rpccred(ctx->cred); | 674 | put_rpccred(ctx->cred); |
674 | path_put(&ctx->path); | 675 | dput(ctx->dentry); |
676 | nfs_sb_deactive(sb); | ||
675 | kfree(ctx); | 677 | kfree(ctx); |
676 | } | 678 | } |
677 | 679 | ||
@@ -741,7 +743,7 @@ int nfs_open(struct inode *inode, struct file *filp) | |||
741 | cred = rpc_lookup_cred(); | 743 | cred = rpc_lookup_cred(); |
742 | if (IS_ERR(cred)) | 744 | if (IS_ERR(cred)) |
743 | return PTR_ERR(cred); | 745 | return PTR_ERR(cred); |
744 | ctx = alloc_nfs_open_context(&filp->f_path, cred, filp->f_mode); | 746 | ctx = alloc_nfs_open_context(filp->f_path.dentry, cred, filp->f_mode); |
745 | put_rpccred(cred); | 747 | put_rpccred(cred); |
746 | if (ctx == NULL) | 748 | if (ctx == NULL) |
747 | return -ENOMEM; | 749 | return -ENOMEM; |
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 2a55347a2da..ab12913dd47 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h | |||
@@ -277,6 +277,9 @@ extern void nfs_sb_deactive(struct super_block *sb); | |||
277 | extern char *nfs_path(char **p, struct dentry *dentry, | 277 | extern char *nfs_path(char **p, struct dentry *dentry, |
278 | char *buffer, ssize_t buflen); | 278 | char *buffer, ssize_t buflen); |
279 | extern struct vfsmount *nfs_d_automount(struct path *path); | 279 | extern struct vfsmount *nfs_d_automount(struct path *path); |
280 | #ifdef CONFIG_NFS_V4 | ||
281 | rpc_authflavor_t nfs_find_best_sec(struct nfs4_secinfo_flavors *); | ||
282 | #endif | ||
280 | 283 | ||
281 | /* getroot.c */ | 284 | /* getroot.c */ |
282 | extern struct dentry *nfs_get_root(struct super_block *, struct nfs_fh *, | 285 | extern struct dentry *nfs_get_root(struct super_block *, struct nfs_fh *, |
@@ -288,12 +291,22 @@ extern struct dentry *nfs4_get_root(struct super_block *, struct nfs_fh *, | |||
288 | extern int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh); | 291 | extern int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh); |
289 | #endif | 292 | #endif |
290 | 293 | ||
294 | struct nfs_pageio_descriptor; | ||
291 | /* read.c */ | 295 | /* read.c */ |
292 | extern int nfs_initiate_read(struct nfs_read_data *data, struct rpc_clnt *clnt, | 296 | extern int nfs_initiate_read(struct nfs_read_data *data, struct rpc_clnt *clnt, |
293 | const struct rpc_call_ops *call_ops); | 297 | const struct rpc_call_ops *call_ops); |
294 | extern void nfs_read_prepare(struct rpc_task *task, void *calldata); | 298 | extern void nfs_read_prepare(struct rpc_task *task, void *calldata); |
299 | extern int nfs_generic_pagein(struct nfs_pageio_descriptor *desc, | ||
300 | struct list_head *head); | ||
301 | |||
302 | extern void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio); | ||
303 | extern void nfs_readdata_release(struct nfs_read_data *rdata); | ||
295 | 304 | ||
296 | /* write.c */ | 305 | /* write.c */ |
306 | extern int nfs_generic_flush(struct nfs_pageio_descriptor *desc, | ||
307 | struct list_head *head); | ||
308 | extern void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio); | ||
309 | extern void nfs_writedata_release(struct nfs_write_data *wdata); | ||
297 | extern void nfs_commit_free(struct nfs_write_data *p); | 310 | extern void nfs_commit_free(struct nfs_write_data *p); |
298 | extern int nfs_initiate_write(struct nfs_write_data *data, | 311 | extern int nfs_initiate_write(struct nfs_write_data *data, |
299 | struct rpc_clnt *clnt, | 312 | struct rpc_clnt *clnt, |
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c index 1f063bacd28..8102391bb37 100644 --- a/fs/nfs/namespace.c +++ b/fs/nfs/namespace.c | |||
@@ -119,7 +119,7 @@ Elong: | |||
119 | } | 119 | } |
120 | 120 | ||
121 | #ifdef CONFIG_NFS_V4 | 121 | #ifdef CONFIG_NFS_V4 |
122 | static rpc_authflavor_t nfs_find_best_sec(struct nfs4_secinfo_flavors *flavors) | 122 | rpc_authflavor_t nfs_find_best_sec(struct nfs4_secinfo_flavors *flavors) |
123 | { | 123 | { |
124 | struct gss_api_mech *mech; | 124 | struct gss_api_mech *mech; |
125 | struct xdr_netobj oid; | 125 | struct xdr_netobj oid; |
diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c index 27434277165..7ef23979896 100644 --- a/fs/nfs/nfs3acl.c +++ b/fs/nfs/nfs3acl.c | |||
@@ -415,7 +415,7 @@ fail: | |||
415 | } | 415 | } |
416 | 416 | ||
417 | int nfs3_proc_set_default_acl(struct inode *dir, struct inode *inode, | 417 | int nfs3_proc_set_default_acl(struct inode *dir, struct inode *inode, |
418 | mode_t mode) | 418 | umode_t mode) |
419 | { | 419 | { |
420 | struct posix_acl *dfacl, *acl; | 420 | struct posix_acl *dfacl, *acl; |
421 | int error = 0; | 421 | int error = 0; |
@@ -427,16 +427,12 @@ int nfs3_proc_set_default_acl(struct inode *dir, struct inode *inode, | |||
427 | } | 427 | } |
428 | if (!dfacl) | 428 | if (!dfacl) |
429 | return 0; | 429 | return 0; |
430 | acl = posix_acl_clone(dfacl, GFP_KERNEL); | 430 | acl = posix_acl_dup(dfacl); |
431 | error = -ENOMEM; | 431 | error = posix_acl_create(&acl, GFP_KERNEL, &mode); |
432 | if (!acl) | ||
433 | goto out_release_dfacl; | ||
434 | error = posix_acl_create_masq(acl, &mode); | ||
435 | if (error < 0) | 432 | if (error < 0) |
436 | goto out_release_acl; | 433 | goto out_release_dfacl; |
437 | error = nfs3_proc_setacls(inode, acl, S_ISDIR(inode->i_mode) ? | 434 | error = nfs3_proc_setacls(inode, acl, S_ISDIR(inode->i_mode) ? |
438 | dfacl : NULL); | 435 | dfacl : NULL); |
439 | out_release_acl: | ||
440 | posix_acl_release(acl); | 436 | posix_acl_release(acl); |
441 | out_release_dfacl: | 437 | out_release_dfacl: |
442 | posix_acl_release(dfacl); | 438 | posix_acl_release(dfacl); |
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index 38053d823eb..d4bc9ed9174 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c | |||
@@ -316,7 +316,7 @@ nfs3_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, | |||
316 | int flags, struct nfs_open_context *ctx) | 316 | int flags, struct nfs_open_context *ctx) |
317 | { | 317 | { |
318 | struct nfs3_createdata *data; | 318 | struct nfs3_createdata *data; |
319 | mode_t mode = sattr->ia_mode; | 319 | umode_t mode = sattr->ia_mode; |
320 | int status = -ENOMEM; | 320 | int status = -ENOMEM; |
321 | 321 | ||
322 | dprintk("NFS call create %s\n", dentry->d_name.name); | 322 | dprintk("NFS call create %s\n", dentry->d_name.name); |
@@ -562,7 +562,7 @@ static int | |||
562 | nfs3_proc_mkdir(struct inode *dir, struct dentry *dentry, struct iattr *sattr) | 562 | nfs3_proc_mkdir(struct inode *dir, struct dentry *dentry, struct iattr *sattr) |
563 | { | 563 | { |
564 | struct nfs3_createdata *data; | 564 | struct nfs3_createdata *data; |
565 | int mode = sattr->ia_mode; | 565 | umode_t mode = sattr->ia_mode; |
566 | int status = -ENOMEM; | 566 | int status = -ENOMEM; |
567 | 567 | ||
568 | dprintk("NFS call mkdir %s\n", dentry->d_name.name); | 568 | dprintk("NFS call mkdir %s\n", dentry->d_name.name); |
@@ -681,7 +681,7 @@ nfs3_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr, | |||
681 | dev_t rdev) | 681 | dev_t rdev) |
682 | { | 682 | { |
683 | struct nfs3_createdata *data; | 683 | struct nfs3_createdata *data; |
684 | mode_t mode = sattr->ia_mode; | 684 | umode_t mode = sattr->ia_mode; |
685 | int status = -ENOMEM; | 685 | int status = -ENOMEM; |
686 | 686 | ||
687 | dprintk("NFS call mknod %s %u:%u\n", dentry->d_name.name, | 687 | dprintk("NFS call mknod %s %u:%u\n", dentry->d_name.name, |
@@ -853,6 +853,7 @@ const struct nfs_rpc_ops nfs_v3_clientops = { | |||
853 | .dentry_ops = &nfs_dentry_operations, | 853 | .dentry_ops = &nfs_dentry_operations, |
854 | .dir_inode_ops = &nfs3_dir_inode_operations, | 854 | .dir_inode_ops = &nfs3_dir_inode_operations, |
855 | .file_inode_ops = &nfs3_file_inode_operations, | 855 | .file_inode_ops = &nfs3_file_inode_operations, |
856 | .file_ops = &nfs_file_operations, | ||
856 | .getroot = nfs3_proc_get_root, | 857 | .getroot = nfs3_proc_get_root, |
857 | .getattr = nfs3_proc_getattr, | 858 | .getattr = nfs3_proc_getattr, |
858 | .setattr = nfs3_proc_setattr, | 859 | .setattr = nfs3_proc_setattr, |
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index c4a69833dd0..3e93e9a1bee 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h | |||
@@ -48,6 +48,7 @@ enum nfs4_client_state { | |||
48 | NFS4CLNT_SESSION_RESET, | 48 | NFS4CLNT_SESSION_RESET, |
49 | NFS4CLNT_RECALL_SLOT, | 49 | NFS4CLNT_RECALL_SLOT, |
50 | NFS4CLNT_LEASE_CONFIRM, | 50 | NFS4CLNT_LEASE_CONFIRM, |
51 | NFS4CLNT_SERVER_SCOPE_MISMATCH, | ||
51 | }; | 52 | }; |
52 | 53 | ||
53 | enum nfs4_session_state { | 54 | enum nfs4_session_state { |
@@ -55,6 +56,9 @@ enum nfs4_session_state { | |||
55 | NFS4_SESSION_DRAINING, | 56 | NFS4_SESSION_DRAINING, |
56 | }; | 57 | }; |
57 | 58 | ||
59 | #define NFS4_RENEW_TIMEOUT 0x01 | ||
60 | #define NFS4_RENEW_DELEGATION_CB 0x02 | ||
61 | |||
58 | struct nfs4_minor_version_ops { | 62 | struct nfs4_minor_version_ops { |
59 | u32 minor_version; | 63 | u32 minor_version; |
60 | 64 | ||
@@ -66,6 +70,8 @@ struct nfs4_minor_version_ops { | |||
66 | int cache_reply); | 70 | int cache_reply); |
67 | int (*validate_stateid)(struct nfs_delegation *, | 71 | int (*validate_stateid)(struct nfs_delegation *, |
68 | const nfs4_stateid *); | 72 | const nfs4_stateid *); |
73 | int (*find_root_sec)(struct nfs_server *, struct nfs_fh *, | ||
74 | struct nfs_fsinfo *); | ||
69 | const struct nfs4_state_recovery_ops *reboot_recovery_ops; | 75 | const struct nfs4_state_recovery_ops *reboot_recovery_ops; |
70 | const struct nfs4_state_recovery_ops *nograce_recovery_ops; | 76 | const struct nfs4_state_recovery_ops *nograce_recovery_ops; |
71 | const struct nfs4_state_maintenance_ops *state_renewal_ops; | 77 | const struct nfs4_state_maintenance_ops *state_renewal_ops; |
@@ -222,7 +228,7 @@ struct nfs4_state_recovery_ops { | |||
222 | }; | 228 | }; |
223 | 229 | ||
224 | struct nfs4_state_maintenance_ops { | 230 | struct nfs4_state_maintenance_ops { |
225 | int (*sched_state_renewal)(struct nfs_client *, struct rpc_cred *); | 231 | int (*sched_state_renewal)(struct nfs_client *, struct rpc_cred *, unsigned); |
226 | struct rpc_cred * (*get_state_renewal_cred_locked)(struct nfs_client *); | 232 | struct rpc_cred * (*get_state_renewal_cred_locked)(struct nfs_client *); |
227 | int (*renew_lease)(struct nfs_client *, struct rpc_cred *); | 233 | int (*renew_lease)(struct nfs_client *, struct rpc_cred *); |
228 | }; | 234 | }; |
@@ -234,11 +240,9 @@ extern const struct inode_operations nfs4_dir_inode_operations; | |||
234 | extern int nfs4_proc_setclientid(struct nfs_client *, u32, unsigned short, struct rpc_cred *, struct nfs4_setclientid_res *); | 240 | extern int nfs4_proc_setclientid(struct nfs_client *, u32, unsigned short, struct rpc_cred *, struct nfs4_setclientid_res *); |
235 | extern int nfs4_proc_setclientid_confirm(struct nfs_client *, struct nfs4_setclientid_res *arg, struct rpc_cred *); | 241 | extern int nfs4_proc_setclientid_confirm(struct nfs_client *, struct nfs4_setclientid_res *arg, struct rpc_cred *); |
236 | extern int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred); | 242 | extern int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred); |
237 | extern int nfs4_proc_async_renew(struct nfs_client *, struct rpc_cred *); | ||
238 | extern int nfs4_proc_renew(struct nfs_client *, struct rpc_cred *); | ||
239 | extern int nfs4_init_clientid(struct nfs_client *, struct rpc_cred *); | 243 | extern int nfs4_init_clientid(struct nfs_client *, struct rpc_cred *); |
240 | extern int nfs41_init_clientid(struct nfs_client *, struct rpc_cred *); | 244 | extern int nfs41_init_clientid(struct nfs_client *, struct rpc_cred *); |
241 | extern int nfs4_do_close(struct path *path, struct nfs4_state *state, gfp_t gfp_mask, int wait, bool roc); | 245 | extern int nfs4_do_close(struct nfs4_state *state, gfp_t gfp_mask, int wait, bool roc); |
242 | extern int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle); | 246 | extern int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle); |
243 | extern int nfs4_proc_fs_locations(struct inode *dir, const struct qstr *name, | 247 | extern int nfs4_proc_fs_locations(struct inode *dir, const struct qstr *name, |
244 | struct nfs4_fs_locations *fs_locations, struct page *page); | 248 | struct nfs4_fs_locations *fs_locations, struct page *page); |
@@ -315,7 +319,7 @@ extern const struct nfs4_minor_version_ops *nfs_v4_minor_ops[]; | |||
315 | extern const u32 nfs4_fattr_bitmap[2]; | 319 | extern const u32 nfs4_fattr_bitmap[2]; |
316 | extern const u32 nfs4_statfs_bitmap[2]; | 320 | extern const u32 nfs4_statfs_bitmap[2]; |
317 | extern const u32 nfs4_pathconf_bitmap[2]; | 321 | extern const u32 nfs4_pathconf_bitmap[2]; |
318 | extern const u32 nfs4_fsinfo_bitmap[2]; | 322 | extern const u32 nfs4_fsinfo_bitmap[3]; |
319 | extern const u32 nfs4_fs_locations_bitmap[2]; | 323 | extern const u32 nfs4_fs_locations_bitmap[2]; |
320 | 324 | ||
321 | /* nfs4renewd.c */ | 325 | /* nfs4renewd.c */ |
@@ -341,14 +345,17 @@ extern struct nfs4_state_owner * nfs4_get_state_owner(struct nfs_server *, struc | |||
341 | extern void nfs4_put_state_owner(struct nfs4_state_owner *); | 345 | extern void nfs4_put_state_owner(struct nfs4_state_owner *); |
342 | extern struct nfs4_state * nfs4_get_open_state(struct inode *, struct nfs4_state_owner *); | 346 | extern struct nfs4_state * nfs4_get_open_state(struct inode *, struct nfs4_state_owner *); |
343 | extern void nfs4_put_open_state(struct nfs4_state *); | 347 | extern void nfs4_put_open_state(struct nfs4_state *); |
344 | extern void nfs4_close_state(struct path *, struct nfs4_state *, fmode_t); | 348 | extern void nfs4_close_state(struct nfs4_state *, fmode_t); |
345 | extern void nfs4_close_sync(struct path *, struct nfs4_state *, fmode_t); | 349 | extern void nfs4_close_sync(struct nfs4_state *, fmode_t); |
346 | extern void nfs4_state_set_mode_locked(struct nfs4_state *, fmode_t); | 350 | extern void nfs4_state_set_mode_locked(struct nfs4_state *, fmode_t); |
347 | extern void nfs4_schedule_lease_recovery(struct nfs_client *); | 351 | extern void nfs4_schedule_lease_recovery(struct nfs_client *); |
348 | extern void nfs4_schedule_state_manager(struct nfs_client *); | 352 | extern void nfs4_schedule_state_manager(struct nfs_client *); |
353 | extern void nfs4_schedule_path_down_recovery(struct nfs_client *clp); | ||
349 | extern void nfs4_schedule_stateid_recovery(const struct nfs_server *, struct nfs4_state *); | 354 | extern void nfs4_schedule_stateid_recovery(const struct nfs_server *, struct nfs4_state *); |
350 | extern void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags); | 355 | extern void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags); |
351 | extern void nfs41_handle_recall_slot(struct nfs_client *clp); | 356 | extern void nfs41_handle_recall_slot(struct nfs_client *clp); |
357 | extern void nfs41_handle_server_scope(struct nfs_client *, | ||
358 | struct server_scope **); | ||
352 | extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp); | 359 | extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp); |
353 | extern int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl); | 360 | extern int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl); |
354 | extern void nfs4_copy_stateid(nfs4_stateid *, struct nfs4_state *, fl_owner_t, pid_t); | 361 | extern void nfs4_copy_stateid(nfs4_stateid *, struct nfs4_state *, fl_owner_t, pid_t); |
@@ -373,8 +380,8 @@ extern struct svc_version nfs4_callback_version4; | |||
373 | 380 | ||
374 | #else | 381 | #else |
375 | 382 | ||
376 | #define nfs4_close_state(a, b, c) do { } while (0) | 383 | #define nfs4_close_state(a, b) do { } while (0) |
377 | #define nfs4_close_sync(a, b, c) do { } while (0) | 384 | #define nfs4_close_sync(a, b) do { } while (0) |
378 | 385 | ||
379 | #endif /* CONFIG_NFS_V4 */ | 386 | #endif /* CONFIG_NFS_V4 */ |
380 | #endif /* __LINUX_FS_NFS_NFS4_FS.H */ | 387 | #endif /* __LINUX_FS_NFS_NFS4_FS.H */ |
diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index f9d03abcd04..4c78c62639e 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c | |||
@@ -77,19 +77,6 @@ filelayout_get_dserver_offset(struct pnfs_layout_segment *lseg, loff_t offset) | |||
77 | BUG(); | 77 | BUG(); |
78 | } | 78 | } |
79 | 79 | ||
80 | /* For data server errors we don't recover from */ | ||
81 | static void | ||
82 | filelayout_set_lo_fail(struct pnfs_layout_segment *lseg) | ||
83 | { | ||
84 | if (lseg->pls_range.iomode == IOMODE_RW) { | ||
85 | dprintk("%s Setting layout IOMODE_RW fail bit\n", __func__); | ||
86 | set_bit(lo_fail_bit(IOMODE_RW), &lseg->pls_layout->plh_flags); | ||
87 | } else { | ||
88 | dprintk("%s Setting layout IOMODE_READ fail bit\n", __func__); | ||
89 | set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags); | ||
90 | } | ||
91 | } | ||
92 | |||
93 | static int filelayout_async_handle_error(struct rpc_task *task, | 80 | static int filelayout_async_handle_error(struct rpc_task *task, |
94 | struct nfs4_state *state, | 81 | struct nfs4_state *state, |
95 | struct nfs_client *clp, | 82 | struct nfs_client *clp, |
@@ -145,7 +132,7 @@ static int filelayout_read_done_cb(struct rpc_task *task, | |||
145 | dprintk("%s calling restart ds_clp %p ds_clp->cl_session %p\n", | 132 | dprintk("%s calling restart ds_clp %p ds_clp->cl_session %p\n", |
146 | __func__, data->ds_clp, data->ds_clp->cl_session); | 133 | __func__, data->ds_clp, data->ds_clp->cl_session); |
147 | if (reset) { | 134 | if (reset) { |
148 | filelayout_set_lo_fail(data->lseg); | 135 | pnfs_set_lo_fail(data->lseg); |
149 | nfs4_reset_read(task, data); | 136 | nfs4_reset_read(task, data); |
150 | clp = NFS_SERVER(data->inode)->nfs_client; | 137 | clp = NFS_SERVER(data->inode)->nfs_client; |
151 | } | 138 | } |
@@ -170,7 +157,7 @@ filelayout_set_layoutcommit(struct nfs_write_data *wdata) | |||
170 | 157 | ||
171 | pnfs_set_layoutcommit(wdata); | 158 | pnfs_set_layoutcommit(wdata); |
172 | dprintk("%s ionde %lu pls_end_pos %lu\n", __func__, wdata->inode->i_ino, | 159 | dprintk("%s ionde %lu pls_end_pos %lu\n", __func__, wdata->inode->i_ino, |
173 | (unsigned long) wdata->lseg->pls_end_pos); | 160 | (unsigned long) NFS_I(wdata->inode)->layout->plh_lwb); |
174 | } | 161 | } |
175 | 162 | ||
176 | /* | 163 | /* |
@@ -221,7 +208,7 @@ static int filelayout_write_done_cb(struct rpc_task *task, | |||
221 | dprintk("%s calling restart ds_clp %p ds_clp->cl_session %p\n", | 208 | dprintk("%s calling restart ds_clp %p ds_clp->cl_session %p\n", |
222 | __func__, data->ds_clp, data->ds_clp->cl_session); | 209 | __func__, data->ds_clp, data->ds_clp->cl_session); |
223 | if (reset) { | 210 | if (reset) { |
224 | filelayout_set_lo_fail(data->lseg); | 211 | pnfs_set_lo_fail(data->lseg); |
225 | nfs4_reset_write(task, data); | 212 | nfs4_reset_write(task, data); |
226 | clp = NFS_SERVER(data->inode)->nfs_client; | 213 | clp = NFS_SERVER(data->inode)->nfs_client; |
227 | } else | 214 | } else |
@@ -256,7 +243,7 @@ static int filelayout_commit_done_cb(struct rpc_task *task, | |||
256 | __func__, data->ds_clp, data->ds_clp->cl_session); | 243 | __func__, data->ds_clp, data->ds_clp->cl_session); |
257 | if (reset) { | 244 | if (reset) { |
258 | prepare_to_resend_writes(data); | 245 | prepare_to_resend_writes(data); |
259 | filelayout_set_lo_fail(data->lseg); | 246 | pnfs_set_lo_fail(data->lseg); |
260 | } else | 247 | } else |
261 | nfs_restart_rpc(task, data->ds_clp); | 248 | nfs_restart_rpc(task, data->ds_clp); |
262 | return -EAGAIN; | 249 | return -EAGAIN; |
@@ -334,6 +321,9 @@ filelayout_read_pagelist(struct nfs_read_data *data) | |||
334 | __func__, data->inode->i_ino, | 321 | __func__, data->inode->i_ino, |
335 | data->args.pgbase, (size_t)data->args.count, offset); | 322 | data->args.pgbase, (size_t)data->args.count, offset); |
336 | 323 | ||
324 | if (test_bit(NFS_DEVICEID_INVALID, &FILELAYOUT_DEVID_NODE(lseg)->flags)) | ||
325 | return PNFS_NOT_ATTEMPTED; | ||
326 | |||
337 | /* Retrieve the correct rpc_client for the byte range */ | 327 | /* Retrieve the correct rpc_client for the byte range */ |
338 | j = nfs4_fl_calc_j_index(lseg, offset); | 328 | j = nfs4_fl_calc_j_index(lseg, offset); |
339 | idx = nfs4_fl_calc_ds_index(lseg, j); | 329 | idx = nfs4_fl_calc_ds_index(lseg, j); |
@@ -344,8 +334,7 @@ filelayout_read_pagelist(struct nfs_read_data *data) | |||
344 | set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags); | 334 | set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags); |
345 | return PNFS_NOT_ATTEMPTED; | 335 | return PNFS_NOT_ATTEMPTED; |
346 | } | 336 | } |
347 | dprintk("%s USE DS:ip %x %hu\n", __func__, | 337 | dprintk("%s USE DS: %s\n", __func__, ds->ds_remotestr); |
348 | ntohl(ds->ds_ip_addr), ntohs(ds->ds_port)); | ||
349 | 338 | ||
350 | /* No multipath support. Use first DS */ | 339 | /* No multipath support. Use first DS */ |
351 | data->ds_clp = ds->ds_clp; | 340 | data->ds_clp = ds->ds_clp; |
@@ -374,6 +363,9 @@ filelayout_write_pagelist(struct nfs_write_data *data, int sync) | |||
374 | struct nfs_fh *fh; | 363 | struct nfs_fh *fh; |
375 | int status; | 364 | int status; |
376 | 365 | ||
366 | if (test_bit(NFS_DEVICEID_INVALID, &FILELAYOUT_DEVID_NODE(lseg)->flags)) | ||
367 | return PNFS_NOT_ATTEMPTED; | ||
368 | |||
377 | /* Retrieve the correct rpc_client for the byte range */ | 369 | /* Retrieve the correct rpc_client for the byte range */ |
378 | j = nfs4_fl_calc_j_index(lseg, offset); | 370 | j = nfs4_fl_calc_j_index(lseg, offset); |
379 | idx = nfs4_fl_calc_ds_index(lseg, j); | 371 | idx = nfs4_fl_calc_ds_index(lseg, j); |
@@ -384,9 +376,9 @@ filelayout_write_pagelist(struct nfs_write_data *data, int sync) | |||
384 | set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags); | 376 | set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags); |
385 | return PNFS_NOT_ATTEMPTED; | 377 | return PNFS_NOT_ATTEMPTED; |
386 | } | 378 | } |
387 | dprintk("%s ino %lu sync %d req %Zu@%llu DS:%x:%hu\n", __func__, | 379 | dprintk("%s ino %lu sync %d req %Zu@%llu DS: %s\n", __func__, |
388 | data->inode->i_ino, sync, (size_t) data->args.count, offset, | 380 | data->inode->i_ino, sync, (size_t) data->args.count, offset, |
389 | ntohl(ds->ds_ip_addr), ntohs(ds->ds_port)); | 381 | ds->ds_remotestr); |
390 | 382 | ||
391 | data->write_done_cb = filelayout_write_done_cb; | 383 | data->write_done_cb = filelayout_write_done_cb; |
392 | data->ds_clp = ds->ds_clp; | 384 | data->ds_clp = ds->ds_clp; |
@@ -428,6 +420,14 @@ filelayout_check_layout(struct pnfs_layout_hdr *lo, | |||
428 | 420 | ||
429 | dprintk("--> %s\n", __func__); | 421 | dprintk("--> %s\n", __func__); |
430 | 422 | ||
423 | /* FIXME: remove this check when layout segment support is added */ | ||
424 | if (lgr->range.offset != 0 || | ||
425 | lgr->range.length != NFS4_MAX_UINT64) { | ||
426 | dprintk("%s Only whole file layouts supported. Use MDS i/o\n", | ||
427 | __func__); | ||
428 | goto out; | ||
429 | } | ||
430 | |||
431 | if (fl->pattern_offset > lgr->range.offset) { | 431 | if (fl->pattern_offset > lgr->range.offset) { |
432 | dprintk("%s pattern_offset %lld too large\n", | 432 | dprintk("%s pattern_offset %lld too large\n", |
433 | __func__, fl->pattern_offset); | 433 | __func__, fl->pattern_offset); |
@@ -449,6 +449,10 @@ filelayout_check_layout(struct pnfs_layout_hdr *lo, | |||
449 | goto out; | 449 | goto out; |
450 | } else | 450 | } else |
451 | dsaddr = container_of(d, struct nfs4_file_layout_dsaddr, id_node); | 451 | dsaddr = container_of(d, struct nfs4_file_layout_dsaddr, id_node); |
452 | /* Found deviceid is being reaped */ | ||
453 | if (test_bit(NFS_DEVICEID_INVALID, &dsaddr->id_node.flags)) | ||
454 | goto out_put; | ||
455 | |||
452 | fl->dsaddr = dsaddr; | 456 | fl->dsaddr = dsaddr; |
453 | 457 | ||
454 | if (fl->first_stripe_index < 0 || | 458 | if (fl->first_stripe_index < 0 || |
@@ -659,7 +663,7 @@ filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid, | |||
659 | * return true : coalesce page | 663 | * return true : coalesce page |
660 | * return false : don't coalesce page | 664 | * return false : don't coalesce page |
661 | */ | 665 | */ |
662 | bool | 666 | static bool |
663 | filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, | 667 | filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, |
664 | struct nfs_page *req) | 668 | struct nfs_page *req) |
665 | { | 669 | { |
@@ -670,8 +674,6 @@ filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, | |||
670 | !nfs_generic_pg_test(pgio, prev, req)) | 674 | !nfs_generic_pg_test(pgio, prev, req)) |
671 | return false; | 675 | return false; |
672 | 676 | ||
673 | if (!pgio->pg_lseg) | ||
674 | return 1; | ||
675 | p_stripe = (u64)prev->wb_index << PAGE_CACHE_SHIFT; | 677 | p_stripe = (u64)prev->wb_index << PAGE_CACHE_SHIFT; |
676 | r_stripe = (u64)req->wb_index << PAGE_CACHE_SHIFT; | 678 | r_stripe = (u64)req->wb_index << PAGE_CACHE_SHIFT; |
677 | stripe_unit = FILELAYOUT_LSEG(pgio->pg_lseg)->stripe_unit; | 679 | stripe_unit = FILELAYOUT_LSEG(pgio->pg_lseg)->stripe_unit; |
@@ -682,6 +684,52 @@ filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, | |||
682 | return (p_stripe == r_stripe); | 684 | return (p_stripe == r_stripe); |
683 | } | 685 | } |
684 | 686 | ||
687 | void | ||
688 | filelayout_pg_init_read(struct nfs_pageio_descriptor *pgio, | ||
689 | struct nfs_page *req) | ||
690 | { | ||
691 | BUG_ON(pgio->pg_lseg != NULL); | ||
692 | |||
693 | pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, | ||
694 | req->wb_context, | ||
695 | 0, | ||
696 | NFS4_MAX_UINT64, | ||
697 | IOMODE_READ, | ||
698 | GFP_KERNEL); | ||
699 | /* If no lseg, fall back to read through mds */ | ||
700 | if (pgio->pg_lseg == NULL) | ||
701 | nfs_pageio_reset_read_mds(pgio); | ||
702 | } | ||
703 | |||
704 | void | ||
705 | filelayout_pg_init_write(struct nfs_pageio_descriptor *pgio, | ||
706 | struct nfs_page *req) | ||
707 | { | ||
708 | BUG_ON(pgio->pg_lseg != NULL); | ||
709 | |||
710 | pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, | ||
711 | req->wb_context, | ||
712 | 0, | ||
713 | NFS4_MAX_UINT64, | ||
714 | IOMODE_RW, | ||
715 | GFP_NOFS); | ||
716 | /* If no lseg, fall back to write through mds */ | ||
717 | if (pgio->pg_lseg == NULL) | ||
718 | nfs_pageio_reset_write_mds(pgio); | ||
719 | } | ||
720 | |||
721 | static const struct nfs_pageio_ops filelayout_pg_read_ops = { | ||
722 | .pg_init = filelayout_pg_init_read, | ||
723 | .pg_test = filelayout_pg_test, | ||
724 | .pg_doio = pnfs_generic_pg_readpages, | ||
725 | }; | ||
726 | |||
727 | static const struct nfs_pageio_ops filelayout_pg_write_ops = { | ||
728 | .pg_init = filelayout_pg_init_write, | ||
729 | .pg_test = filelayout_pg_test, | ||
730 | .pg_doio = pnfs_generic_pg_writepages, | ||
731 | }; | ||
732 | |||
685 | static bool filelayout_mark_pnfs_commit(struct pnfs_layout_segment *lseg) | 733 | static bool filelayout_mark_pnfs_commit(struct pnfs_layout_segment *lseg) |
686 | { | 734 | { |
687 | return !FILELAYOUT_LSEG(lseg)->commit_through_mds; | 735 | return !FILELAYOUT_LSEG(lseg)->commit_through_mds; |
@@ -879,7 +927,8 @@ static struct pnfs_layoutdriver_type filelayout_type = { | |||
879 | .owner = THIS_MODULE, | 927 | .owner = THIS_MODULE, |
880 | .alloc_lseg = filelayout_alloc_lseg, | 928 | .alloc_lseg = filelayout_alloc_lseg, |
881 | .free_lseg = filelayout_free_lseg, | 929 | .free_lseg = filelayout_free_lseg, |
882 | .pg_test = filelayout_pg_test, | 930 | .pg_read_ops = &filelayout_pg_read_ops, |
931 | .pg_write_ops = &filelayout_pg_write_ops, | ||
883 | .mark_pnfs_commit = filelayout_mark_pnfs_commit, | 932 | .mark_pnfs_commit = filelayout_mark_pnfs_commit, |
884 | .choose_commit_list = filelayout_choose_commit_list, | 933 | .choose_commit_list = filelayout_choose_commit_list, |
885 | .commit_pagelist = filelayout_commit_pagelist, | 934 | .commit_pagelist = filelayout_commit_pagelist, |
@@ -902,5 +951,7 @@ static void __exit nfs4filelayout_exit(void) | |||
902 | pnfs_unregister_layoutdriver(&filelayout_type); | 951 | pnfs_unregister_layoutdriver(&filelayout_type); |
903 | } | 952 | } |
904 | 953 | ||
954 | MODULE_ALIAS("nfs-layouttype4-1"); | ||
955 | |||
905 | module_init(nfs4filelayout_init); | 956 | module_init(nfs4filelayout_init); |
906 | module_exit(nfs4filelayout_exit); | 957 | module_exit(nfs4filelayout_exit); |
diff --git a/fs/nfs/nfs4filelayout.h b/fs/nfs/nfs4filelayout.h index cebe01e3795..2e42284253f 100644 --- a/fs/nfs/nfs4filelayout.h +++ b/fs/nfs/nfs4filelayout.h | |||
@@ -47,10 +47,17 @@ enum stripetype4 { | |||
47 | }; | 47 | }; |
48 | 48 | ||
49 | /* Individual ip address */ | 49 | /* Individual ip address */ |
50 | struct nfs4_pnfs_ds_addr { | ||
51 | struct sockaddr_storage da_addr; | ||
52 | size_t da_addrlen; | ||
53 | struct list_head da_node; /* nfs4_pnfs_dev_hlist dev_dslist */ | ||
54 | char *da_remotestr; /* human readable addr+port */ | ||
55 | }; | ||
56 | |||
50 | struct nfs4_pnfs_ds { | 57 | struct nfs4_pnfs_ds { |
51 | struct list_head ds_node; /* nfs4_pnfs_dev_hlist dev_dslist */ | 58 | struct list_head ds_node; /* nfs4_pnfs_dev_hlist dev_dslist */ |
52 | u32 ds_ip_addr; | 59 | char *ds_remotestr; /* comma sep list of addrs */ |
53 | u32 ds_port; | 60 | struct list_head ds_addrs; |
54 | struct nfs_client *ds_clp; | 61 | struct nfs_client *ds_clp; |
55 | atomic_t ds_count; | 62 | atomic_t ds_count; |
56 | }; | 63 | }; |
@@ -89,6 +96,12 @@ FILELAYOUT_LSEG(struct pnfs_layout_segment *lseg) | |||
89 | generic_hdr); | 96 | generic_hdr); |
90 | } | 97 | } |
91 | 98 | ||
99 | static inline struct nfs4_deviceid_node * | ||
100 | FILELAYOUT_DEVID_NODE(struct pnfs_layout_segment *lseg) | ||
101 | { | ||
102 | return &FILELAYOUT_LSEG(lseg)->dsaddr->id_node; | ||
103 | } | ||
104 | |||
92 | extern struct nfs_fh * | 105 | extern struct nfs_fh * |
93 | nfs4_fl_select_ds_fh(struct pnfs_layout_segment *lseg, u32 j); | 106 | nfs4_fl_select_ds_fh(struct pnfs_layout_segment *lseg, u32 j); |
94 | 107 | ||
diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/nfs4filelayoutdev.c index 3b7bf137726..ed388aae968 100644 --- a/fs/nfs/nfs4filelayoutdev.c +++ b/fs/nfs/nfs4filelayoutdev.c | |||
@@ -56,54 +56,139 @@ print_ds(struct nfs4_pnfs_ds *ds) | |||
56 | printk("%s NULL device\n", __func__); | 56 | printk("%s NULL device\n", __func__); |
57 | return; | 57 | return; |
58 | } | 58 | } |
59 | printk(" ip_addr %x port %hu\n" | 59 | printk(" ds %s\n" |
60 | " ref count %d\n" | 60 | " ref count %d\n" |
61 | " client %p\n" | 61 | " client %p\n" |
62 | " cl_exchange_flags %x\n", | 62 | " cl_exchange_flags %x\n", |
63 | ntohl(ds->ds_ip_addr), ntohs(ds->ds_port), | 63 | ds->ds_remotestr, |
64 | atomic_read(&ds->ds_count), ds->ds_clp, | 64 | atomic_read(&ds->ds_count), ds->ds_clp, |
65 | ds->ds_clp ? ds->ds_clp->cl_exchange_flags : 0); | 65 | ds->ds_clp ? ds->ds_clp->cl_exchange_flags : 0); |
66 | } | 66 | } |
67 | 67 | ||
68 | /* nfs4_ds_cache_lock is held */ | 68 | static bool |
69 | static struct nfs4_pnfs_ds * | 69 | same_sockaddr(struct sockaddr *addr1, struct sockaddr *addr2) |
70 | _data_server_lookup_locked(u32 ip_addr, u32 port) | ||
71 | { | 70 | { |
72 | struct nfs4_pnfs_ds *ds; | 71 | struct sockaddr_in *a, *b; |
72 | struct sockaddr_in6 *a6, *b6; | ||
73 | |||
74 | if (addr1->sa_family != addr2->sa_family) | ||
75 | return false; | ||
76 | |||
77 | switch (addr1->sa_family) { | ||
78 | case AF_INET: | ||
79 | a = (struct sockaddr_in *)addr1; | ||
80 | b = (struct sockaddr_in *)addr2; | ||
81 | |||
82 | if (a->sin_addr.s_addr == b->sin_addr.s_addr && | ||
83 | a->sin_port == b->sin_port) | ||
84 | return true; | ||
85 | break; | ||
86 | |||
87 | case AF_INET6: | ||
88 | a6 = (struct sockaddr_in6 *)addr1; | ||
89 | b6 = (struct sockaddr_in6 *)addr2; | ||
90 | |||
91 | /* LINKLOCAL addresses must have matching scope_id */ | ||
92 | if (ipv6_addr_scope(&a6->sin6_addr) == | ||
93 | IPV6_ADDR_SCOPE_LINKLOCAL && | ||
94 | a6->sin6_scope_id != b6->sin6_scope_id) | ||
95 | return false; | ||
96 | |||
97 | if (ipv6_addr_equal(&a6->sin6_addr, &b6->sin6_addr) && | ||
98 | a6->sin6_port == b6->sin6_port) | ||
99 | return true; | ||
100 | break; | ||
101 | |||
102 | default: | ||
103 | dprintk("%s: unhandled address family: %u\n", | ||
104 | __func__, addr1->sa_family); | ||
105 | return false; | ||
106 | } | ||
73 | 107 | ||
74 | dprintk("_data_server_lookup: ip_addr=%x port=%hu\n", | 108 | return false; |
75 | ntohl(ip_addr), ntohs(port)); | 109 | } |
76 | 110 | ||
77 | list_for_each_entry(ds, &nfs4_data_server_cache, ds_node) { | 111 | /* |
78 | if (ds->ds_ip_addr == ip_addr && | 112 | * Lookup DS by addresses. The first matching address returns true. |
79 | ds->ds_port == port) { | 113 | * nfs4_ds_cache_lock is held |
80 | return ds; | 114 | */ |
115 | static struct nfs4_pnfs_ds * | ||
116 | _data_server_lookup_locked(struct list_head *dsaddrs) | ||
117 | { | ||
118 | struct nfs4_pnfs_ds *ds; | ||
119 | struct nfs4_pnfs_ds_addr *da1, *da2; | ||
120 | |||
121 | list_for_each_entry(da1, dsaddrs, da_node) { | ||
122 | list_for_each_entry(ds, &nfs4_data_server_cache, ds_node) { | ||
123 | list_for_each_entry(da2, &ds->ds_addrs, da_node) { | ||
124 | if (same_sockaddr( | ||
125 | (struct sockaddr *)&da1->da_addr, | ||
126 | (struct sockaddr *)&da2->da_addr)) | ||
127 | return ds; | ||
128 | } | ||
81 | } | 129 | } |
82 | } | 130 | } |
83 | return NULL; | 131 | return NULL; |
84 | } | 132 | } |
85 | 133 | ||
86 | /* | 134 | /* |
135 | * Compare two lists of addresses. | ||
136 | */ | ||
137 | static bool | ||
138 | _data_server_match_all_addrs_locked(struct list_head *dsaddrs1, | ||
139 | struct list_head *dsaddrs2) | ||
140 | { | ||
141 | struct nfs4_pnfs_ds_addr *da1, *da2; | ||
142 | size_t count1 = 0, | ||
143 | count2 = 0; | ||
144 | |||
145 | list_for_each_entry(da1, dsaddrs1, da_node) | ||
146 | count1++; | ||
147 | |||
148 | list_for_each_entry(da2, dsaddrs2, da_node) { | ||
149 | bool found = false; | ||
150 | count2++; | ||
151 | list_for_each_entry(da1, dsaddrs1, da_node) { | ||
152 | if (same_sockaddr((struct sockaddr *)&da1->da_addr, | ||
153 | (struct sockaddr *)&da2->da_addr)) { | ||
154 | found = true; | ||
155 | break; | ||
156 | } | ||
157 | } | ||
158 | if (!found) | ||
159 | return false; | ||
160 | } | ||
161 | |||
162 | return (count1 == count2); | ||
163 | } | ||
164 | |||
165 | /* | ||
87 | * Create an rpc connection to the nfs4_pnfs_ds data server | 166 | * Create an rpc connection to the nfs4_pnfs_ds data server |
88 | * Currently only support IPv4 | 167 | * Currently only supports IPv4 and IPv6 addresses |
89 | */ | 168 | */ |
90 | static int | 169 | static int |
91 | nfs4_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds) | 170 | nfs4_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds) |
92 | { | 171 | { |
93 | struct nfs_client *clp; | 172 | struct nfs_client *clp = ERR_PTR(-EIO); |
94 | struct sockaddr_in sin; | 173 | struct nfs4_pnfs_ds_addr *da; |
95 | int status = 0; | 174 | int status = 0; |
96 | 175 | ||
97 | dprintk("--> %s ip:port %x:%hu au_flavor %d\n", __func__, | 176 | dprintk("--> %s DS %s au_flavor %d\n", __func__, ds->ds_remotestr, |
98 | ntohl(ds->ds_ip_addr), ntohs(ds->ds_port), | ||
99 | mds_srv->nfs_client->cl_rpcclient->cl_auth->au_flavor); | 177 | mds_srv->nfs_client->cl_rpcclient->cl_auth->au_flavor); |
100 | 178 | ||
101 | sin.sin_family = AF_INET; | 179 | BUG_ON(list_empty(&ds->ds_addrs)); |
102 | sin.sin_addr.s_addr = ds->ds_ip_addr; | 180 | |
103 | sin.sin_port = ds->ds_port; | 181 | list_for_each_entry(da, &ds->ds_addrs, da_node) { |
182 | dprintk("%s: DS %s: trying address %s\n", | ||
183 | __func__, ds->ds_remotestr, da->da_remotestr); | ||
184 | |||
185 | clp = nfs4_set_ds_client(mds_srv->nfs_client, | ||
186 | (struct sockaddr *)&da->da_addr, | ||
187 | da->da_addrlen, IPPROTO_TCP); | ||
188 | if (!IS_ERR(clp)) | ||
189 | break; | ||
190 | } | ||
104 | 191 | ||
105 | clp = nfs4_set_ds_client(mds_srv->nfs_client, (struct sockaddr *)&sin, | ||
106 | sizeof(sin), IPPROTO_TCP); | ||
107 | if (IS_ERR(clp)) { | 192 | if (IS_ERR(clp)) { |
108 | status = PTR_ERR(clp); | 193 | status = PTR_ERR(clp); |
109 | goto out; | 194 | goto out; |
@@ -115,8 +200,8 @@ nfs4_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds) | |||
115 | goto out_put; | 200 | goto out_put; |
116 | } | 201 | } |
117 | ds->ds_clp = clp; | 202 | ds->ds_clp = clp; |
118 | dprintk("%s [existing] ip=%x, port=%hu\n", __func__, | 203 | dprintk("%s [existing] server=%s\n", __func__, |
119 | ntohl(ds->ds_ip_addr), ntohs(ds->ds_port)); | 204 | ds->ds_remotestr); |
120 | goto out; | 205 | goto out; |
121 | } | 206 | } |
122 | 207 | ||
@@ -135,8 +220,7 @@ nfs4_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds) | |||
135 | goto out_put; | 220 | goto out_put; |
136 | 221 | ||
137 | ds->ds_clp = clp; | 222 | ds->ds_clp = clp; |
138 | dprintk("%s [new] ip=%x, port=%hu\n", __func__, ntohl(ds->ds_ip_addr), | 223 | dprintk("%s [new] addr: %s\n", __func__, ds->ds_remotestr); |
139 | ntohs(ds->ds_port)); | ||
140 | out: | 224 | out: |
141 | return status; | 225 | return status; |
142 | out_put: | 226 | out_put: |
@@ -147,12 +231,25 @@ out_put: | |||
147 | static void | 231 | static void |
148 | destroy_ds(struct nfs4_pnfs_ds *ds) | 232 | destroy_ds(struct nfs4_pnfs_ds *ds) |
149 | { | 233 | { |
234 | struct nfs4_pnfs_ds_addr *da; | ||
235 | |||
150 | dprintk("--> %s\n", __func__); | 236 | dprintk("--> %s\n", __func__); |
151 | ifdebug(FACILITY) | 237 | ifdebug(FACILITY) |
152 | print_ds(ds); | 238 | print_ds(ds); |
153 | 239 | ||
154 | if (ds->ds_clp) | 240 | if (ds->ds_clp) |
155 | nfs_put_client(ds->ds_clp); | 241 | nfs_put_client(ds->ds_clp); |
242 | |||
243 | while (!list_empty(&ds->ds_addrs)) { | ||
244 | da = list_first_entry(&ds->ds_addrs, | ||
245 | struct nfs4_pnfs_ds_addr, | ||
246 | da_node); | ||
247 | list_del_init(&da->da_node); | ||
248 | kfree(da->da_remotestr); | ||
249 | kfree(da); | ||
250 | } | ||
251 | |||
252 | kfree(ds->ds_remotestr); | ||
156 | kfree(ds); | 253 | kfree(ds); |
157 | } | 254 | } |
158 | 255 | ||
@@ -179,31 +276,96 @@ nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr) | |||
179 | kfree(dsaddr); | 276 | kfree(dsaddr); |
180 | } | 277 | } |
181 | 278 | ||
279 | /* | ||
280 | * Create a string with a human readable address and port to avoid | ||
281 | * complicated setup around many dprinks. | ||
282 | */ | ||
283 | static char * | ||
284 | nfs4_pnfs_remotestr(struct list_head *dsaddrs, gfp_t gfp_flags) | ||
285 | { | ||
286 | struct nfs4_pnfs_ds_addr *da; | ||
287 | char *remotestr; | ||
288 | size_t len; | ||
289 | char *p; | ||
290 | |||
291 | len = 3; /* '{', '}' and eol */ | ||
292 | list_for_each_entry(da, dsaddrs, da_node) { | ||
293 | len += strlen(da->da_remotestr) + 1; /* string plus comma */ | ||
294 | } | ||
295 | |||
296 | remotestr = kzalloc(len, gfp_flags); | ||
297 | if (!remotestr) | ||
298 | return NULL; | ||
299 | |||
300 | p = remotestr; | ||
301 | *(p++) = '{'; | ||
302 | len--; | ||
303 | list_for_each_entry(da, dsaddrs, da_node) { | ||
304 | size_t ll = strlen(da->da_remotestr); | ||
305 | |||
306 | if (ll > len) | ||
307 | goto out_err; | ||
308 | |||
309 | memcpy(p, da->da_remotestr, ll); | ||
310 | p += ll; | ||
311 | len -= ll; | ||
312 | |||
313 | if (len < 1) | ||
314 | goto out_err; | ||
315 | (*p++) = ','; | ||
316 | len--; | ||
317 | } | ||
318 | if (len < 2) | ||
319 | goto out_err; | ||
320 | *(p++) = '}'; | ||
321 | *p = '\0'; | ||
322 | return remotestr; | ||
323 | out_err: | ||
324 | kfree(remotestr); | ||
325 | return NULL; | ||
326 | } | ||
327 | |||
182 | static struct nfs4_pnfs_ds * | 328 | static struct nfs4_pnfs_ds * |
183 | nfs4_pnfs_ds_add(struct inode *inode, u32 ip_addr, u32 port, gfp_t gfp_flags) | 329 | nfs4_pnfs_ds_add(struct list_head *dsaddrs, gfp_t gfp_flags) |
184 | { | 330 | { |
185 | struct nfs4_pnfs_ds *tmp_ds, *ds; | 331 | struct nfs4_pnfs_ds *tmp_ds, *ds = NULL; |
332 | char *remotestr; | ||
186 | 333 | ||
187 | ds = kzalloc(sizeof(*tmp_ds), gfp_flags); | 334 | if (list_empty(dsaddrs)) { |
335 | dprintk("%s: no addresses defined\n", __func__); | ||
336 | goto out; | ||
337 | } | ||
338 | |||
339 | ds = kzalloc(sizeof(*ds), gfp_flags); | ||
188 | if (!ds) | 340 | if (!ds) |
189 | goto out; | 341 | goto out; |
190 | 342 | ||
343 | /* this is only used for debugging, so it's ok if its NULL */ | ||
344 | remotestr = nfs4_pnfs_remotestr(dsaddrs, gfp_flags); | ||
345 | |||
191 | spin_lock(&nfs4_ds_cache_lock); | 346 | spin_lock(&nfs4_ds_cache_lock); |
192 | tmp_ds = _data_server_lookup_locked(ip_addr, port); | 347 | tmp_ds = _data_server_lookup_locked(dsaddrs); |
193 | if (tmp_ds == NULL) { | 348 | if (tmp_ds == NULL) { |
194 | ds->ds_ip_addr = ip_addr; | 349 | INIT_LIST_HEAD(&ds->ds_addrs); |
195 | ds->ds_port = port; | 350 | list_splice_init(dsaddrs, &ds->ds_addrs); |
351 | ds->ds_remotestr = remotestr; | ||
196 | atomic_set(&ds->ds_count, 1); | 352 | atomic_set(&ds->ds_count, 1); |
197 | INIT_LIST_HEAD(&ds->ds_node); | 353 | INIT_LIST_HEAD(&ds->ds_node); |
198 | ds->ds_clp = NULL; | 354 | ds->ds_clp = NULL; |
199 | list_add(&ds->ds_node, &nfs4_data_server_cache); | 355 | list_add(&ds->ds_node, &nfs4_data_server_cache); |
200 | dprintk("%s add new data server ip 0x%x\n", __func__, | 356 | dprintk("%s add new data server %s\n", __func__, |
201 | ds->ds_ip_addr); | 357 | ds->ds_remotestr); |
202 | } else { | 358 | } else { |
359 | if (!_data_server_match_all_addrs_locked(&tmp_ds->ds_addrs, | ||
360 | dsaddrs)) { | ||
361 | dprintk("%s: multipath address mismatch: %s != %s", | ||
362 | __func__, tmp_ds->ds_remotestr, remotestr); | ||
363 | } | ||
364 | kfree(remotestr); | ||
203 | kfree(ds); | 365 | kfree(ds); |
204 | atomic_inc(&tmp_ds->ds_count); | 366 | atomic_inc(&tmp_ds->ds_count); |
205 | dprintk("%s data server found ip 0x%x, inc'ed ds_count to %d\n", | 367 | dprintk("%s data server %s found, inc'ed ds_count to %d\n", |
206 | __func__, tmp_ds->ds_ip_addr, | 368 | __func__, tmp_ds->ds_remotestr, |
207 | atomic_read(&tmp_ds->ds_count)); | 369 | atomic_read(&tmp_ds->ds_count)); |
208 | ds = tmp_ds; | 370 | ds = tmp_ds; |
209 | } | 371 | } |
@@ -213,18 +375,22 @@ out: | |||
213 | } | 375 | } |
214 | 376 | ||
215 | /* | 377 | /* |
216 | * Currently only support ipv4, and one multi-path address. | 378 | * Currently only supports ipv4, ipv6 and one multi-path address. |
217 | */ | 379 | */ |
218 | static struct nfs4_pnfs_ds * | 380 | static struct nfs4_pnfs_ds_addr * |
219 | decode_and_add_ds(struct xdr_stream *streamp, struct inode *inode, gfp_t gfp_flags) | 381 | decode_ds_addr(struct xdr_stream *streamp, gfp_t gfp_flags) |
220 | { | 382 | { |
221 | struct nfs4_pnfs_ds *ds = NULL; | 383 | struct nfs4_pnfs_ds_addr *da = NULL; |
222 | char *buf; | 384 | char *buf, *portstr; |
223 | const char *ipend, *pstr; | 385 | u32 port; |
224 | u32 ip_addr, port; | 386 | int nlen, rlen; |
225 | int nlen, rlen, i; | ||
226 | int tmp[2]; | 387 | int tmp[2]; |
227 | __be32 *p; | 388 | __be32 *p; |
389 | char *netid, *match_netid; | ||
390 | size_t len, match_netid_len; | ||
391 | char *startsep = ""; | ||
392 | char *endsep = ""; | ||
393 | |||
228 | 394 | ||
229 | /* r_netid */ | 395 | /* r_netid */ |
230 | p = xdr_inline_decode(streamp, 4); | 396 | p = xdr_inline_decode(streamp, 4); |
@@ -236,64 +402,123 @@ decode_and_add_ds(struct xdr_stream *streamp, struct inode *inode, gfp_t gfp_fla | |||
236 | if (unlikely(!p)) | 402 | if (unlikely(!p)) |
237 | goto out_err; | 403 | goto out_err; |
238 | 404 | ||
239 | /* Check that netid is "tcp" */ | 405 | netid = kmalloc(nlen+1, gfp_flags); |
240 | if (nlen != 3 || memcmp((char *)p, "tcp", 3)) { | 406 | if (unlikely(!netid)) |
241 | dprintk("%s: ERROR: non ipv4 TCP r_netid\n", __func__); | ||
242 | goto out_err; | 407 | goto out_err; |
243 | } | ||
244 | 408 | ||
245 | /* r_addr */ | 409 | netid[nlen] = '\0'; |
410 | memcpy(netid, p, nlen); | ||
411 | |||
412 | /* r_addr: ip/ip6addr with port in dec octets - see RFC 5665 */ | ||
246 | p = xdr_inline_decode(streamp, 4); | 413 | p = xdr_inline_decode(streamp, 4); |
247 | if (unlikely(!p)) | 414 | if (unlikely(!p)) |
248 | goto out_err; | 415 | goto out_free_netid; |
249 | rlen = be32_to_cpup(p); | 416 | rlen = be32_to_cpup(p); |
250 | 417 | ||
251 | p = xdr_inline_decode(streamp, rlen); | 418 | p = xdr_inline_decode(streamp, rlen); |
252 | if (unlikely(!p)) | 419 | if (unlikely(!p)) |
253 | goto out_err; | 420 | goto out_free_netid; |
254 | 421 | ||
255 | /* ipv6 length plus port is legal */ | 422 | /* port is ".ABC.DEF", 8 chars max */ |
256 | if (rlen > INET6_ADDRSTRLEN + 8) { | 423 | if (rlen > INET6_ADDRSTRLEN + IPV6_SCOPE_ID_LEN + 8) { |
257 | dprintk("%s: Invalid address, length %d\n", __func__, | 424 | dprintk("%s: Invalid address, length %d\n", __func__, |
258 | rlen); | 425 | rlen); |
259 | goto out_err; | 426 | goto out_free_netid; |
260 | } | 427 | } |
261 | buf = kmalloc(rlen + 1, gfp_flags); | 428 | buf = kmalloc(rlen + 1, gfp_flags); |
262 | if (!buf) { | 429 | if (!buf) { |
263 | dprintk("%s: Not enough memory\n", __func__); | 430 | dprintk("%s: Not enough memory\n", __func__); |
264 | goto out_err; | 431 | goto out_free_netid; |
265 | } | 432 | } |
266 | buf[rlen] = '\0'; | 433 | buf[rlen] = '\0'; |
267 | memcpy(buf, p, rlen); | 434 | memcpy(buf, p, rlen); |
268 | 435 | ||
269 | /* replace the port dots with dashes for the in4_pton() delimiter*/ | 436 | /* replace port '.' with '-' */ |
270 | for (i = 0; i < 2; i++) { | 437 | portstr = strrchr(buf, '.'); |
271 | char *res = strrchr(buf, '.'); | 438 | if (!portstr) { |
272 | if (!res) { | 439 | dprintk("%s: Failed finding expected dot in port\n", |
273 | dprintk("%s: Failed finding expected dots in port\n", | 440 | __func__); |
274 | __func__); | 441 | goto out_free_buf; |
275 | goto out_free; | 442 | } |
276 | } | 443 | *portstr = '-'; |
277 | *res = '-'; | 444 | |
445 | /* find '.' between address and port */ | ||
446 | portstr = strrchr(buf, '.'); | ||
447 | if (!portstr) { | ||
448 | dprintk("%s: Failed finding expected dot between address and " | ||
449 | "port\n", __func__); | ||
450 | goto out_free_buf; | ||
278 | } | 451 | } |
452 | *portstr = '\0'; | ||
279 | 453 | ||
280 | /* Currently only support ipv4 address */ | 454 | da = kzalloc(sizeof(*da), gfp_flags); |
281 | if (in4_pton(buf, rlen, (u8 *)&ip_addr, '-', &ipend) == 0) { | 455 | if (unlikely(!da)) |
282 | dprintk("%s: Only ipv4 addresses supported\n", __func__); | 456 | goto out_free_buf; |
283 | goto out_free; | 457 | |
458 | INIT_LIST_HEAD(&da->da_node); | ||
459 | |||
460 | if (!rpc_pton(buf, portstr-buf, (struct sockaddr *)&da->da_addr, | ||
461 | sizeof(da->da_addr))) { | ||
462 | dprintk("%s: error parsing address %s\n", __func__, buf); | ||
463 | goto out_free_da; | ||
284 | } | 464 | } |
285 | 465 | ||
286 | /* port */ | 466 | portstr++; |
287 | pstr = ipend; | 467 | sscanf(portstr, "%d-%d", &tmp[0], &tmp[1]); |
288 | sscanf(pstr, "-%d-%d", &tmp[0], &tmp[1]); | ||
289 | port = htons((tmp[0] << 8) | (tmp[1])); | 468 | port = htons((tmp[0] << 8) | (tmp[1])); |
290 | 469 | ||
291 | ds = nfs4_pnfs_ds_add(inode, ip_addr, port, gfp_flags); | 470 | switch (da->da_addr.ss_family) { |
292 | dprintk("%s: Decoded address and port %s\n", __func__, buf); | 471 | case AF_INET: |
293 | out_free: | 472 | ((struct sockaddr_in *)&da->da_addr)->sin_port = port; |
473 | da->da_addrlen = sizeof(struct sockaddr_in); | ||
474 | match_netid = "tcp"; | ||
475 | match_netid_len = 3; | ||
476 | break; | ||
477 | |||
478 | case AF_INET6: | ||
479 | ((struct sockaddr_in6 *)&da->da_addr)->sin6_port = port; | ||
480 | da->da_addrlen = sizeof(struct sockaddr_in6); | ||
481 | match_netid = "tcp6"; | ||
482 | match_netid_len = 4; | ||
483 | startsep = "["; | ||
484 | endsep = "]"; | ||
485 | break; | ||
486 | |||
487 | default: | ||
488 | dprintk("%s: unsupported address family: %u\n", | ||
489 | __func__, da->da_addr.ss_family); | ||
490 | goto out_free_da; | ||
491 | } | ||
492 | |||
493 | if (nlen != match_netid_len || strncmp(netid, match_netid, nlen)) { | ||
494 | dprintk("%s: ERROR: r_netid \"%s\" != \"%s\"\n", | ||
495 | __func__, netid, match_netid); | ||
496 | goto out_free_da; | ||
497 | } | ||
498 | |||
499 | /* save human readable address */ | ||
500 | len = strlen(startsep) + strlen(buf) + strlen(endsep) + 7; | ||
501 | da->da_remotestr = kzalloc(len, gfp_flags); | ||
502 | |||
503 | /* NULL is ok, only used for dprintk */ | ||
504 | if (da->da_remotestr) | ||
505 | snprintf(da->da_remotestr, len, "%s%s%s:%u", startsep, | ||
506 | buf, endsep, ntohs(port)); | ||
507 | |||
508 | dprintk("%s: Parsed DS addr %s\n", __func__, da->da_remotestr); | ||
294 | kfree(buf); | 509 | kfree(buf); |
510 | kfree(netid); | ||
511 | return da; | ||
512 | |||
513 | out_free_da: | ||
514 | kfree(da); | ||
515 | out_free_buf: | ||
516 | dprintk("%s: Error parsing DS addr: %s\n", __func__, buf); | ||
517 | kfree(buf); | ||
518 | out_free_netid: | ||
519 | kfree(netid); | ||
295 | out_err: | 520 | out_err: |
296 | return ds; | 521 | return NULL; |
297 | } | 522 | } |
298 | 523 | ||
299 | /* Decode opaque device data and return the result */ | 524 | /* Decode opaque device data and return the result */ |
@@ -310,6 +535,8 @@ decode_device(struct inode *ino, struct pnfs_device *pdev, gfp_t gfp_flags) | |||
310 | struct xdr_stream stream; | 535 | struct xdr_stream stream; |
311 | struct xdr_buf buf; | 536 | struct xdr_buf buf; |
312 | struct page *scratch; | 537 | struct page *scratch; |
538 | struct list_head dsaddrs; | ||
539 | struct nfs4_pnfs_ds_addr *da; | ||
313 | 540 | ||
314 | /* set up xdr stream */ | 541 | /* set up xdr stream */ |
315 | scratch = alloc_page(gfp_flags); | 542 | scratch = alloc_page(gfp_flags); |
@@ -386,6 +613,8 @@ decode_device(struct inode *ino, struct pnfs_device *pdev, gfp_t gfp_flags) | |||
386 | NFS_SERVER(ino)->nfs_client, | 613 | NFS_SERVER(ino)->nfs_client, |
387 | &pdev->dev_id); | 614 | &pdev->dev_id); |
388 | 615 | ||
616 | INIT_LIST_HEAD(&dsaddrs); | ||
617 | |||
389 | for (i = 0; i < dsaddr->ds_num; i++) { | 618 | for (i = 0; i < dsaddr->ds_num; i++) { |
390 | int j; | 619 | int j; |
391 | u32 mp_count; | 620 | u32 mp_count; |
@@ -395,48 +624,43 @@ decode_device(struct inode *ino, struct pnfs_device *pdev, gfp_t gfp_flags) | |||
395 | goto out_err_free_deviceid; | 624 | goto out_err_free_deviceid; |
396 | 625 | ||
397 | mp_count = be32_to_cpup(p); /* multipath count */ | 626 | mp_count = be32_to_cpup(p); /* multipath count */ |
398 | if (mp_count > 1) { | ||
399 | printk(KERN_WARNING | ||
400 | "%s: Multipath count %d not supported, " | ||
401 | "skipping all greater than 1\n", __func__, | ||
402 | mp_count); | ||
403 | } | ||
404 | for (j = 0; j < mp_count; j++) { | 627 | for (j = 0; j < mp_count; j++) { |
405 | if (j == 0) { | 628 | da = decode_ds_addr(&stream, gfp_flags); |
406 | dsaddr->ds_list[i] = decode_and_add_ds(&stream, | 629 | if (da) |
407 | ino, gfp_flags); | 630 | list_add_tail(&da->da_node, &dsaddrs); |
408 | if (dsaddr->ds_list[i] == NULL) | 631 | } |
409 | goto out_err_free_deviceid; | 632 | if (list_empty(&dsaddrs)) { |
410 | } else { | 633 | dprintk("%s: no suitable DS addresses found\n", |
411 | u32 len; | 634 | __func__); |
412 | /* skip extra multipath */ | 635 | goto out_err_free_deviceid; |
413 | 636 | } | |
414 | /* read len, skip */ | 637 | |
415 | p = xdr_inline_decode(&stream, 4); | 638 | dsaddr->ds_list[i] = nfs4_pnfs_ds_add(&dsaddrs, gfp_flags); |
416 | if (unlikely(!p)) | 639 | if (!dsaddr->ds_list[i]) |
417 | goto out_err_free_deviceid; | 640 | goto out_err_drain_dsaddrs; |
418 | len = be32_to_cpup(p); | 641 | |
419 | 642 | /* If DS was already in cache, free ds addrs */ | |
420 | p = xdr_inline_decode(&stream, len); | 643 | while (!list_empty(&dsaddrs)) { |
421 | if (unlikely(!p)) | 644 | da = list_first_entry(&dsaddrs, |
422 | goto out_err_free_deviceid; | 645 | struct nfs4_pnfs_ds_addr, |
423 | 646 | da_node); | |
424 | /* read len, skip */ | 647 | list_del_init(&da->da_node); |
425 | p = xdr_inline_decode(&stream, 4); | 648 | kfree(da->da_remotestr); |
426 | if (unlikely(!p)) | 649 | kfree(da); |
427 | goto out_err_free_deviceid; | ||
428 | len = be32_to_cpup(p); | ||
429 | |||
430 | p = xdr_inline_decode(&stream, len); | ||
431 | if (unlikely(!p)) | ||
432 | goto out_err_free_deviceid; | ||
433 | } | ||
434 | } | 650 | } |
435 | } | 651 | } |
436 | 652 | ||
437 | __free_page(scratch); | 653 | __free_page(scratch); |
438 | return dsaddr; | 654 | return dsaddr; |
439 | 655 | ||
656 | out_err_drain_dsaddrs: | ||
657 | while (!list_empty(&dsaddrs)) { | ||
658 | da = list_first_entry(&dsaddrs, struct nfs4_pnfs_ds_addr, | ||
659 | da_node); | ||
660 | list_del_init(&da->da_node); | ||
661 | kfree(da->da_remotestr); | ||
662 | kfree(da); | ||
663 | } | ||
440 | out_err_free_deviceid: | 664 | out_err_free_deviceid: |
441 | nfs4_fl_free_deviceid(dsaddr); | 665 | nfs4_fl_free_deviceid(dsaddr); |
442 | /* stripe_indicies was part of dsaddr */ | 666 | /* stripe_indicies was part of dsaddr */ |
@@ -591,13 +815,13 @@ nfs4_fl_select_ds_fh(struct pnfs_layout_segment *lseg, u32 j) | |||
591 | 815 | ||
592 | static void | 816 | static void |
593 | filelayout_mark_devid_negative(struct nfs4_file_layout_dsaddr *dsaddr, | 817 | filelayout_mark_devid_negative(struct nfs4_file_layout_dsaddr *dsaddr, |
594 | int err, u32 ds_addr) | 818 | int err, const char *ds_remotestr) |
595 | { | 819 | { |
596 | u32 *p = (u32 *)&dsaddr->id_node.deviceid; | 820 | u32 *p = (u32 *)&dsaddr->id_node.deviceid; |
597 | 821 | ||
598 | printk(KERN_ERR "NFS: data server %x connection error %d." | 822 | printk(KERN_ERR "NFS: data server %s connection error %d." |
599 | " Deviceid [%x%x%x%x] marked out of use.\n", | 823 | " Deviceid [%x%x%x%x] marked out of use.\n", |
600 | ds_addr, err, p[0], p[1], p[2], p[3]); | 824 | ds_remotestr, err, p[0], p[1], p[2], p[3]); |
601 | 825 | ||
602 | spin_lock(&nfs4_ds_cache_lock); | 826 | spin_lock(&nfs4_ds_cache_lock); |
603 | dsaddr->flags |= NFS4_DEVICE_ID_NEG_ENTRY; | 827 | dsaddr->flags |= NFS4_DEVICE_ID_NEG_ENTRY; |
@@ -628,7 +852,7 @@ nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx) | |||
628 | err = nfs4_ds_connect(s, ds); | 852 | err = nfs4_ds_connect(s, ds); |
629 | if (err) { | 853 | if (err) { |
630 | filelayout_mark_devid_negative(dsaddr, err, | 854 | filelayout_mark_devid_negative(dsaddr, err, |
631 | ntohl(ds->ds_ip_addr)); | 855 | ds->ds_remotestr); |
632 | return NULL; | 856 | return NULL; |
633 | } | 857 | } |
634 | } | 858 | } |
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 5879b23e0c9..003cb6955a2 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c | |||
@@ -80,7 +80,10 @@ static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, | |||
80 | static int nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred, | 80 | static int nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred, |
81 | struct nfs_fattr *fattr, struct iattr *sattr, | 81 | struct nfs_fattr *fattr, struct iattr *sattr, |
82 | struct nfs4_state *state); | 82 | struct nfs4_state *state); |
83 | 83 | #ifdef CONFIG_NFS_V4_1 | |
84 | static int nfs41_test_stateid(struct nfs_server *, struct nfs4_state *); | ||
85 | static int nfs41_free_stateid(struct nfs_server *, struct nfs4_state *); | ||
86 | #endif | ||
84 | /* Prevent leaks of NFSv4 errors into userland */ | 87 | /* Prevent leaks of NFSv4 errors into userland */ |
85 | static int nfs4_map_errors(int err) | 88 | static int nfs4_map_errors(int err) |
86 | { | 89 | { |
@@ -137,12 +140,13 @@ const u32 nfs4_pathconf_bitmap[2] = { | |||
137 | 0 | 140 | 0 |
138 | }; | 141 | }; |
139 | 142 | ||
140 | const u32 nfs4_fsinfo_bitmap[2] = { FATTR4_WORD0_MAXFILESIZE | 143 | const u32 nfs4_fsinfo_bitmap[3] = { FATTR4_WORD0_MAXFILESIZE |
141 | | FATTR4_WORD0_MAXREAD | 144 | | FATTR4_WORD0_MAXREAD |
142 | | FATTR4_WORD0_MAXWRITE | 145 | | FATTR4_WORD0_MAXWRITE |
143 | | FATTR4_WORD0_LEASE_TIME, | 146 | | FATTR4_WORD0_LEASE_TIME, |
144 | FATTR4_WORD1_TIME_DELTA | 147 | FATTR4_WORD1_TIME_DELTA |
145 | | FATTR4_WORD1_FS_LAYOUT_TYPES | 148 | | FATTR4_WORD1_FS_LAYOUT_TYPES, |
149 | FATTR4_WORD2_LAYOUT_BLKSIZE | ||
146 | }; | 150 | }; |
147 | 151 | ||
148 | const u32 nfs4_fs_locations_bitmap[2] = { | 152 | const u32 nfs4_fs_locations_bitmap[2] = { |
@@ -763,8 +767,8 @@ struct nfs4_opendata { | |||
763 | struct nfs_open_confirmres c_res; | 767 | struct nfs_open_confirmres c_res; |
764 | struct nfs_fattr f_attr; | 768 | struct nfs_fattr f_attr; |
765 | struct nfs_fattr dir_attr; | 769 | struct nfs_fattr dir_attr; |
766 | struct path path; | ||
767 | struct dentry *dir; | 770 | struct dentry *dir; |
771 | struct dentry *dentry; | ||
768 | struct nfs4_state_owner *owner; | 772 | struct nfs4_state_owner *owner; |
769 | struct nfs4_state *state; | 773 | struct nfs4_state *state; |
770 | struct iattr attrs; | 774 | struct iattr attrs; |
@@ -786,12 +790,12 @@ static void nfs4_init_opendata_res(struct nfs4_opendata *p) | |||
786 | nfs_fattr_init(&p->dir_attr); | 790 | nfs_fattr_init(&p->dir_attr); |
787 | } | 791 | } |
788 | 792 | ||
789 | static struct nfs4_opendata *nfs4_opendata_alloc(struct path *path, | 793 | static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry, |
790 | struct nfs4_state_owner *sp, fmode_t fmode, int flags, | 794 | struct nfs4_state_owner *sp, fmode_t fmode, int flags, |
791 | const struct iattr *attrs, | 795 | const struct iattr *attrs, |
792 | gfp_t gfp_mask) | 796 | gfp_t gfp_mask) |
793 | { | 797 | { |
794 | struct dentry *parent = dget_parent(path->dentry); | 798 | struct dentry *parent = dget_parent(dentry); |
795 | struct inode *dir = parent->d_inode; | 799 | struct inode *dir = parent->d_inode; |
796 | struct nfs_server *server = NFS_SERVER(dir); | 800 | struct nfs_server *server = NFS_SERVER(dir); |
797 | struct nfs4_opendata *p; | 801 | struct nfs4_opendata *p; |
@@ -802,8 +806,8 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct path *path, | |||
802 | p->o_arg.seqid = nfs_alloc_seqid(&sp->so_seqid, gfp_mask); | 806 | p->o_arg.seqid = nfs_alloc_seqid(&sp->so_seqid, gfp_mask); |
803 | if (p->o_arg.seqid == NULL) | 807 | if (p->o_arg.seqid == NULL) |
804 | goto err_free; | 808 | goto err_free; |
805 | path_get(path); | 809 | nfs_sb_active(dentry->d_sb); |
806 | p->path = *path; | 810 | p->dentry = dget(dentry); |
807 | p->dir = parent; | 811 | p->dir = parent; |
808 | p->owner = sp; | 812 | p->owner = sp; |
809 | atomic_inc(&sp->so_count); | 813 | atomic_inc(&sp->so_count); |
@@ -812,7 +816,7 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct path *path, | |||
812 | p->o_arg.fmode = fmode & (FMODE_READ|FMODE_WRITE); | 816 | p->o_arg.fmode = fmode & (FMODE_READ|FMODE_WRITE); |
813 | p->o_arg.clientid = server->nfs_client->cl_clientid; | 817 | p->o_arg.clientid = server->nfs_client->cl_clientid; |
814 | p->o_arg.id = sp->so_owner_id.id; | 818 | p->o_arg.id = sp->so_owner_id.id; |
815 | p->o_arg.name = &p->path.dentry->d_name; | 819 | p->o_arg.name = &dentry->d_name; |
816 | p->o_arg.server = server; | 820 | p->o_arg.server = server; |
817 | p->o_arg.bitmask = server->attr_bitmask; | 821 | p->o_arg.bitmask = server->attr_bitmask; |
818 | p->o_arg.claim = NFS4_OPEN_CLAIM_NULL; | 822 | p->o_arg.claim = NFS4_OPEN_CLAIM_NULL; |
@@ -842,13 +846,15 @@ static void nfs4_opendata_free(struct kref *kref) | |||
842 | { | 846 | { |
843 | struct nfs4_opendata *p = container_of(kref, | 847 | struct nfs4_opendata *p = container_of(kref, |
844 | struct nfs4_opendata, kref); | 848 | struct nfs4_opendata, kref); |
849 | struct super_block *sb = p->dentry->d_sb; | ||
845 | 850 | ||
846 | nfs_free_seqid(p->o_arg.seqid); | 851 | nfs_free_seqid(p->o_arg.seqid); |
847 | if (p->state != NULL) | 852 | if (p->state != NULL) |
848 | nfs4_put_open_state(p->state); | 853 | nfs4_put_open_state(p->state); |
849 | nfs4_put_state_owner(p->owner); | 854 | nfs4_put_state_owner(p->owner); |
850 | dput(p->dir); | 855 | dput(p->dir); |
851 | path_put(&p->path); | 856 | dput(p->dentry); |
857 | nfs_sb_deactive(sb); | ||
852 | kfree(p); | 858 | kfree(p); |
853 | } | 859 | } |
854 | 860 | ||
@@ -1130,7 +1136,7 @@ static struct nfs4_opendata *nfs4_open_recoverdata_alloc(struct nfs_open_context | |||
1130 | { | 1136 | { |
1131 | struct nfs4_opendata *opendata; | 1137 | struct nfs4_opendata *opendata; |
1132 | 1138 | ||
1133 | opendata = nfs4_opendata_alloc(&ctx->path, state->owner, 0, 0, NULL, GFP_NOFS); | 1139 | opendata = nfs4_opendata_alloc(ctx->dentry, state->owner, 0, 0, NULL, GFP_NOFS); |
1134 | if (opendata == NULL) | 1140 | if (opendata == NULL) |
1135 | return ERR_PTR(-ENOMEM); | 1141 | return ERR_PTR(-ENOMEM); |
1136 | opendata->state = state; | 1142 | opendata->state = state; |
@@ -1154,7 +1160,7 @@ static int nfs4_open_recover_helper(struct nfs4_opendata *opendata, fmode_t fmod | |||
1154 | newstate = nfs4_opendata_to_nfs4_state(opendata); | 1160 | newstate = nfs4_opendata_to_nfs4_state(opendata); |
1155 | if (IS_ERR(newstate)) | 1161 | if (IS_ERR(newstate)) |
1156 | return PTR_ERR(newstate); | 1162 | return PTR_ERR(newstate); |
1157 | nfs4_close_state(&opendata->path, newstate, fmode); | 1163 | nfs4_close_state(newstate, fmode); |
1158 | *res = newstate; | 1164 | *res = newstate; |
1159 | return 0; | 1165 | return 0; |
1160 | } | 1166 | } |
@@ -1352,7 +1358,7 @@ static void nfs4_open_confirm_release(void *calldata) | |||
1352 | goto out_free; | 1358 | goto out_free; |
1353 | state = nfs4_opendata_to_nfs4_state(data); | 1359 | state = nfs4_opendata_to_nfs4_state(data); |
1354 | if (!IS_ERR(state)) | 1360 | if (!IS_ERR(state)) |
1355 | nfs4_close_state(&data->path, state, data->o_arg.fmode); | 1361 | nfs4_close_state(state, data->o_arg.fmode); |
1356 | out_free: | 1362 | out_free: |
1357 | nfs4_opendata_put(data); | 1363 | nfs4_opendata_put(data); |
1358 | } | 1364 | } |
@@ -1497,7 +1503,7 @@ static void nfs4_open_release(void *calldata) | |||
1497 | goto out_free; | 1503 | goto out_free; |
1498 | state = nfs4_opendata_to_nfs4_state(data); | 1504 | state = nfs4_opendata_to_nfs4_state(data); |
1499 | if (!IS_ERR(state)) | 1505 | if (!IS_ERR(state)) |
1500 | nfs4_close_state(&data->path, state, data->o_arg.fmode); | 1506 | nfs4_close_state(state, data->o_arg.fmode); |
1501 | out_free: | 1507 | out_free: |
1502 | nfs4_opendata_put(data); | 1508 | nfs4_opendata_put(data); |
1503 | } | 1509 | } |
@@ -1648,7 +1654,7 @@ static int _nfs4_open_expired(struct nfs_open_context *ctx, struct nfs4_state *s | |||
1648 | return PTR_ERR(opendata); | 1654 | return PTR_ERR(opendata); |
1649 | ret = nfs4_open_recover(opendata, state); | 1655 | ret = nfs4_open_recover(opendata, state); |
1650 | if (ret == -ESTALE) | 1656 | if (ret == -ESTALE) |
1651 | d_drop(ctx->path.dentry); | 1657 | d_drop(ctx->dentry); |
1652 | nfs4_opendata_put(opendata); | 1658 | nfs4_opendata_put(opendata); |
1653 | return ret; | 1659 | return ret; |
1654 | } | 1660 | } |
@@ -1687,6 +1693,20 @@ static int nfs4_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *sta | |||
1687 | return ret; | 1693 | return ret; |
1688 | } | 1694 | } |
1689 | 1695 | ||
1696 | #if defined(CONFIG_NFS_V4_1) | ||
1697 | static int nfs41_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *state) | ||
1698 | { | ||
1699 | int status; | ||
1700 | struct nfs_server *server = NFS_SERVER(state->inode); | ||
1701 | |||
1702 | status = nfs41_test_stateid(server, state); | ||
1703 | if (status == NFS_OK) | ||
1704 | return 0; | ||
1705 | nfs41_free_stateid(server, state); | ||
1706 | return nfs4_open_expired(sp, state); | ||
1707 | } | ||
1708 | #endif | ||
1709 | |||
1690 | /* | 1710 | /* |
1691 | * on an EXCLUSIVE create, the server should send back a bitmask with FATTR4-* | 1711 | * on an EXCLUSIVE create, the server should send back a bitmask with FATTR4-* |
1692 | * fields corresponding to attributes that were used to store the verifier. | 1712 | * fields corresponding to attributes that were used to store the verifier. |
@@ -1706,7 +1726,7 @@ static inline void nfs4_exclusive_attrset(struct nfs4_opendata *opendata, struct | |||
1706 | /* | 1726 | /* |
1707 | * Returns a referenced nfs4_state | 1727 | * Returns a referenced nfs4_state |
1708 | */ | 1728 | */ |
1709 | static int _nfs4_do_open(struct inode *dir, struct path *path, fmode_t fmode, int flags, struct iattr *sattr, struct rpc_cred *cred, struct nfs4_state **res) | 1729 | static int _nfs4_do_open(struct inode *dir, struct dentry *dentry, fmode_t fmode, int flags, struct iattr *sattr, struct rpc_cred *cred, struct nfs4_state **res) |
1710 | { | 1730 | { |
1711 | struct nfs4_state_owner *sp; | 1731 | struct nfs4_state_owner *sp; |
1712 | struct nfs4_state *state = NULL; | 1732 | struct nfs4_state *state = NULL; |
@@ -1723,15 +1743,15 @@ static int _nfs4_do_open(struct inode *dir, struct path *path, fmode_t fmode, in | |||
1723 | status = nfs4_recover_expired_lease(server); | 1743 | status = nfs4_recover_expired_lease(server); |
1724 | if (status != 0) | 1744 | if (status != 0) |
1725 | goto err_put_state_owner; | 1745 | goto err_put_state_owner; |
1726 | if (path->dentry->d_inode != NULL) | 1746 | if (dentry->d_inode != NULL) |
1727 | nfs4_return_incompatible_delegation(path->dentry->d_inode, fmode); | 1747 | nfs4_return_incompatible_delegation(dentry->d_inode, fmode); |
1728 | status = -ENOMEM; | 1748 | status = -ENOMEM; |
1729 | opendata = nfs4_opendata_alloc(path, sp, fmode, flags, sattr, GFP_KERNEL); | 1749 | opendata = nfs4_opendata_alloc(dentry, sp, fmode, flags, sattr, GFP_KERNEL); |
1730 | if (opendata == NULL) | 1750 | if (opendata == NULL) |
1731 | goto err_put_state_owner; | 1751 | goto err_put_state_owner; |
1732 | 1752 | ||
1733 | if (path->dentry->d_inode != NULL) | 1753 | if (dentry->d_inode != NULL) |
1734 | opendata->state = nfs4_get_open_state(path->dentry->d_inode, sp); | 1754 | opendata->state = nfs4_get_open_state(dentry->d_inode, sp); |
1735 | 1755 | ||
1736 | status = _nfs4_proc_open(opendata); | 1756 | status = _nfs4_proc_open(opendata); |
1737 | if (status != 0) | 1757 | if (status != 0) |
@@ -1769,14 +1789,14 @@ out_err: | |||
1769 | } | 1789 | } |
1770 | 1790 | ||
1771 | 1791 | ||
1772 | static struct nfs4_state *nfs4_do_open(struct inode *dir, struct path *path, fmode_t fmode, int flags, struct iattr *sattr, struct rpc_cred *cred) | 1792 | static struct nfs4_state *nfs4_do_open(struct inode *dir, struct dentry *dentry, fmode_t fmode, int flags, struct iattr *sattr, struct rpc_cred *cred) |
1773 | { | 1793 | { |
1774 | struct nfs4_exception exception = { }; | 1794 | struct nfs4_exception exception = { }; |
1775 | struct nfs4_state *res; | 1795 | struct nfs4_state *res; |
1776 | int status; | 1796 | int status; |
1777 | 1797 | ||
1778 | do { | 1798 | do { |
1779 | status = _nfs4_do_open(dir, path, fmode, flags, sattr, cred, &res); | 1799 | status = _nfs4_do_open(dir, dentry, fmode, flags, sattr, cred, &res); |
1780 | if (status == 0) | 1800 | if (status == 0) |
1781 | break; | 1801 | break; |
1782 | /* NOTE: BAD_SEQID means the server and client disagree about the | 1802 | /* NOTE: BAD_SEQID means the server and client disagree about the |
@@ -1873,7 +1893,6 @@ static int nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred, | |||
1873 | } | 1893 | } |
1874 | 1894 | ||
1875 | struct nfs4_closedata { | 1895 | struct nfs4_closedata { |
1876 | struct path path; | ||
1877 | struct inode *inode; | 1896 | struct inode *inode; |
1878 | struct nfs4_state *state; | 1897 | struct nfs4_state *state; |
1879 | struct nfs_closeargs arg; | 1898 | struct nfs_closeargs arg; |
@@ -1888,13 +1907,14 @@ static void nfs4_free_closedata(void *data) | |||
1888 | { | 1907 | { |
1889 | struct nfs4_closedata *calldata = data; | 1908 | struct nfs4_closedata *calldata = data; |
1890 | struct nfs4_state_owner *sp = calldata->state->owner; | 1909 | struct nfs4_state_owner *sp = calldata->state->owner; |
1910 | struct super_block *sb = calldata->state->inode->i_sb; | ||
1891 | 1911 | ||
1892 | if (calldata->roc) | 1912 | if (calldata->roc) |
1893 | pnfs_roc_release(calldata->state->inode); | 1913 | pnfs_roc_release(calldata->state->inode); |
1894 | nfs4_put_open_state(calldata->state); | 1914 | nfs4_put_open_state(calldata->state); |
1895 | nfs_free_seqid(calldata->arg.seqid); | 1915 | nfs_free_seqid(calldata->arg.seqid); |
1896 | nfs4_put_state_owner(sp); | 1916 | nfs4_put_state_owner(sp); |
1897 | path_put(&calldata->path); | 1917 | nfs_sb_deactive(sb); |
1898 | kfree(calldata); | 1918 | kfree(calldata); |
1899 | } | 1919 | } |
1900 | 1920 | ||
@@ -2014,7 +2034,7 @@ static const struct rpc_call_ops nfs4_close_ops = { | |||
2014 | * | 2034 | * |
2015 | * NOTE: Caller must be holding the sp->so_owner semaphore! | 2035 | * NOTE: Caller must be holding the sp->so_owner semaphore! |
2016 | */ | 2036 | */ |
2017 | int nfs4_do_close(struct path *path, struct nfs4_state *state, gfp_t gfp_mask, int wait, bool roc) | 2037 | int nfs4_do_close(struct nfs4_state *state, gfp_t gfp_mask, int wait, bool roc) |
2018 | { | 2038 | { |
2019 | struct nfs_server *server = NFS_SERVER(state->inode); | 2039 | struct nfs_server *server = NFS_SERVER(state->inode); |
2020 | struct nfs4_closedata *calldata; | 2040 | struct nfs4_closedata *calldata; |
@@ -2050,8 +2070,7 @@ int nfs4_do_close(struct path *path, struct nfs4_state *state, gfp_t gfp_mask, i | |||
2050 | calldata->res.seqid = calldata->arg.seqid; | 2070 | calldata->res.seqid = calldata->arg.seqid; |
2051 | calldata->res.server = server; | 2071 | calldata->res.server = server; |
2052 | calldata->roc = roc; | 2072 | calldata->roc = roc; |
2053 | path_get(path); | 2073 | nfs_sb_active(calldata->inode->i_sb); |
2054 | calldata->path = *path; | ||
2055 | 2074 | ||
2056 | msg.rpc_argp = &calldata->arg; | 2075 | msg.rpc_argp = &calldata->arg; |
2057 | msg.rpc_resp = &calldata->res; | 2076 | msg.rpc_resp = &calldata->res; |
@@ -2080,7 +2099,7 @@ nfs4_atomic_open(struct inode *dir, struct nfs_open_context *ctx, int open_flags | |||
2080 | struct nfs4_state *state; | 2099 | struct nfs4_state *state; |
2081 | 2100 | ||
2082 | /* Protect against concurrent sillydeletes */ | 2101 | /* Protect against concurrent sillydeletes */ |
2083 | state = nfs4_do_open(dir, &ctx->path, ctx->mode, open_flags, attr, ctx->cred); | 2102 | state = nfs4_do_open(dir, ctx->dentry, ctx->mode, open_flags, attr, ctx->cred); |
2084 | if (IS_ERR(state)) | 2103 | if (IS_ERR(state)) |
2085 | return ERR_CAST(state); | 2104 | return ERR_CAST(state); |
2086 | ctx->state = state; | 2105 | ctx->state = state; |
@@ -2092,9 +2111,9 @@ static void nfs4_close_context(struct nfs_open_context *ctx, int is_sync) | |||
2092 | if (ctx->state == NULL) | 2111 | if (ctx->state == NULL) |
2093 | return; | 2112 | return; |
2094 | if (is_sync) | 2113 | if (is_sync) |
2095 | nfs4_close_sync(&ctx->path, ctx->state, ctx->mode); | 2114 | nfs4_close_sync(ctx->state, ctx->mode); |
2096 | else | 2115 | else |
2097 | nfs4_close_state(&ctx->path, ctx->state, ctx->mode); | 2116 | nfs4_close_state(ctx->state, ctx->mode); |
2098 | } | 2117 | } |
2099 | 2118 | ||
2100 | static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle) | 2119 | static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle) |
@@ -2251,13 +2270,14 @@ static int nfs4_find_root_sec(struct nfs_server *server, struct nfs_fh *fhandle, | |||
2251 | static int nfs4_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle, | 2270 | static int nfs4_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle, |
2252 | struct nfs_fsinfo *info) | 2271 | struct nfs_fsinfo *info) |
2253 | { | 2272 | { |
2273 | int minor_version = server->nfs_client->cl_minorversion; | ||
2254 | int status = nfs4_lookup_root(server, fhandle, info); | 2274 | int status = nfs4_lookup_root(server, fhandle, info); |
2255 | if ((status == -NFS4ERR_WRONGSEC) && !(server->flags & NFS_MOUNT_SECFLAVOUR)) | 2275 | if ((status == -NFS4ERR_WRONGSEC) && !(server->flags & NFS_MOUNT_SECFLAVOUR)) |
2256 | /* | 2276 | /* |
2257 | * A status of -NFS4ERR_WRONGSEC will be mapped to -EPERM | 2277 | * A status of -NFS4ERR_WRONGSEC will be mapped to -EPERM |
2258 | * by nfs4_map_errors() as this function exits. | 2278 | * by nfs4_map_errors() as this function exits. |
2259 | */ | 2279 | */ |
2260 | status = nfs4_find_root_sec(server, fhandle, info); | 2280 | status = nfs_v4_minor_ops[minor_version]->find_root_sec(server, fhandle, info); |
2261 | if (status == 0) | 2281 | if (status == 0) |
2262 | status = nfs4_server_capabilities(server, fhandle); | 2282 | status = nfs4_server_capabilities(server, fhandle); |
2263 | if (status == 0) | 2283 | if (status == 0) |
@@ -2616,10 +2636,7 @@ static int | |||
2616 | nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, | 2636 | nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, |
2617 | int flags, struct nfs_open_context *ctx) | 2637 | int flags, struct nfs_open_context *ctx) |
2618 | { | 2638 | { |
2619 | struct path my_path = { | 2639 | struct dentry *de = dentry; |
2620 | .dentry = dentry, | ||
2621 | }; | ||
2622 | struct path *path = &my_path; | ||
2623 | struct nfs4_state *state; | 2640 | struct nfs4_state *state; |
2624 | struct rpc_cred *cred = NULL; | 2641 | struct rpc_cred *cred = NULL; |
2625 | fmode_t fmode = 0; | 2642 | fmode_t fmode = 0; |
@@ -2627,11 +2644,11 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, | |||
2627 | 2644 | ||
2628 | if (ctx != NULL) { | 2645 | if (ctx != NULL) { |
2629 | cred = ctx->cred; | 2646 | cred = ctx->cred; |
2630 | path = &ctx->path; | 2647 | de = ctx->dentry; |
2631 | fmode = ctx->mode; | 2648 | fmode = ctx->mode; |
2632 | } | 2649 | } |
2633 | sattr->ia_mode &= ~current_umask(); | 2650 | sattr->ia_mode &= ~current_umask(); |
2634 | state = nfs4_do_open(dir, path, fmode, flags, sattr, cred); | 2651 | state = nfs4_do_open(dir, de, fmode, flags, sattr, cred); |
2635 | d_drop(dentry); | 2652 | d_drop(dentry); |
2636 | if (IS_ERR(state)) { | 2653 | if (IS_ERR(state)) { |
2637 | status = PTR_ERR(state); | 2654 | status = PTR_ERR(state); |
@@ -2642,7 +2659,7 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, | |||
2642 | if (ctx != NULL) | 2659 | if (ctx != NULL) |
2643 | ctx->state = state; | 2660 | ctx->state = state; |
2644 | else | 2661 | else |
2645 | nfs4_close_sync(path, state, fmode); | 2662 | nfs4_close_sync(state, fmode); |
2646 | out: | 2663 | out: |
2647 | return status; | 2664 | return status; |
2648 | } | 2665 | } |
@@ -3357,9 +3374,13 @@ static void nfs4_renew_done(struct rpc_task *task, void *calldata) | |||
3357 | 3374 | ||
3358 | if (task->tk_status < 0) { | 3375 | if (task->tk_status < 0) { |
3359 | /* Unless we're shutting down, schedule state recovery! */ | 3376 | /* Unless we're shutting down, schedule state recovery! */ |
3360 | if (test_bit(NFS_CS_RENEWD, &clp->cl_res_state) != 0) | 3377 | if (test_bit(NFS_CS_RENEWD, &clp->cl_res_state) == 0) |
3378 | return; | ||
3379 | if (task->tk_status != NFS4ERR_CB_PATH_DOWN) { | ||
3361 | nfs4_schedule_lease_recovery(clp); | 3380 | nfs4_schedule_lease_recovery(clp); |
3362 | return; | 3381 | return; |
3382 | } | ||
3383 | nfs4_schedule_path_down_recovery(clp); | ||
3363 | } | 3384 | } |
3364 | do_renew_lease(clp, timestamp); | 3385 | do_renew_lease(clp, timestamp); |
3365 | } | 3386 | } |
@@ -3369,7 +3390,7 @@ static const struct rpc_call_ops nfs4_renew_ops = { | |||
3369 | .rpc_release = nfs4_renew_release, | 3390 | .rpc_release = nfs4_renew_release, |
3370 | }; | 3391 | }; |
3371 | 3392 | ||
3372 | int nfs4_proc_async_renew(struct nfs_client *clp, struct rpc_cred *cred) | 3393 | static int nfs4_proc_async_renew(struct nfs_client *clp, struct rpc_cred *cred, unsigned renew_flags) |
3373 | { | 3394 | { |
3374 | struct rpc_message msg = { | 3395 | struct rpc_message msg = { |
3375 | .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RENEW], | 3396 | .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RENEW], |
@@ -3378,9 +3399,11 @@ int nfs4_proc_async_renew(struct nfs_client *clp, struct rpc_cred *cred) | |||
3378 | }; | 3399 | }; |
3379 | struct nfs4_renewdata *data; | 3400 | struct nfs4_renewdata *data; |
3380 | 3401 | ||
3402 | if (renew_flags == 0) | ||
3403 | return 0; | ||
3381 | if (!atomic_inc_not_zero(&clp->cl_count)) | 3404 | if (!atomic_inc_not_zero(&clp->cl_count)) |
3382 | return -EIO; | 3405 | return -EIO; |
3383 | data = kmalloc(sizeof(*data), GFP_KERNEL); | 3406 | data = kmalloc(sizeof(*data), GFP_NOFS); |
3384 | if (data == NULL) | 3407 | if (data == NULL) |
3385 | return -ENOMEM; | 3408 | return -ENOMEM; |
3386 | data->client = clp; | 3409 | data->client = clp; |
@@ -3389,7 +3412,7 @@ int nfs4_proc_async_renew(struct nfs_client *clp, struct rpc_cred *cred) | |||
3389 | &nfs4_renew_ops, data); | 3412 | &nfs4_renew_ops, data); |
3390 | } | 3413 | } |
3391 | 3414 | ||
3392 | int nfs4_proc_renew(struct nfs_client *clp, struct rpc_cred *cred) | 3415 | static int nfs4_proc_renew(struct nfs_client *clp, struct rpc_cred *cred) |
3393 | { | 3416 | { |
3394 | struct rpc_message msg = { | 3417 | struct rpc_message msg = { |
3395 | .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RENEW], | 3418 | .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RENEW], |
@@ -3419,19 +3442,6 @@ static inline int nfs4_server_supports_acls(struct nfs_server *server) | |||
3419 | */ | 3442 | */ |
3420 | #define NFS4ACL_MAXPAGES (XATTR_SIZE_MAX >> PAGE_CACHE_SHIFT) | 3443 | #define NFS4ACL_MAXPAGES (XATTR_SIZE_MAX >> PAGE_CACHE_SHIFT) |
3421 | 3444 | ||
3422 | static void buf_to_pages(const void *buf, size_t buflen, | ||
3423 | struct page **pages, unsigned int *pgbase) | ||
3424 | { | ||
3425 | const void *p = buf; | ||
3426 | |||
3427 | *pgbase = offset_in_page(buf); | ||
3428 | p -= *pgbase; | ||
3429 | while (p < buf + buflen) { | ||
3430 | *(pages++) = virt_to_page(p); | ||
3431 | p += PAGE_CACHE_SIZE; | ||
3432 | } | ||
3433 | } | ||
3434 | |||
3435 | static int buf_to_pages_noslab(const void *buf, size_t buflen, | 3445 | static int buf_to_pages_noslab(const void *buf, size_t buflen, |
3436 | struct page **pages, unsigned int *pgbase) | 3446 | struct page **pages, unsigned int *pgbase) |
3437 | { | 3447 | { |
@@ -3528,9 +3538,19 @@ out: | |||
3528 | nfs4_set_cached_acl(inode, acl); | 3538 | nfs4_set_cached_acl(inode, acl); |
3529 | } | 3539 | } |
3530 | 3540 | ||
3541 | /* | ||
3542 | * The getxattr API returns the required buffer length when called with a | ||
3543 | * NULL buf. The NFSv4 acl tool then calls getxattr again after allocating | ||
3544 | * the required buf. On a NULL buf, we send a page of data to the server | ||
3545 | * guessing that the ACL request can be serviced by a page. If so, we cache | ||
3546 | * up to the page of ACL data, and the 2nd call to getxattr is serviced by | ||
3547 | * the cache. If not so, we throw away the page, and cache the required | ||
3548 | * length. The next getxattr call will then produce another round trip to | ||
3549 | * the server, this time with the input buf of the required size. | ||
3550 | */ | ||
3531 | static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t buflen) | 3551 | static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t buflen) |
3532 | { | 3552 | { |
3533 | struct page *pages[NFS4ACL_MAXPAGES]; | 3553 | struct page *pages[NFS4ACL_MAXPAGES] = {NULL, }; |
3534 | struct nfs_getaclargs args = { | 3554 | struct nfs_getaclargs args = { |
3535 | .fh = NFS_FH(inode), | 3555 | .fh = NFS_FH(inode), |
3536 | .acl_pages = pages, | 3556 | .acl_pages = pages, |
@@ -3545,41 +3565,60 @@ static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t bu | |||
3545 | .rpc_argp = &args, | 3565 | .rpc_argp = &args, |
3546 | .rpc_resp = &res, | 3566 | .rpc_resp = &res, |
3547 | }; | 3567 | }; |
3548 | struct page *localpage = NULL; | 3568 | int ret = -ENOMEM, npages, i, acl_len = 0; |
3549 | int ret; | ||
3550 | 3569 | ||
3551 | if (buflen < PAGE_SIZE) { | 3570 | npages = (buflen + PAGE_SIZE - 1) >> PAGE_SHIFT; |
3552 | /* As long as we're doing a round trip to the server anyway, | 3571 | /* As long as we're doing a round trip to the server anyway, |
3553 | * let's be prepared for a page of acl data. */ | 3572 | * let's be prepared for a page of acl data. */ |
3554 | localpage = alloc_page(GFP_KERNEL); | 3573 | if (npages == 0) |
3555 | resp_buf = page_address(localpage); | 3574 | npages = 1; |
3556 | if (localpage == NULL) | 3575 | |
3557 | return -ENOMEM; | 3576 | for (i = 0; i < npages; i++) { |
3558 | args.acl_pages[0] = localpage; | 3577 | pages[i] = alloc_page(GFP_KERNEL); |
3559 | args.acl_pgbase = 0; | 3578 | if (!pages[i]) |
3560 | args.acl_len = PAGE_SIZE; | 3579 | goto out_free; |
3561 | } else { | ||
3562 | resp_buf = buf; | ||
3563 | buf_to_pages(buf, buflen, args.acl_pages, &args.acl_pgbase); | ||
3564 | } | 3580 | } |
3565 | ret = nfs4_call_sync(NFS_SERVER(inode)->client, NFS_SERVER(inode), &msg, &args.seq_args, &res.seq_res, 0); | 3581 | if (npages > 1) { |
3582 | /* for decoding across pages */ | ||
3583 | args.acl_scratch = alloc_page(GFP_KERNEL); | ||
3584 | if (!args.acl_scratch) | ||
3585 | goto out_free; | ||
3586 | } | ||
3587 | args.acl_len = npages * PAGE_SIZE; | ||
3588 | args.acl_pgbase = 0; | ||
3589 | /* Let decode_getfacl know not to fail if the ACL data is larger than | ||
3590 | * the page we send as a guess */ | ||
3591 | if (buf == NULL) | ||
3592 | res.acl_flags |= NFS4_ACL_LEN_REQUEST; | ||
3593 | resp_buf = page_address(pages[0]); | ||
3594 | |||
3595 | dprintk("%s buf %p buflen %ld npages %d args.acl_len %ld\n", | ||
3596 | __func__, buf, buflen, npages, args.acl_len); | ||
3597 | ret = nfs4_call_sync(NFS_SERVER(inode)->client, NFS_SERVER(inode), | ||
3598 | &msg, &args.seq_args, &res.seq_res, 0); | ||
3566 | if (ret) | 3599 | if (ret) |
3567 | goto out_free; | 3600 | goto out_free; |
3568 | if (res.acl_len > args.acl_len) | 3601 | |
3569 | nfs4_write_cached_acl(inode, NULL, res.acl_len); | 3602 | acl_len = res.acl_len - res.acl_data_offset; |
3603 | if (acl_len > args.acl_len) | ||
3604 | nfs4_write_cached_acl(inode, NULL, acl_len); | ||
3570 | else | 3605 | else |
3571 | nfs4_write_cached_acl(inode, resp_buf, res.acl_len); | 3606 | nfs4_write_cached_acl(inode, resp_buf + res.acl_data_offset, |
3607 | acl_len); | ||
3572 | if (buf) { | 3608 | if (buf) { |
3573 | ret = -ERANGE; | 3609 | ret = -ERANGE; |
3574 | if (res.acl_len > buflen) | 3610 | if (acl_len > buflen) |
3575 | goto out_free; | 3611 | goto out_free; |
3576 | if (localpage) | 3612 | _copy_from_pages(buf, pages, res.acl_data_offset, |
3577 | memcpy(buf, resp_buf, res.acl_len); | 3613 | res.acl_len); |
3578 | } | 3614 | } |
3579 | ret = res.acl_len; | 3615 | ret = acl_len; |
3580 | out_free: | 3616 | out_free: |
3581 | if (localpage) | 3617 | for (i = 0; i < npages; i++) |
3582 | __free_page(localpage); | 3618 | if (pages[i]) |
3619 | __free_page(pages[i]); | ||
3620 | if (args.acl_scratch) | ||
3621 | __free_page(args.acl_scratch); | ||
3583 | return ret; | 3622 | return ret; |
3584 | } | 3623 | } |
3585 | 3624 | ||
@@ -3610,6 +3649,8 @@ static ssize_t nfs4_proc_get_acl(struct inode *inode, void *buf, size_t buflen) | |||
3610 | nfs_zap_acl_cache(inode); | 3649 | nfs_zap_acl_cache(inode); |
3611 | ret = nfs4_read_cached_acl(inode, buf, buflen); | 3650 | ret = nfs4_read_cached_acl(inode, buf, buflen); |
3612 | if (ret != -ENOENT) | 3651 | if (ret != -ENOENT) |
3652 | /* -ENOENT is returned if there is no ACL or if there is an ACL | ||
3653 | * but no cached acl data, just the acl length */ | ||
3613 | return ret; | 3654 | return ret; |
3614 | return nfs4_get_acl_uncached(inode, buf, buflen); | 3655 | return nfs4_get_acl_uncached(inode, buf, buflen); |
3615 | } | 3656 | } |
@@ -4294,7 +4335,7 @@ static void nfs4_lock_done(struct rpc_task *task, void *calldata) | |||
4294 | memcpy(data->lsp->ls_stateid.data, data->res.stateid.data, | 4335 | memcpy(data->lsp->ls_stateid.data, data->res.stateid.data, |
4295 | sizeof(data->lsp->ls_stateid.data)); | 4336 | sizeof(data->lsp->ls_stateid.data)); |
4296 | data->lsp->ls_flags |= NFS_LOCK_INITIALIZED; | 4337 | data->lsp->ls_flags |= NFS_LOCK_INITIALIZED; |
4297 | renew_lease(NFS_SERVER(data->ctx->path.dentry->d_inode), data->timestamp); | 4338 | renew_lease(NFS_SERVER(data->ctx->dentry->d_inode), data->timestamp); |
4298 | } | 4339 | } |
4299 | out: | 4340 | out: |
4300 | dprintk("%s: done, ret = %d!\n", __func__, data->rpc_status); | 4341 | dprintk("%s: done, ret = %d!\n", __func__, data->rpc_status); |
@@ -4443,6 +4484,20 @@ out: | |||
4443 | return err; | 4484 | return err; |
4444 | } | 4485 | } |
4445 | 4486 | ||
4487 | #if defined(CONFIG_NFS_V4_1) | ||
4488 | static int nfs41_lock_expired(struct nfs4_state *state, struct file_lock *request) | ||
4489 | { | ||
4490 | int status; | ||
4491 | struct nfs_server *server = NFS_SERVER(state->inode); | ||
4492 | |||
4493 | status = nfs41_test_stateid(server, state); | ||
4494 | if (status == NFS_OK) | ||
4495 | return 0; | ||
4496 | nfs41_free_stateid(server, state); | ||
4497 | return nfs4_lock_expired(state, request); | ||
4498 | } | ||
4499 | #endif | ||
4500 | |||
4446 | static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock *request) | 4501 | static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock *request) |
4447 | { | 4502 | { |
4448 | struct nfs_inode *nfsi = NFS_I(state->inode); | 4503 | struct nfs_inode *nfsi = NFS_I(state->inode); |
@@ -4781,6 +4836,16 @@ out_inval: | |||
4781 | return -NFS4ERR_INVAL; | 4836 | return -NFS4ERR_INVAL; |
4782 | } | 4837 | } |
4783 | 4838 | ||
4839 | static bool | ||
4840 | nfs41_same_server_scope(struct server_scope *a, struct server_scope *b) | ||
4841 | { | ||
4842 | if (a->server_scope_sz == b->server_scope_sz && | ||
4843 | memcmp(a->server_scope, b->server_scope, a->server_scope_sz) == 0) | ||
4844 | return true; | ||
4845 | |||
4846 | return false; | ||
4847 | } | ||
4848 | |||
4784 | /* | 4849 | /* |
4785 | * nfs4_proc_exchange_id() | 4850 | * nfs4_proc_exchange_id() |
4786 | * | 4851 | * |
@@ -4823,9 +4888,31 @@ int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred) | |||
4823 | init_utsname()->domainname, | 4888 | init_utsname()->domainname, |
4824 | clp->cl_rpcclient->cl_auth->au_flavor); | 4889 | clp->cl_rpcclient->cl_auth->au_flavor); |
4825 | 4890 | ||
4891 | res.server_scope = kzalloc(sizeof(struct server_scope), GFP_KERNEL); | ||
4892 | if (unlikely(!res.server_scope)) | ||
4893 | return -ENOMEM; | ||
4894 | |||
4826 | status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT); | 4895 | status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT); |
4827 | if (!status) | 4896 | if (!status) |
4828 | status = nfs4_check_cl_exchange_flags(clp->cl_exchange_flags); | 4897 | status = nfs4_check_cl_exchange_flags(clp->cl_exchange_flags); |
4898 | |||
4899 | if (!status) { | ||
4900 | if (clp->server_scope && | ||
4901 | !nfs41_same_server_scope(clp->server_scope, | ||
4902 | res.server_scope)) { | ||
4903 | dprintk("%s: server_scope mismatch detected\n", | ||
4904 | __func__); | ||
4905 | set_bit(NFS4CLNT_SERVER_SCOPE_MISMATCH, &clp->cl_state); | ||
4906 | kfree(clp->server_scope); | ||
4907 | clp->server_scope = NULL; | ||
4908 | } | ||
4909 | |||
4910 | if (!clp->server_scope) | ||
4911 | clp->server_scope = res.server_scope; | ||
4912 | else | ||
4913 | kfree(res.server_scope); | ||
4914 | } | ||
4915 | |||
4829 | dprintk("<-- %s status= %d\n", __func__, status); | 4916 | dprintk("<-- %s status= %d\n", __func__, status); |
4830 | return status; | 4917 | return status; |
4831 | } | 4918 | } |
@@ -5441,11 +5528,13 @@ static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp, struct rpc_ | |||
5441 | return rpc_run_task(&task_setup_data); | 5528 | return rpc_run_task(&task_setup_data); |
5442 | } | 5529 | } |
5443 | 5530 | ||
5444 | static int nfs41_proc_async_sequence(struct nfs_client *clp, struct rpc_cred *cred) | 5531 | static int nfs41_proc_async_sequence(struct nfs_client *clp, struct rpc_cred *cred, unsigned renew_flags) |
5445 | { | 5532 | { |
5446 | struct rpc_task *task; | 5533 | struct rpc_task *task; |
5447 | int ret = 0; | 5534 | int ret = 0; |
5448 | 5535 | ||
5536 | if ((renew_flags & NFS4_RENEW_TIMEOUT) == 0) | ||
5537 | return 0; | ||
5449 | task = _nfs41_proc_sequence(clp, cred); | 5538 | task = _nfs41_proc_sequence(clp, cred); |
5450 | if (IS_ERR(task)) | 5539 | if (IS_ERR(task)) |
5451 | ret = PTR_ERR(task); | 5540 | ret = PTR_ERR(task); |
@@ -5706,7 +5795,7 @@ static void nfs4_layoutreturn_done(struct rpc_task *task, void *calldata) | |||
5706 | { | 5795 | { |
5707 | struct nfs4_layoutreturn *lrp = calldata; | 5796 | struct nfs4_layoutreturn *lrp = calldata; |
5708 | struct nfs_server *server; | 5797 | struct nfs_server *server; |
5709 | struct pnfs_layout_hdr *lo = NFS_I(lrp->args.inode)->layout; | 5798 | struct pnfs_layout_hdr *lo = lrp->args.layout; |
5710 | 5799 | ||
5711 | dprintk("--> %s\n", __func__); | 5800 | dprintk("--> %s\n", __func__); |
5712 | 5801 | ||
@@ -5735,7 +5824,7 @@ static void nfs4_layoutreturn_release(void *calldata) | |||
5735 | struct nfs4_layoutreturn *lrp = calldata; | 5824 | struct nfs4_layoutreturn *lrp = calldata; |
5736 | 5825 | ||
5737 | dprintk("--> %s\n", __func__); | 5826 | dprintk("--> %s\n", __func__); |
5738 | put_layout_hdr(NFS_I(lrp->args.inode)->layout); | 5827 | put_layout_hdr(lrp->args.layout); |
5739 | kfree(calldata); | 5828 | kfree(calldata); |
5740 | dprintk("<-- %s\n", __func__); | 5829 | dprintk("<-- %s\n", __func__); |
5741 | } | 5830 | } |
@@ -5772,6 +5861,54 @@ int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp) | |||
5772 | return status; | 5861 | return status; |
5773 | } | 5862 | } |
5774 | 5863 | ||
5864 | /* | ||
5865 | * Retrieve the list of Data Server devices from the MDS. | ||
5866 | */ | ||
5867 | static int _nfs4_getdevicelist(struct nfs_server *server, | ||
5868 | const struct nfs_fh *fh, | ||
5869 | struct pnfs_devicelist *devlist) | ||
5870 | { | ||
5871 | struct nfs4_getdevicelist_args args = { | ||
5872 | .fh = fh, | ||
5873 | .layoutclass = server->pnfs_curr_ld->id, | ||
5874 | }; | ||
5875 | struct nfs4_getdevicelist_res res = { | ||
5876 | .devlist = devlist, | ||
5877 | }; | ||
5878 | struct rpc_message msg = { | ||
5879 | .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_GETDEVICELIST], | ||
5880 | .rpc_argp = &args, | ||
5881 | .rpc_resp = &res, | ||
5882 | }; | ||
5883 | int status; | ||
5884 | |||
5885 | dprintk("--> %s\n", __func__); | ||
5886 | status = nfs4_call_sync(server->client, server, &msg, &args.seq_args, | ||
5887 | &res.seq_res, 0); | ||
5888 | dprintk("<-- %s status=%d\n", __func__, status); | ||
5889 | return status; | ||
5890 | } | ||
5891 | |||
5892 | int nfs4_proc_getdevicelist(struct nfs_server *server, | ||
5893 | const struct nfs_fh *fh, | ||
5894 | struct pnfs_devicelist *devlist) | ||
5895 | { | ||
5896 | struct nfs4_exception exception = { }; | ||
5897 | int err; | ||
5898 | |||
5899 | do { | ||
5900 | err = nfs4_handle_exception(server, | ||
5901 | _nfs4_getdevicelist(server, fh, devlist), | ||
5902 | &exception); | ||
5903 | } while (exception.retry); | ||
5904 | |||
5905 | dprintk("%s: err=%d, num_devs=%u\n", __func__, | ||
5906 | err, devlist->num_devs); | ||
5907 | |||
5908 | return err; | ||
5909 | } | ||
5910 | EXPORT_SYMBOL_GPL(nfs4_proc_getdevicelist); | ||
5911 | |||
5775 | static int | 5912 | static int |
5776 | _nfs4_proc_getdeviceinfo(struct nfs_server *server, struct pnfs_device *pdev) | 5913 | _nfs4_proc_getdeviceinfo(struct nfs_server *server, struct pnfs_device *pdev) |
5777 | { | 5914 | { |
@@ -5850,9 +5987,16 @@ nfs4_layoutcommit_done(struct rpc_task *task, void *calldata) | |||
5850 | static void nfs4_layoutcommit_release(void *calldata) | 5987 | static void nfs4_layoutcommit_release(void *calldata) |
5851 | { | 5988 | { |
5852 | struct nfs4_layoutcommit_data *data = calldata; | 5989 | struct nfs4_layoutcommit_data *data = calldata; |
5990 | struct pnfs_layout_segment *lseg, *tmp; | ||
5853 | 5991 | ||
5992 | pnfs_cleanup_layoutcommit(data); | ||
5854 | /* Matched by references in pnfs_set_layoutcommit */ | 5993 | /* Matched by references in pnfs_set_layoutcommit */ |
5855 | put_lseg(data->lseg); | 5994 | list_for_each_entry_safe(lseg, tmp, &data->lseg_list, pls_lc_list) { |
5995 | list_del_init(&lseg->pls_lc_list); | ||
5996 | if (test_and_clear_bit(NFS_LSEG_LAYOUTCOMMIT, | ||
5997 | &lseg->pls_flags)) | ||
5998 | put_lseg(lseg); | ||
5999 | } | ||
5856 | put_rpccred(data->cred); | 6000 | put_rpccred(data->cred); |
5857 | kfree(data); | 6001 | kfree(data); |
5858 | } | 6002 | } |
@@ -5903,6 +6047,143 @@ out: | |||
5903 | rpc_put_task(task); | 6047 | rpc_put_task(task); |
5904 | return status; | 6048 | return status; |
5905 | } | 6049 | } |
6050 | |||
6051 | static int | ||
6052 | _nfs41_proc_secinfo_no_name(struct nfs_server *server, struct nfs_fh *fhandle, | ||
6053 | struct nfs_fsinfo *info, struct nfs4_secinfo_flavors *flavors) | ||
6054 | { | ||
6055 | struct nfs41_secinfo_no_name_args args = { | ||
6056 | .style = SECINFO_STYLE_CURRENT_FH, | ||
6057 | }; | ||
6058 | struct nfs4_secinfo_res res = { | ||
6059 | .flavors = flavors, | ||
6060 | }; | ||
6061 | struct rpc_message msg = { | ||
6062 | .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SECINFO_NO_NAME], | ||
6063 | .rpc_argp = &args, | ||
6064 | .rpc_resp = &res, | ||
6065 | }; | ||
6066 | return nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0); | ||
6067 | } | ||
6068 | |||
6069 | static int | ||
6070 | nfs41_proc_secinfo_no_name(struct nfs_server *server, struct nfs_fh *fhandle, | ||
6071 | struct nfs_fsinfo *info, struct nfs4_secinfo_flavors *flavors) | ||
6072 | { | ||
6073 | struct nfs4_exception exception = { }; | ||
6074 | int err; | ||
6075 | do { | ||
6076 | err = _nfs41_proc_secinfo_no_name(server, fhandle, info, flavors); | ||
6077 | switch (err) { | ||
6078 | case 0: | ||
6079 | case -NFS4ERR_WRONGSEC: | ||
6080 | case -NFS4ERR_NOTSUPP: | ||
6081 | break; | ||
6082 | default: | ||
6083 | err = nfs4_handle_exception(server, err, &exception); | ||
6084 | } | ||
6085 | } while (exception.retry); | ||
6086 | return err; | ||
6087 | } | ||
6088 | |||
6089 | static int | ||
6090 | nfs41_find_root_sec(struct nfs_server *server, struct nfs_fh *fhandle, | ||
6091 | struct nfs_fsinfo *info) | ||
6092 | { | ||
6093 | int err; | ||
6094 | struct page *page; | ||
6095 | rpc_authflavor_t flavor; | ||
6096 | struct nfs4_secinfo_flavors *flavors; | ||
6097 | |||
6098 | page = alloc_page(GFP_KERNEL); | ||
6099 | if (!page) { | ||
6100 | err = -ENOMEM; | ||
6101 | goto out; | ||
6102 | } | ||
6103 | |||
6104 | flavors = page_address(page); | ||
6105 | err = nfs41_proc_secinfo_no_name(server, fhandle, info, flavors); | ||
6106 | |||
6107 | /* | ||
6108 | * Fall back on "guess and check" method if | ||
6109 | * the server doesn't support SECINFO_NO_NAME | ||
6110 | */ | ||
6111 | if (err == -NFS4ERR_WRONGSEC || err == -NFS4ERR_NOTSUPP) { | ||
6112 | err = nfs4_find_root_sec(server, fhandle, info); | ||
6113 | goto out_freepage; | ||
6114 | } | ||
6115 | if (err) | ||
6116 | goto out_freepage; | ||
6117 | |||
6118 | flavor = nfs_find_best_sec(flavors); | ||
6119 | if (err == 0) | ||
6120 | err = nfs4_lookup_root_sec(server, fhandle, info, flavor); | ||
6121 | |||
6122 | out_freepage: | ||
6123 | put_page(page); | ||
6124 | if (err == -EACCES) | ||
6125 | return -EPERM; | ||
6126 | out: | ||
6127 | return err; | ||
6128 | } | ||
6129 | static int _nfs41_test_stateid(struct nfs_server *server, struct nfs4_state *state) | ||
6130 | { | ||
6131 | int status; | ||
6132 | struct nfs41_test_stateid_args args = { | ||
6133 | .stateid = &state->stateid, | ||
6134 | }; | ||
6135 | struct nfs41_test_stateid_res res; | ||
6136 | struct rpc_message msg = { | ||
6137 | .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_TEST_STATEID], | ||
6138 | .rpc_argp = &args, | ||
6139 | .rpc_resp = &res, | ||
6140 | }; | ||
6141 | args.seq_args.sa_session = res.seq_res.sr_session = NULL; | ||
6142 | status = nfs4_call_sync_sequence(server->client, server, &msg, &args.seq_args, &res.seq_res, 0, 1); | ||
6143 | return status; | ||
6144 | } | ||
6145 | |||
6146 | static int nfs41_test_stateid(struct nfs_server *server, struct nfs4_state *state) | ||
6147 | { | ||
6148 | struct nfs4_exception exception = { }; | ||
6149 | int err; | ||
6150 | do { | ||
6151 | err = nfs4_handle_exception(server, | ||
6152 | _nfs41_test_stateid(server, state), | ||
6153 | &exception); | ||
6154 | } while (exception.retry); | ||
6155 | return err; | ||
6156 | } | ||
6157 | |||
6158 | static int _nfs4_free_stateid(struct nfs_server *server, struct nfs4_state *state) | ||
6159 | { | ||
6160 | int status; | ||
6161 | struct nfs41_free_stateid_args args = { | ||
6162 | .stateid = &state->stateid, | ||
6163 | }; | ||
6164 | struct nfs41_free_stateid_res res; | ||
6165 | struct rpc_message msg = { | ||
6166 | .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_FREE_STATEID], | ||
6167 | .rpc_argp = &args, | ||
6168 | .rpc_resp = &res, | ||
6169 | }; | ||
6170 | |||
6171 | args.seq_args.sa_session = res.seq_res.sr_session = NULL; | ||
6172 | status = nfs4_call_sync_sequence(server->client, server, &msg, &args.seq_args, &res.seq_res, 0, 1); | ||
6173 | return status; | ||
6174 | } | ||
6175 | |||
6176 | static int nfs41_free_stateid(struct nfs_server *server, struct nfs4_state *state) | ||
6177 | { | ||
6178 | struct nfs4_exception exception = { }; | ||
6179 | int err; | ||
6180 | do { | ||
6181 | err = nfs4_handle_exception(server, | ||
6182 | _nfs4_free_stateid(server, state), | ||
6183 | &exception); | ||
6184 | } while (exception.retry); | ||
6185 | return err; | ||
6186 | } | ||
5906 | #endif /* CONFIG_NFS_V4_1 */ | 6187 | #endif /* CONFIG_NFS_V4_1 */ |
5907 | 6188 | ||
5908 | struct nfs4_state_recovery_ops nfs40_reboot_recovery_ops = { | 6189 | struct nfs4_state_recovery_ops nfs40_reboot_recovery_ops = { |
@@ -5939,8 +6220,8 @@ struct nfs4_state_recovery_ops nfs40_nograce_recovery_ops = { | |||
5939 | struct nfs4_state_recovery_ops nfs41_nograce_recovery_ops = { | 6220 | struct nfs4_state_recovery_ops nfs41_nograce_recovery_ops = { |
5940 | .owner_flag_bit = NFS_OWNER_RECLAIM_NOGRACE, | 6221 | .owner_flag_bit = NFS_OWNER_RECLAIM_NOGRACE, |
5941 | .state_flag_bit = NFS_STATE_RECLAIM_NOGRACE, | 6222 | .state_flag_bit = NFS_STATE_RECLAIM_NOGRACE, |
5942 | .recover_open = nfs4_open_expired, | 6223 | .recover_open = nfs41_open_expired, |
5943 | .recover_lock = nfs4_lock_expired, | 6224 | .recover_lock = nfs41_lock_expired, |
5944 | .establish_clid = nfs41_init_clientid, | 6225 | .establish_clid = nfs41_init_clientid, |
5945 | .get_clid_cred = nfs4_get_exchange_id_cred, | 6226 | .get_clid_cred = nfs4_get_exchange_id_cred, |
5946 | }; | 6227 | }; |
@@ -5964,6 +6245,7 @@ static const struct nfs4_minor_version_ops nfs_v4_0_minor_ops = { | |||
5964 | .minor_version = 0, | 6245 | .minor_version = 0, |
5965 | .call_sync = _nfs4_call_sync, | 6246 | .call_sync = _nfs4_call_sync, |
5966 | .validate_stateid = nfs4_validate_delegation_stateid, | 6247 | .validate_stateid = nfs4_validate_delegation_stateid, |
6248 | .find_root_sec = nfs4_find_root_sec, | ||
5967 | .reboot_recovery_ops = &nfs40_reboot_recovery_ops, | 6249 | .reboot_recovery_ops = &nfs40_reboot_recovery_ops, |
5968 | .nograce_recovery_ops = &nfs40_nograce_recovery_ops, | 6250 | .nograce_recovery_ops = &nfs40_nograce_recovery_ops, |
5969 | .state_renewal_ops = &nfs40_state_renewal_ops, | 6251 | .state_renewal_ops = &nfs40_state_renewal_ops, |
@@ -5974,6 +6256,7 @@ static const struct nfs4_minor_version_ops nfs_v4_1_minor_ops = { | |||
5974 | .minor_version = 1, | 6256 | .minor_version = 1, |
5975 | .call_sync = _nfs4_call_sync_session, | 6257 | .call_sync = _nfs4_call_sync_session, |
5976 | .validate_stateid = nfs41_validate_delegation_stateid, | 6258 | .validate_stateid = nfs41_validate_delegation_stateid, |
6259 | .find_root_sec = nfs41_find_root_sec, | ||
5977 | .reboot_recovery_ops = &nfs41_reboot_recovery_ops, | 6260 | .reboot_recovery_ops = &nfs41_reboot_recovery_ops, |
5978 | .nograce_recovery_ops = &nfs41_nograce_recovery_ops, | 6261 | .nograce_recovery_ops = &nfs41_nograce_recovery_ops, |
5979 | .state_renewal_ops = &nfs41_state_renewal_ops, | 6262 | .state_renewal_ops = &nfs41_state_renewal_ops, |
@@ -6002,6 +6285,7 @@ const struct nfs_rpc_ops nfs_v4_clientops = { | |||
6002 | .dentry_ops = &nfs4_dentry_operations, | 6285 | .dentry_ops = &nfs4_dentry_operations, |
6003 | .dir_inode_ops = &nfs4_dir_inode_operations, | 6286 | .dir_inode_ops = &nfs4_dir_inode_operations, |
6004 | .file_inode_ops = &nfs4_file_inode_operations, | 6287 | .file_inode_ops = &nfs4_file_inode_operations, |
6288 | .file_ops = &nfs4_file_operations, | ||
6005 | .getroot = nfs4_proc_get_root, | 6289 | .getroot = nfs4_proc_get_root, |
6006 | .getattr = nfs4_proc_getattr, | 6290 | .getattr = nfs4_proc_getattr, |
6007 | .setattr = nfs4_proc_setattr, | 6291 | .setattr = nfs4_proc_setattr, |
diff --git a/fs/nfs/nfs4renewd.c b/fs/nfs/nfs4renewd.c index df8e7f3ca56..dc484c0eae7 100644 --- a/fs/nfs/nfs4renewd.c +++ b/fs/nfs/nfs4renewd.c | |||
@@ -60,6 +60,7 @@ nfs4_renew_state(struct work_struct *work) | |||
60 | struct rpc_cred *cred; | 60 | struct rpc_cred *cred; |
61 | long lease; | 61 | long lease; |
62 | unsigned long last, now; | 62 | unsigned long last, now; |
63 | unsigned renew_flags = 0; | ||
63 | 64 | ||
64 | ops = clp->cl_mvops->state_renewal_ops; | 65 | ops = clp->cl_mvops->state_renewal_ops; |
65 | dprintk("%s: start\n", __func__); | 66 | dprintk("%s: start\n", __func__); |
@@ -72,18 +73,23 @@ nfs4_renew_state(struct work_struct *work) | |||
72 | last = clp->cl_last_renewal; | 73 | last = clp->cl_last_renewal; |
73 | now = jiffies; | 74 | now = jiffies; |
74 | /* Are we close to a lease timeout? */ | 75 | /* Are we close to a lease timeout? */ |
75 | if (time_after(now, last + lease/3)) { | 76 | if (time_after(now, last + lease/3)) |
77 | renew_flags |= NFS4_RENEW_TIMEOUT; | ||
78 | if (nfs_delegations_present(clp)) | ||
79 | renew_flags |= NFS4_RENEW_DELEGATION_CB; | ||
80 | |||
81 | if (renew_flags != 0) { | ||
76 | cred = ops->get_state_renewal_cred_locked(clp); | 82 | cred = ops->get_state_renewal_cred_locked(clp); |
77 | spin_unlock(&clp->cl_lock); | 83 | spin_unlock(&clp->cl_lock); |
78 | if (cred == NULL) { | 84 | if (cred == NULL) { |
79 | if (!nfs_delegations_present(clp)) { | 85 | if (!(renew_flags & NFS4_RENEW_DELEGATION_CB)) { |
80 | set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state); | 86 | set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state); |
81 | goto out; | 87 | goto out; |
82 | } | 88 | } |
83 | nfs_expire_all_delegations(clp); | 89 | nfs_expire_all_delegations(clp); |
84 | } else { | 90 | } else { |
85 | /* Queue an asynchronous RENEW. */ | 91 | /* Queue an asynchronous RENEW. */ |
86 | ops->sched_state_renewal(clp, cred); | 92 | ops->sched_state_renewal(clp, cred, renew_flags); |
87 | put_rpccred(cred); | 93 | put_rpccred(cred); |
88 | goto out_exp; | 94 | goto out_exp; |
89 | } | 95 | } |
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index e97dd219f84..efd84316f6a 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c | |||
@@ -641,7 +641,7 @@ void nfs4_put_open_state(struct nfs4_state *state) | |||
641 | /* | 641 | /* |
642 | * Close the current file. | 642 | * Close the current file. |
643 | */ | 643 | */ |
644 | static void __nfs4_close(struct path *path, struct nfs4_state *state, | 644 | static void __nfs4_close(struct nfs4_state *state, |
645 | fmode_t fmode, gfp_t gfp_mask, int wait) | 645 | fmode_t fmode, gfp_t gfp_mask, int wait) |
646 | { | 646 | { |
647 | struct nfs4_state_owner *owner = state->owner; | 647 | struct nfs4_state_owner *owner = state->owner; |
@@ -685,18 +685,18 @@ static void __nfs4_close(struct path *path, struct nfs4_state *state, | |||
685 | } else { | 685 | } else { |
686 | bool roc = pnfs_roc(state->inode); | 686 | bool roc = pnfs_roc(state->inode); |
687 | 687 | ||
688 | nfs4_do_close(path, state, gfp_mask, wait, roc); | 688 | nfs4_do_close(state, gfp_mask, wait, roc); |
689 | } | 689 | } |
690 | } | 690 | } |
691 | 691 | ||
692 | void nfs4_close_state(struct path *path, struct nfs4_state *state, fmode_t fmode) | 692 | void nfs4_close_state(struct nfs4_state *state, fmode_t fmode) |
693 | { | 693 | { |
694 | __nfs4_close(path, state, fmode, GFP_NOFS, 0); | 694 | __nfs4_close(state, fmode, GFP_NOFS, 0); |
695 | } | 695 | } |
696 | 696 | ||
697 | void nfs4_close_sync(struct path *path, struct nfs4_state *state, fmode_t fmode) | 697 | void nfs4_close_sync(struct nfs4_state *state, fmode_t fmode) |
698 | { | 698 | { |
699 | __nfs4_close(path, state, fmode, GFP_KERNEL, 1); | 699 | __nfs4_close(state, fmode, GFP_KERNEL, 1); |
700 | } | 700 | } |
701 | 701 | ||
702 | /* | 702 | /* |
@@ -1038,6 +1038,12 @@ void nfs4_schedule_lease_recovery(struct nfs_client *clp) | |||
1038 | nfs4_schedule_state_manager(clp); | 1038 | nfs4_schedule_state_manager(clp); |
1039 | } | 1039 | } |
1040 | 1040 | ||
1041 | void nfs4_schedule_path_down_recovery(struct nfs_client *clp) | ||
1042 | { | ||
1043 | nfs_handle_cb_pathdown(clp); | ||
1044 | nfs4_schedule_state_manager(clp); | ||
1045 | } | ||
1046 | |||
1041 | static int nfs4_state_mark_reclaim_reboot(struct nfs_client *clp, struct nfs4_state *state) | 1047 | static int nfs4_state_mark_reclaim_reboot(struct nfs_client *clp, struct nfs4_state *state) |
1042 | { | 1048 | { |
1043 | 1049 | ||
@@ -1519,16 +1525,16 @@ void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags) | |||
1519 | { | 1525 | { |
1520 | if (!flags) | 1526 | if (!flags) |
1521 | return; | 1527 | return; |
1522 | else if (flags & SEQ4_STATUS_RESTART_RECLAIM_NEEDED) | 1528 | if (flags & SEQ4_STATUS_RESTART_RECLAIM_NEEDED) |
1523 | nfs41_handle_server_reboot(clp); | 1529 | nfs41_handle_server_reboot(clp); |
1524 | else if (flags & (SEQ4_STATUS_EXPIRED_ALL_STATE_REVOKED | | 1530 | if (flags & (SEQ4_STATUS_EXPIRED_ALL_STATE_REVOKED | |
1525 | SEQ4_STATUS_EXPIRED_SOME_STATE_REVOKED | | 1531 | SEQ4_STATUS_EXPIRED_SOME_STATE_REVOKED | |
1526 | SEQ4_STATUS_ADMIN_STATE_REVOKED | | 1532 | SEQ4_STATUS_ADMIN_STATE_REVOKED | |
1527 | SEQ4_STATUS_LEASE_MOVED)) | 1533 | SEQ4_STATUS_LEASE_MOVED)) |
1528 | nfs41_handle_state_revoked(clp); | 1534 | nfs41_handle_state_revoked(clp); |
1529 | else if (flags & SEQ4_STATUS_RECALLABLE_STATE_REVOKED) | 1535 | if (flags & SEQ4_STATUS_RECALLABLE_STATE_REVOKED) |
1530 | nfs41_handle_recallable_state_revoked(clp); | 1536 | nfs41_handle_recallable_state_revoked(clp); |
1531 | else if (flags & (SEQ4_STATUS_CB_PATH_DOWN | | 1537 | if (flags & (SEQ4_STATUS_CB_PATH_DOWN | |
1532 | SEQ4_STATUS_BACKCHANNEL_FAULT | | 1538 | SEQ4_STATUS_BACKCHANNEL_FAULT | |
1533 | SEQ4_STATUS_CB_PATH_DOWN_SESSION)) | 1539 | SEQ4_STATUS_CB_PATH_DOWN_SESSION)) |
1534 | nfs41_handle_cb_path_down(clp); | 1540 | nfs41_handle_cb_path_down(clp); |
@@ -1643,7 +1649,14 @@ static void nfs4_state_manager(struct nfs_client *clp) | |||
1643 | goto out_error; | 1649 | goto out_error; |
1644 | } | 1650 | } |
1645 | clear_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state); | 1651 | clear_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state); |
1646 | set_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state); | 1652 | |
1653 | if (test_and_clear_bit(NFS4CLNT_SERVER_SCOPE_MISMATCH, | ||
1654 | &clp->cl_state)) | ||
1655 | nfs4_state_start_reclaim_nograce(clp); | ||
1656 | else | ||
1657 | set_bit(NFS4CLNT_RECLAIM_REBOOT, | ||
1658 | &clp->cl_state); | ||
1659 | |||
1647 | pnfs_destroy_all_layouts(clp); | 1660 | pnfs_destroy_all_layouts(clp); |
1648 | } | 1661 | } |
1649 | 1662 | ||
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index e6e8f3b9a1d..97f987a981c 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c | |||
@@ -113,7 +113,11 @@ static int nfs4_stat_to_errno(int); | |||
113 | #define encode_restorefh_maxsz (op_encode_hdr_maxsz) | 113 | #define encode_restorefh_maxsz (op_encode_hdr_maxsz) |
114 | #define decode_restorefh_maxsz (op_decode_hdr_maxsz) | 114 | #define decode_restorefh_maxsz (op_decode_hdr_maxsz) |
115 | #define encode_fsinfo_maxsz (encode_getattr_maxsz) | 115 | #define encode_fsinfo_maxsz (encode_getattr_maxsz) |
116 | #define decode_fsinfo_maxsz (op_decode_hdr_maxsz + 15) | 116 | /* The 5 accounts for the PNFS attributes, and assumes that at most three |
117 | * layout types will be returned. | ||
118 | */ | ||
119 | #define decode_fsinfo_maxsz (op_decode_hdr_maxsz + \ | ||
120 | nfs4_fattr_bitmap_maxsz + 4 + 8 + 5) | ||
117 | #define encode_renew_maxsz (op_encode_hdr_maxsz + 3) | 121 | #define encode_renew_maxsz (op_encode_hdr_maxsz + 3) |
118 | #define decode_renew_maxsz (op_decode_hdr_maxsz) | 122 | #define decode_renew_maxsz (op_decode_hdr_maxsz) |
119 | #define encode_setclientid_maxsz \ | 123 | #define encode_setclientid_maxsz \ |
@@ -314,6 +318,17 @@ static int nfs4_stat_to_errno(int); | |||
314 | XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + 5) | 318 | XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + 5) |
315 | #define encode_reclaim_complete_maxsz (op_encode_hdr_maxsz + 4) | 319 | #define encode_reclaim_complete_maxsz (op_encode_hdr_maxsz + 4) |
316 | #define decode_reclaim_complete_maxsz (op_decode_hdr_maxsz + 4) | 320 | #define decode_reclaim_complete_maxsz (op_decode_hdr_maxsz + 4) |
321 | #define encode_getdevicelist_maxsz (op_encode_hdr_maxsz + 4 + \ | ||
322 | encode_verifier_maxsz) | ||
323 | #define decode_getdevicelist_maxsz (op_decode_hdr_maxsz + \ | ||
324 | 2 /* nfs_cookie4 gdlr_cookie */ + \ | ||
325 | decode_verifier_maxsz \ | ||
326 | /* verifier4 gdlr_verifier */ + \ | ||
327 | 1 /* gdlr_deviceid_list count */ + \ | ||
328 | XDR_QUADLEN(NFS4_PNFS_GETDEVLIST_MAXNUM * \ | ||
329 | NFS4_DEVICEID4_SIZE) \ | ||
330 | /* gdlr_deviceid_list */ + \ | ||
331 | 1 /* bool gdlr_eof */) | ||
317 | #define encode_getdeviceinfo_maxsz (op_encode_hdr_maxsz + 4 + \ | 332 | #define encode_getdeviceinfo_maxsz (op_encode_hdr_maxsz + 4 + \ |
318 | XDR_QUADLEN(NFS4_DEVICEID4_SIZE)) | 333 | XDR_QUADLEN(NFS4_DEVICEID4_SIZE)) |
319 | #define decode_getdeviceinfo_maxsz (op_decode_hdr_maxsz + \ | 334 | #define decode_getdeviceinfo_maxsz (op_decode_hdr_maxsz + \ |
@@ -343,6 +358,14 @@ static int nfs4_stat_to_errno(int); | |||
343 | 1 /* FIXME: opaque lrf_body always empty at the moment */) | 358 | 1 /* FIXME: opaque lrf_body always empty at the moment */) |
344 | #define decode_layoutreturn_maxsz (op_decode_hdr_maxsz + \ | 359 | #define decode_layoutreturn_maxsz (op_decode_hdr_maxsz + \ |
345 | 1 + decode_stateid_maxsz) | 360 | 1 + decode_stateid_maxsz) |
361 | #define encode_secinfo_no_name_maxsz (op_encode_hdr_maxsz + 1) | ||
362 | #define decode_secinfo_no_name_maxsz decode_secinfo_maxsz | ||
363 | #define encode_test_stateid_maxsz (op_encode_hdr_maxsz + 2 + \ | ||
364 | XDR_QUADLEN(NFS4_STATEID_SIZE)) | ||
365 | #define decode_test_stateid_maxsz (op_decode_hdr_maxsz + 2 + 1) | ||
366 | #define encode_free_stateid_maxsz (op_encode_hdr_maxsz + 1 + \ | ||
367 | XDR_QUADLEN(NFS4_STATEID_SIZE)) | ||
368 | #define decode_free_stateid_maxsz (op_decode_hdr_maxsz + 1) | ||
346 | #else /* CONFIG_NFS_V4_1 */ | 369 | #else /* CONFIG_NFS_V4_1 */ |
347 | #define encode_sequence_maxsz 0 | 370 | #define encode_sequence_maxsz 0 |
348 | #define decode_sequence_maxsz 0 | 371 | #define decode_sequence_maxsz 0 |
@@ -740,6 +763,14 @@ static int nfs4_stat_to_errno(int); | |||
740 | #define NFS4_dec_reclaim_complete_sz (compound_decode_hdr_maxsz + \ | 763 | #define NFS4_dec_reclaim_complete_sz (compound_decode_hdr_maxsz + \ |
741 | decode_sequence_maxsz + \ | 764 | decode_sequence_maxsz + \ |
742 | decode_reclaim_complete_maxsz) | 765 | decode_reclaim_complete_maxsz) |
766 | #define NFS4_enc_getdevicelist_sz (compound_encode_hdr_maxsz + \ | ||
767 | encode_sequence_maxsz + \ | ||
768 | encode_putfh_maxsz + \ | ||
769 | encode_getdevicelist_maxsz) | ||
770 | #define NFS4_dec_getdevicelist_sz (compound_decode_hdr_maxsz + \ | ||
771 | decode_sequence_maxsz + \ | ||
772 | decode_putfh_maxsz + \ | ||
773 | decode_getdevicelist_maxsz) | ||
743 | #define NFS4_enc_getdeviceinfo_sz (compound_encode_hdr_maxsz + \ | 774 | #define NFS4_enc_getdeviceinfo_sz (compound_encode_hdr_maxsz + \ |
744 | encode_sequence_maxsz +\ | 775 | encode_sequence_maxsz +\ |
745 | encode_getdeviceinfo_maxsz) | 776 | encode_getdeviceinfo_maxsz) |
@@ -772,6 +803,26 @@ static int nfs4_stat_to_errno(int); | |||
772 | decode_sequence_maxsz + \ | 803 | decode_sequence_maxsz + \ |
773 | decode_putfh_maxsz + \ | 804 | decode_putfh_maxsz + \ |
774 | decode_layoutreturn_maxsz) | 805 | decode_layoutreturn_maxsz) |
806 | #define NFS4_enc_secinfo_no_name_sz (compound_encode_hdr_maxsz + \ | ||
807 | encode_sequence_maxsz + \ | ||
808 | encode_putrootfh_maxsz +\ | ||
809 | encode_secinfo_no_name_maxsz) | ||
810 | #define NFS4_dec_secinfo_no_name_sz (compound_decode_hdr_maxsz + \ | ||
811 | decode_sequence_maxsz + \ | ||
812 | decode_putrootfh_maxsz + \ | ||
813 | decode_secinfo_no_name_maxsz) | ||
814 | #define NFS4_enc_test_stateid_sz (compound_encode_hdr_maxsz + \ | ||
815 | encode_sequence_maxsz + \ | ||
816 | encode_test_stateid_maxsz) | ||
817 | #define NFS4_dec_test_stateid_sz (compound_decode_hdr_maxsz + \ | ||
818 | decode_sequence_maxsz + \ | ||
819 | decode_test_stateid_maxsz) | ||
820 | #define NFS4_enc_free_stateid_sz (compound_encode_hdr_maxsz + \ | ||
821 | encode_sequence_maxsz + \ | ||
822 | encode_free_stateid_maxsz) | ||
823 | #define NFS4_dec_free_stateid_sz (compound_decode_hdr_maxsz + \ | ||
824 | decode_sequence_maxsz + \ | ||
825 | decode_free_stateid_maxsz) | ||
775 | 826 | ||
776 | const u32 nfs41_maxwrite_overhead = ((RPC_MAX_HEADER_WITH_AUTH + | 827 | const u32 nfs41_maxwrite_overhead = ((RPC_MAX_HEADER_WITH_AUTH + |
777 | compound_encode_hdr_maxsz + | 828 | compound_encode_hdr_maxsz + |
@@ -1076,6 +1127,35 @@ static void encode_getattr_two(struct xdr_stream *xdr, uint32_t bm0, uint32_t bm | |||
1076 | hdr->replen += decode_getattr_maxsz; | 1127 | hdr->replen += decode_getattr_maxsz; |
1077 | } | 1128 | } |
1078 | 1129 | ||
1130 | static void | ||
1131 | encode_getattr_three(struct xdr_stream *xdr, | ||
1132 | uint32_t bm0, uint32_t bm1, uint32_t bm2, | ||
1133 | struct compound_hdr *hdr) | ||
1134 | { | ||
1135 | __be32 *p; | ||
1136 | |||
1137 | p = reserve_space(xdr, 4); | ||
1138 | *p = cpu_to_be32(OP_GETATTR); | ||
1139 | if (bm2) { | ||
1140 | p = reserve_space(xdr, 16); | ||
1141 | *p++ = cpu_to_be32(3); | ||
1142 | *p++ = cpu_to_be32(bm0); | ||
1143 | *p++ = cpu_to_be32(bm1); | ||
1144 | *p = cpu_to_be32(bm2); | ||
1145 | } else if (bm1) { | ||
1146 | p = reserve_space(xdr, 12); | ||
1147 | *p++ = cpu_to_be32(2); | ||
1148 | *p++ = cpu_to_be32(bm0); | ||
1149 | *p = cpu_to_be32(bm1); | ||
1150 | } else { | ||
1151 | p = reserve_space(xdr, 8); | ||
1152 | *p++ = cpu_to_be32(1); | ||
1153 | *p = cpu_to_be32(bm0); | ||
1154 | } | ||
1155 | hdr->nops++; | ||
1156 | hdr->replen += decode_getattr_maxsz; | ||
1157 | } | ||
1158 | |||
1079 | static void encode_getfattr(struct xdr_stream *xdr, const u32* bitmask, struct compound_hdr *hdr) | 1159 | static void encode_getfattr(struct xdr_stream *xdr, const u32* bitmask, struct compound_hdr *hdr) |
1080 | { | 1160 | { |
1081 | encode_getattr_two(xdr, bitmask[0] & nfs4_fattr_bitmap[0], | 1161 | encode_getattr_two(xdr, bitmask[0] & nfs4_fattr_bitmap[0], |
@@ -1084,8 +1164,11 @@ static void encode_getfattr(struct xdr_stream *xdr, const u32* bitmask, struct c | |||
1084 | 1164 | ||
1085 | static void encode_fsinfo(struct xdr_stream *xdr, const u32* bitmask, struct compound_hdr *hdr) | 1165 | static void encode_fsinfo(struct xdr_stream *xdr, const u32* bitmask, struct compound_hdr *hdr) |
1086 | { | 1166 | { |
1087 | encode_getattr_two(xdr, bitmask[0] & nfs4_fsinfo_bitmap[0], | 1167 | encode_getattr_three(xdr, |
1088 | bitmask[1] & nfs4_fsinfo_bitmap[1], hdr); | 1168 | bitmask[0] & nfs4_fsinfo_bitmap[0], |
1169 | bitmask[1] & nfs4_fsinfo_bitmap[1], | ||
1170 | bitmask[2] & nfs4_fsinfo_bitmap[2], | ||
1171 | hdr); | ||
1089 | } | 1172 | } |
1090 | 1173 | ||
1091 | static void encode_fs_locations(struct xdr_stream *xdr, const u32* bitmask, struct compound_hdr *hdr) | 1174 | static void encode_fs_locations(struct xdr_stream *xdr, const u32* bitmask, struct compound_hdr *hdr) |
@@ -1827,6 +1910,26 @@ static void encode_sequence(struct xdr_stream *xdr, | |||
1827 | 1910 | ||
1828 | #ifdef CONFIG_NFS_V4_1 | 1911 | #ifdef CONFIG_NFS_V4_1 |
1829 | static void | 1912 | static void |
1913 | encode_getdevicelist(struct xdr_stream *xdr, | ||
1914 | const struct nfs4_getdevicelist_args *args, | ||
1915 | struct compound_hdr *hdr) | ||
1916 | { | ||
1917 | __be32 *p; | ||
1918 | nfs4_verifier dummy = { | ||
1919 | .data = "dummmmmy", | ||
1920 | }; | ||
1921 | |||
1922 | p = reserve_space(xdr, 20); | ||
1923 | *p++ = cpu_to_be32(OP_GETDEVICELIST); | ||
1924 | *p++ = cpu_to_be32(args->layoutclass); | ||
1925 | *p++ = cpu_to_be32(NFS4_PNFS_GETDEVLIST_MAXNUM); | ||
1926 | xdr_encode_hyper(p, 0ULL); /* cookie */ | ||
1927 | encode_nfs4_verifier(xdr, &dummy); | ||
1928 | hdr->nops++; | ||
1929 | hdr->replen += decode_getdevicelist_maxsz; | ||
1930 | } | ||
1931 | |||
1932 | static void | ||
1830 | encode_getdeviceinfo(struct xdr_stream *xdr, | 1933 | encode_getdeviceinfo(struct xdr_stream *xdr, |
1831 | const struct nfs4_getdeviceinfo_args *args, | 1934 | const struct nfs4_getdeviceinfo_args *args, |
1832 | struct compound_hdr *hdr) | 1935 | struct compound_hdr *hdr) |
@@ -1888,7 +1991,7 @@ encode_layoutcommit(struct xdr_stream *xdr, | |||
1888 | *p++ = cpu_to_be32(OP_LAYOUTCOMMIT); | 1991 | *p++ = cpu_to_be32(OP_LAYOUTCOMMIT); |
1889 | /* Only whole file layouts */ | 1992 | /* Only whole file layouts */ |
1890 | p = xdr_encode_hyper(p, 0); /* offset */ | 1993 | p = xdr_encode_hyper(p, 0); /* offset */ |
1891 | p = xdr_encode_hyper(p, NFS4_MAX_UINT64); /* length */ | 1994 | p = xdr_encode_hyper(p, args->lastbytewritten + 1); /* length */ |
1892 | *p++ = cpu_to_be32(0); /* reclaim */ | 1995 | *p++ = cpu_to_be32(0); /* reclaim */ |
1893 | p = xdr_encode_opaque_fixed(p, args->stateid.data, NFS4_STATEID_SIZE); | 1996 | p = xdr_encode_opaque_fixed(p, args->stateid.data, NFS4_STATEID_SIZE); |
1894 | *p++ = cpu_to_be32(1); /* newoffset = TRUE */ | 1997 | *p++ = cpu_to_be32(1); /* newoffset = TRUE */ |
@@ -1938,6 +2041,46 @@ encode_layoutreturn(struct xdr_stream *xdr, | |||
1938 | hdr->nops++; | 2041 | hdr->nops++; |
1939 | hdr->replen += decode_layoutreturn_maxsz; | 2042 | hdr->replen += decode_layoutreturn_maxsz; |
1940 | } | 2043 | } |
2044 | |||
2045 | static int | ||
2046 | encode_secinfo_no_name(struct xdr_stream *xdr, | ||
2047 | const struct nfs41_secinfo_no_name_args *args, | ||
2048 | struct compound_hdr *hdr) | ||
2049 | { | ||
2050 | __be32 *p; | ||
2051 | p = reserve_space(xdr, 8); | ||
2052 | *p++ = cpu_to_be32(OP_SECINFO_NO_NAME); | ||
2053 | *p++ = cpu_to_be32(args->style); | ||
2054 | hdr->nops++; | ||
2055 | hdr->replen += decode_secinfo_no_name_maxsz; | ||
2056 | return 0; | ||
2057 | } | ||
2058 | |||
2059 | static void encode_test_stateid(struct xdr_stream *xdr, | ||
2060 | struct nfs41_test_stateid_args *args, | ||
2061 | struct compound_hdr *hdr) | ||
2062 | { | ||
2063 | __be32 *p; | ||
2064 | |||
2065 | p = reserve_space(xdr, 8 + NFS4_STATEID_SIZE); | ||
2066 | *p++ = cpu_to_be32(OP_TEST_STATEID); | ||
2067 | *p++ = cpu_to_be32(1); | ||
2068 | xdr_encode_opaque_fixed(p, args->stateid->data, NFS4_STATEID_SIZE); | ||
2069 | hdr->nops++; | ||
2070 | hdr->replen += decode_test_stateid_maxsz; | ||
2071 | } | ||
2072 | |||
2073 | static void encode_free_stateid(struct xdr_stream *xdr, | ||
2074 | struct nfs41_free_stateid_args *args, | ||
2075 | struct compound_hdr *hdr) | ||
2076 | { | ||
2077 | __be32 *p; | ||
2078 | p = reserve_space(xdr, 4 + NFS4_STATEID_SIZE); | ||
2079 | *p++ = cpu_to_be32(OP_FREE_STATEID); | ||
2080 | xdr_encode_opaque_fixed(p, args->stateid->data, NFS4_STATEID_SIZE); | ||
2081 | hdr->nops++; | ||
2082 | hdr->replen += decode_free_stateid_maxsz; | ||
2083 | } | ||
1941 | #endif /* CONFIG_NFS_V4_1 */ | 2084 | #endif /* CONFIG_NFS_V4_1 */ |
1942 | 2085 | ||
1943 | /* | 2086 | /* |
@@ -2374,11 +2517,13 @@ static void nfs4_xdr_enc_getacl(struct rpc_rqst *req, struct xdr_stream *xdr, | |||
2374 | encode_compound_hdr(xdr, req, &hdr); | 2517 | encode_compound_hdr(xdr, req, &hdr); |
2375 | encode_sequence(xdr, &args->seq_args, &hdr); | 2518 | encode_sequence(xdr, &args->seq_args, &hdr); |
2376 | encode_putfh(xdr, args->fh, &hdr); | 2519 | encode_putfh(xdr, args->fh, &hdr); |
2377 | replen = hdr.replen + op_decode_hdr_maxsz + nfs4_fattr_bitmap_maxsz + 1; | 2520 | replen = hdr.replen + op_decode_hdr_maxsz + 1; |
2378 | encode_getattr_two(xdr, FATTR4_WORD0_ACL, 0, &hdr); | 2521 | encode_getattr_two(xdr, FATTR4_WORD0_ACL, 0, &hdr); |
2379 | 2522 | ||
2380 | xdr_inline_pages(&req->rq_rcv_buf, replen << 2, | 2523 | xdr_inline_pages(&req->rq_rcv_buf, replen << 2, |
2381 | args->acl_pages, args->acl_pgbase, args->acl_len); | 2524 | args->acl_pages, args->acl_pgbase, args->acl_len); |
2525 | xdr_set_scratch_buffer(xdr, page_address(args->acl_scratch), PAGE_SIZE); | ||
2526 | |||
2382 | encode_nops(&hdr); | 2527 | encode_nops(&hdr); |
2383 | } | 2528 | } |
2384 | 2529 | ||
@@ -2536,7 +2681,7 @@ static void nfs4_xdr_enc_setclientid_confirm(struct rpc_rqst *req, | |||
2536 | struct compound_hdr hdr = { | 2681 | struct compound_hdr hdr = { |
2537 | .nops = 0, | 2682 | .nops = 0, |
2538 | }; | 2683 | }; |
2539 | const u32 lease_bitmap[2] = { FATTR4_WORD0_LEASE_TIME, 0 }; | 2684 | const u32 lease_bitmap[3] = { FATTR4_WORD0_LEASE_TIME }; |
2540 | 2685 | ||
2541 | encode_compound_hdr(xdr, req, &hdr); | 2686 | encode_compound_hdr(xdr, req, &hdr); |
2542 | encode_setclientid_confirm(xdr, arg, &hdr); | 2687 | encode_setclientid_confirm(xdr, arg, &hdr); |
@@ -2680,7 +2825,7 @@ static void nfs4_xdr_enc_get_lease_time(struct rpc_rqst *req, | |||
2680 | struct compound_hdr hdr = { | 2825 | struct compound_hdr hdr = { |
2681 | .minorversion = nfs4_xdr_minorversion(&args->la_seq_args), | 2826 | .minorversion = nfs4_xdr_minorversion(&args->la_seq_args), |
2682 | }; | 2827 | }; |
2683 | const u32 lease_bitmap[2] = { FATTR4_WORD0_LEASE_TIME, 0 }; | 2828 | const u32 lease_bitmap[3] = { FATTR4_WORD0_LEASE_TIME }; |
2684 | 2829 | ||
2685 | encode_compound_hdr(xdr, req, &hdr); | 2830 | encode_compound_hdr(xdr, req, &hdr); |
2686 | encode_sequence(xdr, &args->la_seq_args, &hdr); | 2831 | encode_sequence(xdr, &args->la_seq_args, &hdr); |
@@ -2707,6 +2852,24 @@ static void nfs4_xdr_enc_reclaim_complete(struct rpc_rqst *req, | |||
2707 | } | 2852 | } |
2708 | 2853 | ||
2709 | /* | 2854 | /* |
2855 | * Encode GETDEVICELIST request | ||
2856 | */ | ||
2857 | static void nfs4_xdr_enc_getdevicelist(struct rpc_rqst *req, | ||
2858 | struct xdr_stream *xdr, | ||
2859 | struct nfs4_getdevicelist_args *args) | ||
2860 | { | ||
2861 | struct compound_hdr hdr = { | ||
2862 | .minorversion = nfs4_xdr_minorversion(&args->seq_args), | ||
2863 | }; | ||
2864 | |||
2865 | encode_compound_hdr(xdr, req, &hdr); | ||
2866 | encode_sequence(xdr, &args->seq_args, &hdr); | ||
2867 | encode_putfh(xdr, args->fh, &hdr); | ||
2868 | encode_getdevicelist(xdr, args, &hdr); | ||
2869 | encode_nops(&hdr); | ||
2870 | } | ||
2871 | |||
2872 | /* | ||
2710 | * Encode GETDEVICEINFO request | 2873 | * Encode GETDEVICEINFO request |
2711 | */ | 2874 | */ |
2712 | static void nfs4_xdr_enc_getdeviceinfo(struct rpc_rqst *req, | 2875 | static void nfs4_xdr_enc_getdeviceinfo(struct rpc_rqst *req, |
@@ -2790,6 +2953,59 @@ static void nfs4_xdr_enc_layoutreturn(struct rpc_rqst *req, | |||
2790 | encode_layoutreturn(xdr, args, &hdr); | 2953 | encode_layoutreturn(xdr, args, &hdr); |
2791 | encode_nops(&hdr); | 2954 | encode_nops(&hdr); |
2792 | } | 2955 | } |
2956 | |||
2957 | /* | ||
2958 | * Encode SECINFO_NO_NAME request | ||
2959 | */ | ||
2960 | static int nfs4_xdr_enc_secinfo_no_name(struct rpc_rqst *req, | ||
2961 | struct xdr_stream *xdr, | ||
2962 | struct nfs41_secinfo_no_name_args *args) | ||
2963 | { | ||
2964 | struct compound_hdr hdr = { | ||
2965 | .minorversion = nfs4_xdr_minorversion(&args->seq_args), | ||
2966 | }; | ||
2967 | |||
2968 | encode_compound_hdr(xdr, req, &hdr); | ||
2969 | encode_sequence(xdr, &args->seq_args, &hdr); | ||
2970 | encode_putrootfh(xdr, &hdr); | ||
2971 | encode_secinfo_no_name(xdr, args, &hdr); | ||
2972 | encode_nops(&hdr); | ||
2973 | return 0; | ||
2974 | } | ||
2975 | |||
2976 | /* | ||
2977 | * Encode TEST_STATEID request | ||
2978 | */ | ||
2979 | static void nfs4_xdr_enc_test_stateid(struct rpc_rqst *req, | ||
2980 | struct xdr_stream *xdr, | ||
2981 | struct nfs41_test_stateid_args *args) | ||
2982 | { | ||
2983 | struct compound_hdr hdr = { | ||
2984 | .minorversion = nfs4_xdr_minorversion(&args->seq_args), | ||
2985 | }; | ||
2986 | |||
2987 | encode_compound_hdr(xdr, req, &hdr); | ||
2988 | encode_sequence(xdr, &args->seq_args, &hdr); | ||
2989 | encode_test_stateid(xdr, args, &hdr); | ||
2990 | encode_nops(&hdr); | ||
2991 | } | ||
2992 | |||
2993 | /* | ||
2994 | * Encode FREE_STATEID request | ||
2995 | */ | ||
2996 | static void nfs4_xdr_enc_free_stateid(struct rpc_rqst *req, | ||
2997 | struct xdr_stream *xdr, | ||
2998 | struct nfs41_free_stateid_args *args) | ||
2999 | { | ||
3000 | struct compound_hdr hdr = { | ||
3001 | .minorversion = nfs4_xdr_minorversion(&args->seq_args), | ||
3002 | }; | ||
3003 | |||
3004 | encode_compound_hdr(xdr, req, &hdr); | ||
3005 | encode_sequence(xdr, &args->seq_args, &hdr); | ||
3006 | encode_free_stateid(xdr, args, &hdr); | ||
3007 | encode_nops(&hdr); | ||
3008 | } | ||
2793 | #endif /* CONFIG_NFS_V4_1 */ | 3009 | #endif /* CONFIG_NFS_V4_1 */ |
2794 | 3010 | ||
2795 | static void print_overflow_msg(const char *func, const struct xdr_stream *xdr) | 3011 | static void print_overflow_msg(const char *func, const struct xdr_stream *xdr) |
@@ -2890,14 +3106,17 @@ static int decode_attr_bitmap(struct xdr_stream *xdr, uint32_t *bitmap) | |||
2890 | goto out_overflow; | 3106 | goto out_overflow; |
2891 | bmlen = be32_to_cpup(p); | 3107 | bmlen = be32_to_cpup(p); |
2892 | 3108 | ||
2893 | bitmap[0] = bitmap[1] = 0; | 3109 | bitmap[0] = bitmap[1] = bitmap[2] = 0; |
2894 | p = xdr_inline_decode(xdr, (bmlen << 2)); | 3110 | p = xdr_inline_decode(xdr, (bmlen << 2)); |
2895 | if (unlikely(!p)) | 3111 | if (unlikely(!p)) |
2896 | goto out_overflow; | 3112 | goto out_overflow; |
2897 | if (bmlen > 0) { | 3113 | if (bmlen > 0) { |
2898 | bitmap[0] = be32_to_cpup(p++); | 3114 | bitmap[0] = be32_to_cpup(p++); |
2899 | if (bmlen > 1) | 3115 | if (bmlen > 1) { |
2900 | bitmap[1] = be32_to_cpup(p); | 3116 | bitmap[1] = be32_to_cpup(p++); |
3117 | if (bmlen > 2) | ||
3118 | bitmap[2] = be32_to_cpup(p); | ||
3119 | } | ||
2901 | } | 3120 | } |
2902 | return 0; | 3121 | return 0; |
2903 | out_overflow: | 3122 | out_overflow: |
@@ -2929,8 +3148,9 @@ static int decode_attr_supported(struct xdr_stream *xdr, uint32_t *bitmap, uint3 | |||
2929 | return ret; | 3148 | return ret; |
2930 | bitmap[0] &= ~FATTR4_WORD0_SUPPORTED_ATTRS; | 3149 | bitmap[0] &= ~FATTR4_WORD0_SUPPORTED_ATTRS; |
2931 | } else | 3150 | } else |
2932 | bitmask[0] = bitmask[1] = 0; | 3151 | bitmask[0] = bitmask[1] = bitmask[2] = 0; |
2933 | dprintk("%s: bitmask=%08x:%08x\n", __func__, bitmask[0], bitmask[1]); | 3152 | dprintk("%s: bitmask=%08x:%08x:%08x\n", __func__, |
3153 | bitmask[0], bitmask[1], bitmask[2]); | ||
2934 | return 0; | 3154 | return 0; |
2935 | } | 3155 | } |
2936 | 3156 | ||
@@ -3984,7 +4204,7 @@ out_overflow: | |||
3984 | static int decode_server_caps(struct xdr_stream *xdr, struct nfs4_server_caps_res *res) | 4204 | static int decode_server_caps(struct xdr_stream *xdr, struct nfs4_server_caps_res *res) |
3985 | { | 4205 | { |
3986 | __be32 *savep; | 4206 | __be32 *savep; |
3987 | uint32_t attrlen, bitmap[2] = {0}; | 4207 | uint32_t attrlen, bitmap[3] = {0}; |
3988 | int status; | 4208 | int status; |
3989 | 4209 | ||
3990 | if ((status = decode_op_hdr(xdr, OP_GETATTR)) != 0) | 4210 | if ((status = decode_op_hdr(xdr, OP_GETATTR)) != 0) |
@@ -4010,7 +4230,7 @@ xdr_error: | |||
4010 | static int decode_statfs(struct xdr_stream *xdr, struct nfs_fsstat *fsstat) | 4230 | static int decode_statfs(struct xdr_stream *xdr, struct nfs_fsstat *fsstat) |
4011 | { | 4231 | { |
4012 | __be32 *savep; | 4232 | __be32 *savep; |
4013 | uint32_t attrlen, bitmap[2] = {0}; | 4233 | uint32_t attrlen, bitmap[3] = {0}; |
4014 | int status; | 4234 | int status; |
4015 | 4235 | ||
4016 | if ((status = decode_op_hdr(xdr, OP_GETATTR)) != 0) | 4236 | if ((status = decode_op_hdr(xdr, OP_GETATTR)) != 0) |
@@ -4042,7 +4262,7 @@ xdr_error: | |||
4042 | static int decode_pathconf(struct xdr_stream *xdr, struct nfs_pathconf *pathconf) | 4262 | static int decode_pathconf(struct xdr_stream *xdr, struct nfs_pathconf *pathconf) |
4043 | { | 4263 | { |
4044 | __be32 *savep; | 4264 | __be32 *savep; |
4045 | uint32_t attrlen, bitmap[2] = {0}; | 4265 | uint32_t attrlen, bitmap[3] = {0}; |
4046 | int status; | 4266 | int status; |
4047 | 4267 | ||
4048 | if ((status = decode_op_hdr(xdr, OP_GETATTR)) != 0) | 4268 | if ((status = decode_op_hdr(xdr, OP_GETATTR)) != 0) |
@@ -4182,7 +4402,7 @@ static int decode_getfattr_generic(struct xdr_stream *xdr, struct nfs_fattr *fat | |||
4182 | { | 4402 | { |
4183 | __be32 *savep; | 4403 | __be32 *savep; |
4184 | uint32_t attrlen, | 4404 | uint32_t attrlen, |
4185 | bitmap[2] = {0}; | 4405 | bitmap[3] = {0}; |
4186 | int status; | 4406 | int status; |
4187 | 4407 | ||
4188 | status = decode_op_hdr(xdr, OP_GETATTR); | 4408 | status = decode_op_hdr(xdr, OP_GETATTR); |
@@ -4268,10 +4488,32 @@ static int decode_attr_pnfstype(struct xdr_stream *xdr, uint32_t *bitmap, | |||
4268 | return status; | 4488 | return status; |
4269 | } | 4489 | } |
4270 | 4490 | ||
4491 | /* | ||
4492 | * The prefered block size for layout directed io | ||
4493 | */ | ||
4494 | static int decode_attr_layout_blksize(struct xdr_stream *xdr, uint32_t *bitmap, | ||
4495 | uint32_t *res) | ||
4496 | { | ||
4497 | __be32 *p; | ||
4498 | |||
4499 | dprintk("%s: bitmap is %x\n", __func__, bitmap[2]); | ||
4500 | *res = 0; | ||
4501 | if (bitmap[2] & FATTR4_WORD2_LAYOUT_BLKSIZE) { | ||
4502 | p = xdr_inline_decode(xdr, 4); | ||
4503 | if (unlikely(!p)) { | ||
4504 | print_overflow_msg(__func__, xdr); | ||
4505 | return -EIO; | ||
4506 | } | ||
4507 | *res = be32_to_cpup(p); | ||
4508 | bitmap[2] &= ~FATTR4_WORD2_LAYOUT_BLKSIZE; | ||
4509 | } | ||
4510 | return 0; | ||
4511 | } | ||
4512 | |||
4271 | static int decode_fsinfo(struct xdr_stream *xdr, struct nfs_fsinfo *fsinfo) | 4513 | static int decode_fsinfo(struct xdr_stream *xdr, struct nfs_fsinfo *fsinfo) |
4272 | { | 4514 | { |
4273 | __be32 *savep; | 4515 | __be32 *savep; |
4274 | uint32_t attrlen, bitmap[2]; | 4516 | uint32_t attrlen, bitmap[3]; |
4275 | int status; | 4517 | int status; |
4276 | 4518 | ||
4277 | if ((status = decode_op_hdr(xdr, OP_GETATTR)) != 0) | 4519 | if ((status = decode_op_hdr(xdr, OP_GETATTR)) != 0) |
@@ -4299,6 +4541,9 @@ static int decode_fsinfo(struct xdr_stream *xdr, struct nfs_fsinfo *fsinfo) | |||
4299 | status = decode_attr_pnfstype(xdr, bitmap, &fsinfo->layouttype); | 4541 | status = decode_attr_pnfstype(xdr, bitmap, &fsinfo->layouttype); |
4300 | if (status != 0) | 4542 | if (status != 0) |
4301 | goto xdr_error; | 4543 | goto xdr_error; |
4544 | status = decode_attr_layout_blksize(xdr, bitmap, &fsinfo->blksize); | ||
4545 | if (status) | ||
4546 | goto xdr_error; | ||
4302 | 4547 | ||
4303 | status = verify_attr_len(xdr, savep, attrlen); | 4548 | status = verify_attr_len(xdr, savep, attrlen); |
4304 | xdr_error: | 4549 | xdr_error: |
@@ -4714,17 +4959,18 @@ decode_restorefh(struct xdr_stream *xdr) | |||
4714 | } | 4959 | } |
4715 | 4960 | ||
4716 | static int decode_getacl(struct xdr_stream *xdr, struct rpc_rqst *req, | 4961 | static int decode_getacl(struct xdr_stream *xdr, struct rpc_rqst *req, |
4717 | size_t *acl_len) | 4962 | struct nfs_getaclres *res) |
4718 | { | 4963 | { |
4719 | __be32 *savep; | 4964 | __be32 *savep, *bm_p; |
4720 | uint32_t attrlen, | 4965 | uint32_t attrlen, |
4721 | bitmap[2] = {0}; | 4966 | bitmap[3] = {0}; |
4722 | struct kvec *iov = req->rq_rcv_buf.head; | 4967 | struct kvec *iov = req->rq_rcv_buf.head; |
4723 | int status; | 4968 | int status; |
4724 | 4969 | ||
4725 | *acl_len = 0; | 4970 | res->acl_len = 0; |
4726 | if ((status = decode_op_hdr(xdr, OP_GETATTR)) != 0) | 4971 | if ((status = decode_op_hdr(xdr, OP_GETATTR)) != 0) |
4727 | goto out; | 4972 | goto out; |
4973 | bm_p = xdr->p; | ||
4728 | if ((status = decode_attr_bitmap(xdr, bitmap)) != 0) | 4974 | if ((status = decode_attr_bitmap(xdr, bitmap)) != 0) |
4729 | goto out; | 4975 | goto out; |
4730 | if ((status = decode_attr_length(xdr, &attrlen, &savep)) != 0) | 4976 | if ((status = decode_attr_length(xdr, &attrlen, &savep)) != 0) |
@@ -4736,18 +4982,30 @@ static int decode_getacl(struct xdr_stream *xdr, struct rpc_rqst *req, | |||
4736 | size_t hdrlen; | 4982 | size_t hdrlen; |
4737 | u32 recvd; | 4983 | u32 recvd; |
4738 | 4984 | ||
4985 | /* The bitmap (xdr len + bitmaps) and the attr xdr len words | ||
4986 | * are stored with the acl data to handle the problem of | ||
4987 | * variable length bitmaps.*/ | ||
4988 | xdr->p = bm_p; | ||
4989 | res->acl_data_offset = be32_to_cpup(bm_p) + 2; | ||
4990 | res->acl_data_offset <<= 2; | ||
4991 | |||
4739 | /* We ignore &savep and don't do consistency checks on | 4992 | /* We ignore &savep and don't do consistency checks on |
4740 | * the attr length. Let userspace figure it out.... */ | 4993 | * the attr length. Let userspace figure it out.... */ |
4741 | hdrlen = (u8 *)xdr->p - (u8 *)iov->iov_base; | 4994 | hdrlen = (u8 *)xdr->p - (u8 *)iov->iov_base; |
4995 | attrlen += res->acl_data_offset; | ||
4742 | recvd = req->rq_rcv_buf.len - hdrlen; | 4996 | recvd = req->rq_rcv_buf.len - hdrlen; |
4743 | if (attrlen > recvd) { | 4997 | if (attrlen > recvd) { |
4744 | dprintk("NFS: server cheating in getattr" | 4998 | if (res->acl_flags & NFS4_ACL_LEN_REQUEST) { |
4745 | " acl reply: attrlen %u > recvd %u\n", | 4999 | /* getxattr interface called with a NULL buf */ |
5000 | res->acl_len = attrlen; | ||
5001 | goto out; | ||
5002 | } | ||
5003 | dprintk("NFS: acl reply: attrlen %u > recvd %u\n", | ||
4746 | attrlen, recvd); | 5004 | attrlen, recvd); |
4747 | return -EINVAL; | 5005 | return -EINVAL; |
4748 | } | 5006 | } |
4749 | xdr_read_pages(xdr, attrlen); | 5007 | xdr_read_pages(xdr, attrlen); |
4750 | *acl_len = attrlen; | 5008 | res->acl_len = attrlen; |
4751 | } else | 5009 | } else |
4752 | status = -EOPNOTSUPP; | 5010 | status = -EOPNOTSUPP; |
4753 | 5011 | ||
@@ -4977,11 +5235,17 @@ static int decode_exchange_id(struct xdr_stream *xdr, | |||
4977 | if (unlikely(status)) | 5235 | if (unlikely(status)) |
4978 | return status; | 5236 | return status; |
4979 | 5237 | ||
4980 | /* Throw away server_scope */ | 5238 | /* Save server_scope */ |
4981 | status = decode_opaque_inline(xdr, &dummy, &dummy_str); | 5239 | status = decode_opaque_inline(xdr, &dummy, &dummy_str); |
4982 | if (unlikely(status)) | 5240 | if (unlikely(status)) |
4983 | return status; | 5241 | return status; |
4984 | 5242 | ||
5243 | if (unlikely(dummy > NFS4_OPAQUE_LIMIT)) | ||
5244 | return -EIO; | ||
5245 | |||
5246 | memcpy(res->server_scope->server_scope, dummy_str, dummy); | ||
5247 | res->server_scope->server_scope_sz = dummy; | ||
5248 | |||
4985 | /* Throw away Implementation id array */ | 5249 | /* Throw away Implementation id array */ |
4986 | status = decode_opaque_inline(xdr, &dummy, &dummy_str); | 5250 | status = decode_opaque_inline(xdr, &dummy, &dummy_str); |
4987 | if (unlikely(status)) | 5251 | if (unlikely(status)) |
@@ -5141,6 +5405,53 @@ out_overflow: | |||
5141 | } | 5405 | } |
5142 | 5406 | ||
5143 | #if defined(CONFIG_NFS_V4_1) | 5407 | #if defined(CONFIG_NFS_V4_1) |
5408 | /* | ||
5409 | * TODO: Need to handle case when EOF != true; | ||
5410 | */ | ||
5411 | static int decode_getdevicelist(struct xdr_stream *xdr, | ||
5412 | struct pnfs_devicelist *res) | ||
5413 | { | ||
5414 | __be32 *p; | ||
5415 | int status, i; | ||
5416 | struct nfs_writeverf verftemp; | ||
5417 | |||
5418 | status = decode_op_hdr(xdr, OP_GETDEVICELIST); | ||
5419 | if (status) | ||
5420 | return status; | ||
5421 | |||
5422 | p = xdr_inline_decode(xdr, 8 + 8 + 4); | ||
5423 | if (unlikely(!p)) | ||
5424 | goto out_overflow; | ||
5425 | |||
5426 | /* TODO: Skip cookie for now */ | ||
5427 | p += 2; | ||
5428 | |||
5429 | /* Read verifier */ | ||
5430 | p = xdr_decode_opaque_fixed(p, verftemp.verifier, 8); | ||
5431 | |||
5432 | res->num_devs = be32_to_cpup(p); | ||
5433 | |||
5434 | dprintk("%s: num_dev %d\n", __func__, res->num_devs); | ||
5435 | |||
5436 | if (res->num_devs > NFS4_PNFS_GETDEVLIST_MAXNUM) { | ||
5437 | printk(KERN_ERR "%s too many result dev_num %u\n", | ||
5438 | __func__, res->num_devs); | ||
5439 | return -EIO; | ||
5440 | } | ||
5441 | |||
5442 | p = xdr_inline_decode(xdr, | ||
5443 | res->num_devs * NFS4_DEVICEID4_SIZE + 4); | ||
5444 | if (unlikely(!p)) | ||
5445 | goto out_overflow; | ||
5446 | for (i = 0; i < res->num_devs; i++) | ||
5447 | p = xdr_decode_opaque_fixed(p, res->dev_id[i].data, | ||
5448 | NFS4_DEVICEID4_SIZE); | ||
5449 | res->eof = be32_to_cpup(p); | ||
5450 | return 0; | ||
5451 | out_overflow: | ||
5452 | print_overflow_msg(__func__, xdr); | ||
5453 | return -EIO; | ||
5454 | } | ||
5144 | 5455 | ||
5145 | static int decode_getdeviceinfo(struct xdr_stream *xdr, | 5456 | static int decode_getdeviceinfo(struct xdr_stream *xdr, |
5146 | struct pnfs_device *pdev) | 5457 | struct pnfs_device *pdev) |
@@ -5303,6 +5614,7 @@ static int decode_layoutcommit(struct xdr_stream *xdr, | |||
5303 | int status; | 5614 | int status; |
5304 | 5615 | ||
5305 | status = decode_op_hdr(xdr, OP_LAYOUTCOMMIT); | 5616 | status = decode_op_hdr(xdr, OP_LAYOUTCOMMIT); |
5617 | res->status = status; | ||
5306 | if (status) | 5618 | if (status) |
5307 | return status; | 5619 | return status; |
5308 | 5620 | ||
@@ -5322,6 +5634,55 @@ out_overflow: | |||
5322 | print_overflow_msg(__func__, xdr); | 5634 | print_overflow_msg(__func__, xdr); |
5323 | return -EIO; | 5635 | return -EIO; |
5324 | } | 5636 | } |
5637 | |||
5638 | static int decode_test_stateid(struct xdr_stream *xdr, | ||
5639 | struct nfs41_test_stateid_res *res) | ||
5640 | { | ||
5641 | __be32 *p; | ||
5642 | int status; | ||
5643 | int num_res; | ||
5644 | |||
5645 | status = decode_op_hdr(xdr, OP_TEST_STATEID); | ||
5646 | if (status) | ||
5647 | return status; | ||
5648 | |||
5649 | p = xdr_inline_decode(xdr, 4); | ||
5650 | if (unlikely(!p)) | ||
5651 | goto out_overflow; | ||
5652 | num_res = be32_to_cpup(p++); | ||
5653 | if (num_res != 1) | ||
5654 | goto out; | ||
5655 | |||
5656 | p = xdr_inline_decode(xdr, 4); | ||
5657 | if (unlikely(!p)) | ||
5658 | goto out_overflow; | ||
5659 | res->status = be32_to_cpup(p++); | ||
5660 | return res->status; | ||
5661 | out_overflow: | ||
5662 | print_overflow_msg(__func__, xdr); | ||
5663 | out: | ||
5664 | return -EIO; | ||
5665 | } | ||
5666 | |||
5667 | static int decode_free_stateid(struct xdr_stream *xdr, | ||
5668 | struct nfs41_free_stateid_res *res) | ||
5669 | { | ||
5670 | __be32 *p; | ||
5671 | int status; | ||
5672 | |||
5673 | status = decode_op_hdr(xdr, OP_FREE_STATEID); | ||
5674 | if (status) | ||
5675 | return status; | ||
5676 | |||
5677 | p = xdr_inline_decode(xdr, 4); | ||
5678 | if (unlikely(!p)) | ||
5679 | goto out_overflow; | ||
5680 | res->status = be32_to_cpup(p++); | ||
5681 | return res->status; | ||
5682 | out_overflow: | ||
5683 | print_overflow_msg(__func__, xdr); | ||
5684 | return -EIO; | ||
5685 | } | ||
5325 | #endif /* CONFIG_NFS_V4_1 */ | 5686 | #endif /* CONFIG_NFS_V4_1 */ |
5326 | 5687 | ||
5327 | /* | 5688 | /* |
@@ -5682,7 +6043,7 @@ nfs4_xdr_dec_getacl(struct rpc_rqst *rqstp, struct xdr_stream *xdr, | |||
5682 | status = decode_putfh(xdr); | 6043 | status = decode_putfh(xdr); |
5683 | if (status) | 6044 | if (status) |
5684 | goto out; | 6045 | goto out; |
5685 | status = decode_getacl(xdr, rqstp, &res->acl_len); | 6046 | status = decode_getacl(xdr, rqstp, res); |
5686 | 6047 | ||
5687 | out: | 6048 | out: |
5688 | return status; | 6049 | return status; |
@@ -6366,6 +6727,32 @@ static int nfs4_xdr_dec_reclaim_complete(struct rpc_rqst *rqstp, | |||
6366 | } | 6727 | } |
6367 | 6728 | ||
6368 | /* | 6729 | /* |
6730 | * Decode GETDEVICELIST response | ||
6731 | */ | ||
6732 | static int nfs4_xdr_dec_getdevicelist(struct rpc_rqst *rqstp, | ||
6733 | struct xdr_stream *xdr, | ||
6734 | struct nfs4_getdevicelist_res *res) | ||
6735 | { | ||
6736 | struct compound_hdr hdr; | ||
6737 | int status; | ||
6738 | |||
6739 | dprintk("encoding getdevicelist!\n"); | ||
6740 | |||
6741 | status = decode_compound_hdr(xdr, &hdr); | ||
6742 | if (status != 0) | ||
6743 | goto out; | ||
6744 | status = decode_sequence(xdr, &res->seq_res, rqstp); | ||
6745 | if (status != 0) | ||
6746 | goto out; | ||
6747 | status = decode_putfh(xdr); | ||
6748 | if (status != 0) | ||
6749 | goto out; | ||
6750 | status = decode_getdevicelist(xdr, res->devlist); | ||
6751 | out: | ||
6752 | return status; | ||
6753 | } | ||
6754 | |||
6755 | /* | ||
6369 | * Decode GETDEVINFO response | 6756 | * Decode GETDEVINFO response |
6370 | */ | 6757 | */ |
6371 | static int nfs4_xdr_dec_getdeviceinfo(struct rpc_rqst *rqstp, | 6758 | static int nfs4_xdr_dec_getdeviceinfo(struct rpc_rqst *rqstp, |
@@ -6461,6 +6848,72 @@ static int nfs4_xdr_dec_layoutcommit(struct rpc_rqst *rqstp, | |||
6461 | out: | 6848 | out: |
6462 | return status; | 6849 | return status; |
6463 | } | 6850 | } |
6851 | |||
6852 | /* | ||
6853 | * Decode SECINFO_NO_NAME response | ||
6854 | */ | ||
6855 | static int nfs4_xdr_dec_secinfo_no_name(struct rpc_rqst *rqstp, | ||
6856 | struct xdr_stream *xdr, | ||
6857 | struct nfs4_secinfo_res *res) | ||
6858 | { | ||
6859 | struct compound_hdr hdr; | ||
6860 | int status; | ||
6861 | |||
6862 | status = decode_compound_hdr(xdr, &hdr); | ||
6863 | if (status) | ||
6864 | goto out; | ||
6865 | status = decode_sequence(xdr, &res->seq_res, rqstp); | ||
6866 | if (status) | ||
6867 | goto out; | ||
6868 | status = decode_putrootfh(xdr); | ||
6869 | if (status) | ||
6870 | goto out; | ||
6871 | status = decode_secinfo(xdr, res); | ||
6872 | out: | ||
6873 | return status; | ||
6874 | } | ||
6875 | |||
6876 | /* | ||
6877 | * Decode TEST_STATEID response | ||
6878 | */ | ||
6879 | static int nfs4_xdr_dec_test_stateid(struct rpc_rqst *rqstp, | ||
6880 | struct xdr_stream *xdr, | ||
6881 | struct nfs41_test_stateid_res *res) | ||
6882 | { | ||
6883 | struct compound_hdr hdr; | ||
6884 | int status; | ||
6885 | |||
6886 | status = decode_compound_hdr(xdr, &hdr); | ||
6887 | if (status) | ||
6888 | goto out; | ||
6889 | status = decode_sequence(xdr, &res->seq_res, rqstp); | ||
6890 | if (status) | ||
6891 | goto out; | ||
6892 | status = decode_test_stateid(xdr, res); | ||
6893 | out: | ||
6894 | return status; | ||
6895 | } | ||
6896 | |||
6897 | /* | ||
6898 | * Decode FREE_STATEID response | ||
6899 | */ | ||
6900 | static int nfs4_xdr_dec_free_stateid(struct rpc_rqst *rqstp, | ||
6901 | struct xdr_stream *xdr, | ||
6902 | struct nfs41_free_stateid_res *res) | ||
6903 | { | ||
6904 | struct compound_hdr hdr; | ||
6905 | int status; | ||
6906 | |||
6907 | status = decode_compound_hdr(xdr, &hdr); | ||
6908 | if (status) | ||
6909 | goto out; | ||
6910 | status = decode_sequence(xdr, &res->seq_res, rqstp); | ||
6911 | if (status) | ||
6912 | goto out; | ||
6913 | status = decode_free_stateid(xdr, res); | ||
6914 | out: | ||
6915 | return status; | ||
6916 | } | ||
6464 | #endif /* CONFIG_NFS_V4_1 */ | 6917 | #endif /* CONFIG_NFS_V4_1 */ |
6465 | 6918 | ||
6466 | /** | 6919 | /** |
@@ -6480,7 +6933,7 @@ out: | |||
6480 | int nfs4_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, | 6933 | int nfs4_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, |
6481 | int plus) | 6934 | int plus) |
6482 | { | 6935 | { |
6483 | uint32_t bitmap[2] = {0}; | 6936 | uint32_t bitmap[3] = {0}; |
6484 | uint32_t len; | 6937 | uint32_t len; |
6485 | __be32 *p = xdr_inline_decode(xdr, 4); | 6938 | __be32 *p = xdr_inline_decode(xdr, 4); |
6486 | if (unlikely(!p)) | 6939 | if (unlikely(!p)) |
@@ -6663,6 +7116,10 @@ struct rpc_procinfo nfs4_procedures[] = { | |||
6663 | PROC(LAYOUTGET, enc_layoutget, dec_layoutget), | 7116 | PROC(LAYOUTGET, enc_layoutget, dec_layoutget), |
6664 | PROC(LAYOUTCOMMIT, enc_layoutcommit, dec_layoutcommit), | 7117 | PROC(LAYOUTCOMMIT, enc_layoutcommit, dec_layoutcommit), |
6665 | PROC(LAYOUTRETURN, enc_layoutreturn, dec_layoutreturn), | 7118 | PROC(LAYOUTRETURN, enc_layoutreturn, dec_layoutreturn), |
7119 | PROC(SECINFO_NO_NAME, enc_secinfo_no_name, dec_secinfo_no_name), | ||
7120 | PROC(TEST_STATEID, enc_test_stateid, dec_test_stateid), | ||
7121 | PROC(FREE_STATEID, enc_free_stateid, dec_free_stateid), | ||
7122 | PROC(GETDEVICELIST, enc_getdevicelist, dec_getdevicelist), | ||
6666 | #endif /* CONFIG_NFS_V4_1 */ | 7123 | #endif /* CONFIG_NFS_V4_1 */ |
6667 | }; | 7124 | }; |
6668 | 7125 | ||
diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c index 8ff2ea3f10e..d0cda12fddc 100644 --- a/fs/nfs/objlayout/objio_osd.c +++ b/fs/nfs/objlayout/objio_osd.c | |||
@@ -479,7 +479,6 @@ static int _io_check(struct objio_state *ios, bool is_write) | |||
479 | for (i = 0; i < ios->numdevs; i++) { | 479 | for (i = 0; i < ios->numdevs; i++) { |
480 | struct osd_sense_info osi; | 480 | struct osd_sense_info osi; |
481 | struct osd_request *or = ios->per_dev[i].or; | 481 | struct osd_request *or = ios->per_dev[i].or; |
482 | unsigned dev; | ||
483 | int ret; | 482 | int ret; |
484 | 483 | ||
485 | if (!or) | 484 | if (!or) |
@@ -500,9 +499,8 @@ static int _io_check(struct objio_state *ios, bool is_write) | |||
500 | 499 | ||
501 | continue; /* we recovered */ | 500 | continue; /* we recovered */ |
502 | } | 501 | } |
503 | dev = ios->per_dev[i].dev; | 502 | objlayout_io_set_result(&ios->ol_state, i, |
504 | objlayout_io_set_result(&ios->ol_state, dev, | 503 | &ios->layout->comps[i].oc_object_id, |
505 | &ios->layout->comps[dev].oc_object_id, | ||
506 | osd_pri_2_pnfs_err(osi.osd_err_pri), | 504 | osd_pri_2_pnfs_err(osi.osd_err_pri), |
507 | ios->per_dev[i].offset, | 505 | ios->per_dev[i].offset, |
508 | ios->per_dev[i].length, | 506 | ios->per_dev[i].length, |
@@ -589,22 +587,19 @@ static void _calc_stripe_info(struct objio_state *ios, u64 file_offset, | |||
589 | } | 587 | } |
590 | 588 | ||
591 | static int _add_stripe_unit(struct objio_state *ios, unsigned *cur_pg, | 589 | static int _add_stripe_unit(struct objio_state *ios, unsigned *cur_pg, |
592 | unsigned pgbase, struct _objio_per_comp *per_dev, int cur_len, | 590 | unsigned pgbase, struct _objio_per_comp *per_dev, int len, |
593 | gfp_t gfp_flags) | 591 | gfp_t gfp_flags) |
594 | { | 592 | { |
595 | unsigned pg = *cur_pg; | 593 | unsigned pg = *cur_pg; |
594 | int cur_len = len; | ||
596 | struct request_queue *q = | 595 | struct request_queue *q = |
597 | osd_request_queue(_io_od(ios, per_dev->dev)); | 596 | osd_request_queue(_io_od(ios, per_dev->dev)); |
598 | 597 | ||
599 | per_dev->length += cur_len; | ||
600 | |||
601 | if (per_dev->bio == NULL) { | 598 | if (per_dev->bio == NULL) { |
602 | unsigned stripes = ios->layout->num_comps / | 599 | unsigned pages_in_stripe = ios->layout->group_width * |
603 | ios->layout->mirrors_p1; | ||
604 | unsigned pages_in_stripe = stripes * | ||
605 | (ios->layout->stripe_unit / PAGE_SIZE); | 600 | (ios->layout->stripe_unit / PAGE_SIZE); |
606 | unsigned bio_size = (ios->ol_state.nr_pages + pages_in_stripe) / | 601 | unsigned bio_size = (ios->ol_state.nr_pages + pages_in_stripe) / |
607 | stripes; | 602 | ios->layout->group_width; |
608 | 603 | ||
609 | if (BIO_MAX_PAGES_KMALLOC < bio_size) | 604 | if (BIO_MAX_PAGES_KMALLOC < bio_size) |
610 | bio_size = BIO_MAX_PAGES_KMALLOC; | 605 | bio_size = BIO_MAX_PAGES_KMALLOC; |
@@ -632,6 +627,7 @@ static int _add_stripe_unit(struct objio_state *ios, unsigned *cur_pg, | |||
632 | } | 627 | } |
633 | BUG_ON(cur_len); | 628 | BUG_ON(cur_len); |
634 | 629 | ||
630 | per_dev->length += len; | ||
635 | *cur_pg = pg; | 631 | *cur_pg = pg; |
636 | return 0; | 632 | return 0; |
637 | } | 633 | } |
@@ -650,7 +646,7 @@ static int _prepare_one_group(struct objio_state *ios, u64 length, | |||
650 | int ret = 0; | 646 | int ret = 0; |
651 | 647 | ||
652 | while (length) { | 648 | while (length) { |
653 | struct _objio_per_comp *per_dev = &ios->per_dev[dev]; | 649 | struct _objio_per_comp *per_dev = &ios->per_dev[dev - first_dev]; |
654 | unsigned cur_len, page_off = 0; | 650 | unsigned cur_len, page_off = 0; |
655 | 651 | ||
656 | if (!per_dev->length) { | 652 | if (!per_dev->length) { |
@@ -670,8 +666,8 @@ static int _prepare_one_group(struct objio_state *ios, u64 length, | |||
670 | cur_len = stripe_unit; | 666 | cur_len = stripe_unit; |
671 | } | 667 | } |
672 | 668 | ||
673 | if (max_comp < dev) | 669 | if (max_comp < dev - first_dev) |
674 | max_comp = dev; | 670 | max_comp = dev - first_dev; |
675 | } else { | 671 | } else { |
676 | cur_len = stripe_unit; | 672 | cur_len = stripe_unit; |
677 | } | 673 | } |
@@ -806,7 +802,7 @@ static int _read_mirrors(struct objio_state *ios, unsigned cur_comp) | |||
806 | struct _objio_per_comp *per_dev = &ios->per_dev[cur_comp]; | 802 | struct _objio_per_comp *per_dev = &ios->per_dev[cur_comp]; |
807 | unsigned dev = per_dev->dev; | 803 | unsigned dev = per_dev->dev; |
808 | struct pnfs_osd_object_cred *cred = | 804 | struct pnfs_osd_object_cred *cred = |
809 | &ios->layout->comps[dev]; | 805 | &ios->layout->comps[cur_comp]; |
810 | struct osd_obj_id obj = { | 806 | struct osd_obj_id obj = { |
811 | .partition = cred->oc_object_id.oid_partition_id, | 807 | .partition = cred->oc_object_id.oid_partition_id, |
812 | .id = cred->oc_object_id.oid_object_id, | 808 | .id = cred->oc_object_id.oid_object_id, |
@@ -904,7 +900,7 @@ static int _write_mirrors(struct objio_state *ios, unsigned cur_comp) | |||
904 | for (; cur_comp < last_comp; ++cur_comp, ++dev) { | 900 | for (; cur_comp < last_comp; ++cur_comp, ++dev) { |
905 | struct osd_request *or = NULL; | 901 | struct osd_request *or = NULL; |
906 | struct pnfs_osd_object_cred *cred = | 902 | struct pnfs_osd_object_cred *cred = |
907 | &ios->layout->comps[dev]; | 903 | &ios->layout->comps[cur_comp]; |
908 | struct osd_obj_id obj = { | 904 | struct osd_obj_id obj = { |
909 | .partition = cred->oc_object_id.oid_partition_id, | 905 | .partition = cred->oc_object_id.oid_partition_id, |
910 | .id = cred->oc_object_id.oid_object_id, | 906 | .id = cred->oc_object_id.oid_object_id, |
@@ -1000,13 +996,22 @@ static bool objio_pg_test(struct nfs_pageio_descriptor *pgio, | |||
1000 | if (!pnfs_generic_pg_test(pgio, prev, req)) | 996 | if (!pnfs_generic_pg_test(pgio, prev, req)) |
1001 | return false; | 997 | return false; |
1002 | 998 | ||
1003 | if (pgio->pg_lseg == NULL) | ||
1004 | return true; | ||
1005 | |||
1006 | return pgio->pg_count + req->wb_bytes <= | 999 | return pgio->pg_count + req->wb_bytes <= |
1007 | OBJIO_LSEG(pgio->pg_lseg)->max_io_size; | 1000 | OBJIO_LSEG(pgio->pg_lseg)->max_io_size; |
1008 | } | 1001 | } |
1009 | 1002 | ||
1003 | static const struct nfs_pageio_ops objio_pg_read_ops = { | ||
1004 | .pg_init = pnfs_generic_pg_init_read, | ||
1005 | .pg_test = objio_pg_test, | ||
1006 | .pg_doio = pnfs_generic_pg_readpages, | ||
1007 | }; | ||
1008 | |||
1009 | static const struct nfs_pageio_ops objio_pg_write_ops = { | ||
1010 | .pg_init = pnfs_generic_pg_init_write, | ||
1011 | .pg_test = objio_pg_test, | ||
1012 | .pg_doio = pnfs_generic_pg_writepages, | ||
1013 | }; | ||
1014 | |||
1010 | static struct pnfs_layoutdriver_type objlayout_type = { | 1015 | static struct pnfs_layoutdriver_type objlayout_type = { |
1011 | .id = LAYOUT_OSD2_OBJECTS, | 1016 | .id = LAYOUT_OSD2_OBJECTS, |
1012 | .name = "LAYOUT_OSD2_OBJECTS", | 1017 | .name = "LAYOUT_OSD2_OBJECTS", |
@@ -1020,7 +1025,8 @@ static struct pnfs_layoutdriver_type objlayout_type = { | |||
1020 | 1025 | ||
1021 | .read_pagelist = objlayout_read_pagelist, | 1026 | .read_pagelist = objlayout_read_pagelist, |
1022 | .write_pagelist = objlayout_write_pagelist, | 1027 | .write_pagelist = objlayout_write_pagelist, |
1023 | .pg_test = objio_pg_test, | 1028 | .pg_read_ops = &objio_pg_read_ops, |
1029 | .pg_write_ops = &objio_pg_write_ops, | ||
1024 | 1030 | ||
1025 | .free_deviceid_node = objio_free_deviceid_node, | 1031 | .free_deviceid_node = objio_free_deviceid_node, |
1026 | 1032 | ||
@@ -1055,5 +1061,7 @@ objlayout_exit(void) | |||
1055 | __func__); | 1061 | __func__); |
1056 | } | 1062 | } |
1057 | 1063 | ||
1064 | MODULE_ALIAS("nfs-layouttype4-2"); | ||
1065 | |||
1058 | module_init(objlayout_init); | 1066 | module_init(objlayout_init); |
1059 | module_exit(objlayout_exit); | 1067 | module_exit(objlayout_exit); |
diff --git a/fs/nfs/objlayout/pnfs_osd_xdr_cli.c b/fs/nfs/objlayout/pnfs_osd_xdr_cli.c index 16fc758e912..b3918f7ac34 100644 --- a/fs/nfs/objlayout/pnfs_osd_xdr_cli.c +++ b/fs/nfs/objlayout/pnfs_osd_xdr_cli.c | |||
@@ -170,6 +170,9 @@ int pnfs_osd_xdr_decode_layout_map(struct pnfs_osd_layout *layout, | |||
170 | p = _osd_xdr_decode_data_map(p, &layout->olo_map); | 170 | p = _osd_xdr_decode_data_map(p, &layout->olo_map); |
171 | layout->olo_comps_index = be32_to_cpup(p++); | 171 | layout->olo_comps_index = be32_to_cpup(p++); |
172 | layout->olo_num_comps = be32_to_cpup(p++); | 172 | layout->olo_num_comps = be32_to_cpup(p++); |
173 | dprintk("%s: olo_comps_index=%d olo_num_comps=%d\n", __func__, | ||
174 | layout->olo_comps_index, layout->olo_num_comps); | ||
175 | |||
173 | iter->total_comps = layout->olo_num_comps; | 176 | iter->total_comps = layout->olo_num_comps; |
174 | return 0; | 177 | return 0; |
175 | } | 178 | } |
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 00985571628..b60970cc7f1 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c | |||
@@ -114,7 +114,7 @@ int nfs_set_page_tag_locked(struct nfs_page *req) | |||
114 | if (!nfs_lock_request_dontget(req)) | 114 | if (!nfs_lock_request_dontget(req)) |
115 | return 0; | 115 | return 0; |
116 | if (test_bit(PG_MAPPED, &req->wb_flags)) | 116 | if (test_bit(PG_MAPPED, &req->wb_flags)) |
117 | radix_tree_tag_set(&NFS_I(req->wb_context->path.dentry->d_inode)->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED); | 117 | radix_tree_tag_set(&NFS_I(req->wb_context->dentry->d_inode)->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED); |
118 | return 1; | 118 | return 1; |
119 | } | 119 | } |
120 | 120 | ||
@@ -124,7 +124,7 @@ int nfs_set_page_tag_locked(struct nfs_page *req) | |||
124 | void nfs_clear_page_tag_locked(struct nfs_page *req) | 124 | void nfs_clear_page_tag_locked(struct nfs_page *req) |
125 | { | 125 | { |
126 | if (test_bit(PG_MAPPED, &req->wb_flags)) { | 126 | if (test_bit(PG_MAPPED, &req->wb_flags)) { |
127 | struct inode *inode = req->wb_context->path.dentry->d_inode; | 127 | struct inode *inode = req->wb_context->dentry->d_inode; |
128 | struct nfs_inode *nfsi = NFS_I(inode); | 128 | struct nfs_inode *nfsi = NFS_I(inode); |
129 | 129 | ||
130 | spin_lock(&inode->i_lock); | 130 | spin_lock(&inode->i_lock); |
@@ -230,7 +230,7 @@ EXPORT_SYMBOL_GPL(nfs_generic_pg_test); | |||
230 | */ | 230 | */ |
231 | void nfs_pageio_init(struct nfs_pageio_descriptor *desc, | 231 | void nfs_pageio_init(struct nfs_pageio_descriptor *desc, |
232 | struct inode *inode, | 232 | struct inode *inode, |
233 | int (*doio)(struct nfs_pageio_descriptor *), | 233 | const struct nfs_pageio_ops *pg_ops, |
234 | size_t bsize, | 234 | size_t bsize, |
235 | int io_flags) | 235 | int io_flags) |
236 | { | 236 | { |
@@ -240,13 +240,12 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc, | |||
240 | desc->pg_bsize = bsize; | 240 | desc->pg_bsize = bsize; |
241 | desc->pg_base = 0; | 241 | desc->pg_base = 0; |
242 | desc->pg_moreio = 0; | 242 | desc->pg_moreio = 0; |
243 | desc->pg_recoalesce = 0; | ||
243 | desc->pg_inode = inode; | 244 | desc->pg_inode = inode; |
244 | desc->pg_doio = doio; | 245 | desc->pg_ops = pg_ops; |
245 | desc->pg_ioflags = io_flags; | 246 | desc->pg_ioflags = io_flags; |
246 | desc->pg_error = 0; | 247 | desc->pg_error = 0; |
247 | desc->pg_lseg = NULL; | 248 | desc->pg_lseg = NULL; |
248 | desc->pg_test = nfs_generic_pg_test; | ||
249 | pnfs_pageio_init(desc, inode); | ||
250 | } | 249 | } |
251 | 250 | ||
252 | /** | 251 | /** |
@@ -276,7 +275,7 @@ static bool nfs_can_coalesce_requests(struct nfs_page *prev, | |||
276 | return false; | 275 | return false; |
277 | if (prev->wb_pgbase + prev->wb_bytes != PAGE_CACHE_SIZE) | 276 | if (prev->wb_pgbase + prev->wb_bytes != PAGE_CACHE_SIZE) |
278 | return false; | 277 | return false; |
279 | return pgio->pg_test(pgio, prev, req); | 278 | return pgio->pg_ops->pg_test(pgio, prev, req); |
280 | } | 279 | } |
281 | 280 | ||
282 | /** | 281 | /** |
@@ -297,6 +296,8 @@ static int nfs_pageio_do_add_request(struct nfs_pageio_descriptor *desc, | |||
297 | if (!nfs_can_coalesce_requests(prev, req, desc)) | 296 | if (!nfs_can_coalesce_requests(prev, req, desc)) |
298 | return 0; | 297 | return 0; |
299 | } else { | 298 | } else { |
299 | if (desc->pg_ops->pg_init) | ||
300 | desc->pg_ops->pg_init(desc, req); | ||
300 | desc->pg_base = req->wb_pgbase; | 301 | desc->pg_base = req->wb_pgbase; |
301 | } | 302 | } |
302 | nfs_list_remove_request(req); | 303 | nfs_list_remove_request(req); |
@@ -311,7 +312,7 @@ static int nfs_pageio_do_add_request(struct nfs_pageio_descriptor *desc, | |||
311 | static void nfs_pageio_doio(struct nfs_pageio_descriptor *desc) | 312 | static void nfs_pageio_doio(struct nfs_pageio_descriptor *desc) |
312 | { | 313 | { |
313 | if (!list_empty(&desc->pg_list)) { | 314 | if (!list_empty(&desc->pg_list)) { |
314 | int error = desc->pg_doio(desc); | 315 | int error = desc->pg_ops->pg_doio(desc); |
315 | if (error < 0) | 316 | if (error < 0) |
316 | desc->pg_error = error; | 317 | desc->pg_error = error; |
317 | else | 318 | else |
@@ -331,7 +332,7 @@ static void nfs_pageio_doio(struct nfs_pageio_descriptor *desc) | |||
331 | * Returns true if the request 'req' was successfully coalesced into the | 332 | * Returns true if the request 'req' was successfully coalesced into the |
332 | * existing list of pages 'desc'. | 333 | * existing list of pages 'desc'. |
333 | */ | 334 | */ |
334 | int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, | 335 | static int __nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, |
335 | struct nfs_page *req) | 336 | struct nfs_page *req) |
336 | { | 337 | { |
337 | while (!nfs_pageio_do_add_request(desc, req)) { | 338 | while (!nfs_pageio_do_add_request(desc, req)) { |
@@ -340,17 +341,67 @@ int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, | |||
340 | if (desc->pg_error < 0) | 341 | if (desc->pg_error < 0) |
341 | return 0; | 342 | return 0; |
342 | desc->pg_moreio = 0; | 343 | desc->pg_moreio = 0; |
344 | if (desc->pg_recoalesce) | ||
345 | return 0; | ||
343 | } | 346 | } |
344 | return 1; | 347 | return 1; |
345 | } | 348 | } |
346 | 349 | ||
350 | static int nfs_do_recoalesce(struct nfs_pageio_descriptor *desc) | ||
351 | { | ||
352 | LIST_HEAD(head); | ||
353 | |||
354 | do { | ||
355 | list_splice_init(&desc->pg_list, &head); | ||
356 | desc->pg_bytes_written -= desc->pg_count; | ||
357 | desc->pg_count = 0; | ||
358 | desc->pg_base = 0; | ||
359 | desc->pg_recoalesce = 0; | ||
360 | |||
361 | while (!list_empty(&head)) { | ||
362 | struct nfs_page *req; | ||
363 | |||
364 | req = list_first_entry(&head, struct nfs_page, wb_list); | ||
365 | nfs_list_remove_request(req); | ||
366 | if (__nfs_pageio_add_request(desc, req)) | ||
367 | continue; | ||
368 | if (desc->pg_error < 0) | ||
369 | return 0; | ||
370 | break; | ||
371 | } | ||
372 | } while (desc->pg_recoalesce); | ||
373 | return 1; | ||
374 | } | ||
375 | |||
376 | int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, | ||
377 | struct nfs_page *req) | ||
378 | { | ||
379 | int ret; | ||
380 | |||
381 | do { | ||
382 | ret = __nfs_pageio_add_request(desc, req); | ||
383 | if (ret) | ||
384 | break; | ||
385 | if (desc->pg_error < 0) | ||
386 | break; | ||
387 | ret = nfs_do_recoalesce(desc); | ||
388 | } while (ret); | ||
389 | return ret; | ||
390 | } | ||
391 | |||
347 | /** | 392 | /** |
348 | * nfs_pageio_complete - Complete I/O on an nfs_pageio_descriptor | 393 | * nfs_pageio_complete - Complete I/O on an nfs_pageio_descriptor |
349 | * @desc: pointer to io descriptor | 394 | * @desc: pointer to io descriptor |
350 | */ | 395 | */ |
351 | void nfs_pageio_complete(struct nfs_pageio_descriptor *desc) | 396 | void nfs_pageio_complete(struct nfs_pageio_descriptor *desc) |
352 | { | 397 | { |
353 | nfs_pageio_doio(desc); | 398 | for (;;) { |
399 | nfs_pageio_doio(desc); | ||
400 | if (!desc->pg_recoalesce) | ||
401 | break; | ||
402 | if (!nfs_do_recoalesce(desc)) | ||
403 | break; | ||
404 | } | ||
354 | } | 405 | } |
355 | 406 | ||
356 | /** | 407 | /** |
@@ -369,7 +420,7 @@ void nfs_pageio_cond_complete(struct nfs_pageio_descriptor *desc, pgoff_t index) | |||
369 | if (!list_empty(&desc->pg_list)) { | 420 | if (!list_empty(&desc->pg_list)) { |
370 | struct nfs_page *prev = nfs_list_entry(desc->pg_list.prev); | 421 | struct nfs_page *prev = nfs_list_entry(desc->pg_list.prev); |
371 | if (index != prev->wb_index + 1) | 422 | if (index != prev->wb_index + 1) |
372 | nfs_pageio_doio(desc); | 423 | nfs_pageio_complete(desc); |
373 | } | 424 | } |
374 | } | 425 | } |
375 | 426 | ||
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 29c0ca7fc34..ee73d9a4f70 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c | |||
@@ -28,6 +28,7 @@ | |||
28 | */ | 28 | */ |
29 | 29 | ||
30 | #include <linux/nfs_fs.h> | 30 | #include <linux/nfs_fs.h> |
31 | #include <linux/nfs_page.h> | ||
31 | #include "internal.h" | 32 | #include "internal.h" |
32 | #include "pnfs.h" | 33 | #include "pnfs.h" |
33 | #include "iostat.h" | 34 | #include "iostat.h" |
@@ -75,8 +76,11 @@ find_pnfs_driver(u32 id) | |||
75 | void | 76 | void |
76 | unset_pnfs_layoutdriver(struct nfs_server *nfss) | 77 | unset_pnfs_layoutdriver(struct nfs_server *nfss) |
77 | { | 78 | { |
78 | if (nfss->pnfs_curr_ld) | 79 | if (nfss->pnfs_curr_ld) { |
80 | if (nfss->pnfs_curr_ld->clear_layoutdriver) | ||
81 | nfss->pnfs_curr_ld->clear_layoutdriver(nfss); | ||
79 | module_put(nfss->pnfs_curr_ld->owner); | 82 | module_put(nfss->pnfs_curr_ld->owner); |
83 | } | ||
80 | nfss->pnfs_curr_ld = NULL; | 84 | nfss->pnfs_curr_ld = NULL; |
81 | } | 85 | } |
82 | 86 | ||
@@ -87,7 +91,8 @@ unset_pnfs_layoutdriver(struct nfs_server *nfss) | |||
87 | * @id layout type. Zero (illegal layout type) indicates pNFS not in use. | 91 | * @id layout type. Zero (illegal layout type) indicates pNFS not in use. |
88 | */ | 92 | */ |
89 | void | 93 | void |
90 | set_pnfs_layoutdriver(struct nfs_server *server, u32 id) | 94 | set_pnfs_layoutdriver(struct nfs_server *server, const struct nfs_fh *mntfh, |
95 | u32 id) | ||
91 | { | 96 | { |
92 | struct pnfs_layoutdriver_type *ld_type = NULL; | 97 | struct pnfs_layoutdriver_type *ld_type = NULL; |
93 | 98 | ||
@@ -114,6 +119,13 @@ set_pnfs_layoutdriver(struct nfs_server *server, u32 id) | |||
114 | goto out_no_driver; | 119 | goto out_no_driver; |
115 | } | 120 | } |
116 | server->pnfs_curr_ld = ld_type; | 121 | server->pnfs_curr_ld = ld_type; |
122 | if (ld_type->set_layoutdriver | ||
123 | && ld_type->set_layoutdriver(server, mntfh)) { | ||
124 | printk(KERN_ERR "%s: Error initializing pNFS layout driver %u.\n", | ||
125 | __func__, id); | ||
126 | module_put(ld_type->owner); | ||
127 | goto out_no_driver; | ||
128 | } | ||
117 | 129 | ||
118 | dprintk("%s: pNFS module for %u set\n", __func__, id); | 130 | dprintk("%s: pNFS module for %u set\n", __func__, id); |
119 | return; | 131 | return; |
@@ -189,6 +201,7 @@ static void | |||
189 | pnfs_free_layout_hdr(struct pnfs_layout_hdr *lo) | 201 | pnfs_free_layout_hdr(struct pnfs_layout_hdr *lo) |
190 | { | 202 | { |
191 | struct pnfs_layoutdriver_type *ld = NFS_SERVER(lo->plh_inode)->pnfs_curr_ld; | 203 | struct pnfs_layoutdriver_type *ld = NFS_SERVER(lo->plh_inode)->pnfs_curr_ld; |
204 | put_rpccred(lo->plh_lc_cred); | ||
192 | return ld->alloc_layout_hdr ? ld->free_layout_hdr(lo) : kfree(lo); | 205 | return ld->alloc_layout_hdr ? ld->free_layout_hdr(lo) : kfree(lo); |
193 | } | 206 | } |
194 | 207 | ||
@@ -223,6 +236,7 @@ static void | |||
223 | init_lseg(struct pnfs_layout_hdr *lo, struct pnfs_layout_segment *lseg) | 236 | init_lseg(struct pnfs_layout_hdr *lo, struct pnfs_layout_segment *lseg) |
224 | { | 237 | { |
225 | INIT_LIST_HEAD(&lseg->pls_list); | 238 | INIT_LIST_HEAD(&lseg->pls_list); |
239 | INIT_LIST_HEAD(&lseg->pls_lc_list); | ||
226 | atomic_set(&lseg->pls_refcount, 1); | 240 | atomic_set(&lseg->pls_refcount, 1); |
227 | smp_mb(); | 241 | smp_mb(); |
228 | set_bit(NFS_LSEG_VALID, &lseg->pls_flags); | 242 | set_bit(NFS_LSEG_VALID, &lseg->pls_flags); |
@@ -448,11 +462,20 @@ pnfs_destroy_layout(struct nfs_inode *nfsi) | |||
448 | void | 462 | void |
449 | pnfs_destroy_all_layouts(struct nfs_client *clp) | 463 | pnfs_destroy_all_layouts(struct nfs_client *clp) |
450 | { | 464 | { |
465 | struct nfs_server *server; | ||
451 | struct pnfs_layout_hdr *lo; | 466 | struct pnfs_layout_hdr *lo; |
452 | LIST_HEAD(tmp_list); | 467 | LIST_HEAD(tmp_list); |
453 | 468 | ||
469 | nfs4_deviceid_mark_client_invalid(clp); | ||
470 | nfs4_deviceid_purge_client(clp); | ||
471 | |||
454 | spin_lock(&clp->cl_lock); | 472 | spin_lock(&clp->cl_lock); |
455 | list_splice_init(&clp->cl_layouts, &tmp_list); | 473 | rcu_read_lock(); |
474 | list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) { | ||
475 | if (!list_empty(&server->layouts)) | ||
476 | list_splice_init(&server->layouts, &tmp_list); | ||
477 | } | ||
478 | rcu_read_unlock(); | ||
456 | spin_unlock(&clp->cl_lock); | 479 | spin_unlock(&clp->cl_lock); |
457 | 480 | ||
458 | while (!list_empty(&tmp_list)) { | 481 | while (!list_empty(&tmp_list)) { |
@@ -661,6 +684,7 @@ _pnfs_return_layout(struct inode *ino) | |||
661 | lrp->args.stateid = stateid; | 684 | lrp->args.stateid = stateid; |
662 | lrp->args.layout_type = NFS_SERVER(ino)->pnfs_curr_ld->id; | 685 | lrp->args.layout_type = NFS_SERVER(ino)->pnfs_curr_ld->id; |
663 | lrp->args.inode = ino; | 686 | lrp->args.inode = ino; |
687 | lrp->args.layout = lo; | ||
664 | lrp->clp = NFS_SERVER(ino)->nfs_client; | 688 | lrp->clp = NFS_SERVER(ino)->nfs_client; |
665 | 689 | ||
666 | status = nfs4_proc_layoutreturn(lrp); | 690 | status = nfs4_proc_layoutreturn(lrp); |
@@ -805,7 +829,9 @@ out: | |||
805 | } | 829 | } |
806 | 830 | ||
807 | static struct pnfs_layout_hdr * | 831 | static struct pnfs_layout_hdr * |
808 | alloc_init_layout_hdr(struct inode *ino, gfp_t gfp_flags) | 832 | alloc_init_layout_hdr(struct inode *ino, |
833 | struct nfs_open_context *ctx, | ||
834 | gfp_t gfp_flags) | ||
809 | { | 835 | { |
810 | struct pnfs_layout_hdr *lo; | 836 | struct pnfs_layout_hdr *lo; |
811 | 837 | ||
@@ -817,11 +843,14 @@ alloc_init_layout_hdr(struct inode *ino, gfp_t gfp_flags) | |||
817 | INIT_LIST_HEAD(&lo->plh_segs); | 843 | INIT_LIST_HEAD(&lo->plh_segs); |
818 | INIT_LIST_HEAD(&lo->plh_bulk_recall); | 844 | INIT_LIST_HEAD(&lo->plh_bulk_recall); |
819 | lo->plh_inode = ino; | 845 | lo->plh_inode = ino; |
846 | lo->plh_lc_cred = get_rpccred(ctx->state->owner->so_cred); | ||
820 | return lo; | 847 | return lo; |
821 | } | 848 | } |
822 | 849 | ||
823 | static struct pnfs_layout_hdr * | 850 | static struct pnfs_layout_hdr * |
824 | pnfs_find_alloc_layout(struct inode *ino, gfp_t gfp_flags) | 851 | pnfs_find_alloc_layout(struct inode *ino, |
852 | struct nfs_open_context *ctx, | ||
853 | gfp_t gfp_flags) | ||
825 | { | 854 | { |
826 | struct nfs_inode *nfsi = NFS_I(ino); | 855 | struct nfs_inode *nfsi = NFS_I(ino); |
827 | struct pnfs_layout_hdr *new = NULL; | 856 | struct pnfs_layout_hdr *new = NULL; |
@@ -836,7 +865,7 @@ pnfs_find_alloc_layout(struct inode *ino, gfp_t gfp_flags) | |||
836 | return nfsi->layout; | 865 | return nfsi->layout; |
837 | } | 866 | } |
838 | spin_unlock(&ino->i_lock); | 867 | spin_unlock(&ino->i_lock); |
839 | new = alloc_init_layout_hdr(ino, gfp_flags); | 868 | new = alloc_init_layout_hdr(ino, ctx, gfp_flags); |
840 | spin_lock(&ino->i_lock); | 869 | spin_lock(&ino->i_lock); |
841 | 870 | ||
842 | if (likely(nfsi->layout == NULL)) /* Won the race? */ | 871 | if (likely(nfsi->layout == NULL)) /* Won the race? */ |
@@ -920,7 +949,8 @@ pnfs_update_layout(struct inode *ino, | |||
920 | }; | 949 | }; |
921 | unsigned pg_offset; | 950 | unsigned pg_offset; |
922 | struct nfs_inode *nfsi = NFS_I(ino); | 951 | struct nfs_inode *nfsi = NFS_I(ino); |
923 | struct nfs_client *clp = NFS_SERVER(ino)->nfs_client; | 952 | struct nfs_server *server = NFS_SERVER(ino); |
953 | struct nfs_client *clp = server->nfs_client; | ||
924 | struct pnfs_layout_hdr *lo; | 954 | struct pnfs_layout_hdr *lo; |
925 | struct pnfs_layout_segment *lseg = NULL; | 955 | struct pnfs_layout_segment *lseg = NULL; |
926 | bool first = false; | 956 | bool first = false; |
@@ -928,7 +958,7 @@ pnfs_update_layout(struct inode *ino, | |||
928 | if (!pnfs_enabled_sb(NFS_SERVER(ino))) | 958 | if (!pnfs_enabled_sb(NFS_SERVER(ino))) |
929 | return NULL; | 959 | return NULL; |
930 | spin_lock(&ino->i_lock); | 960 | spin_lock(&ino->i_lock); |
931 | lo = pnfs_find_alloc_layout(ino, gfp_flags); | 961 | lo = pnfs_find_alloc_layout(ino, ctx, gfp_flags); |
932 | if (lo == NULL) { | 962 | if (lo == NULL) { |
933 | dprintk("%s ERROR: can't get pnfs_layout_hdr\n", __func__); | 963 | dprintk("%s ERROR: can't get pnfs_layout_hdr\n", __func__); |
934 | goto out_unlock; | 964 | goto out_unlock; |
@@ -964,7 +994,7 @@ pnfs_update_layout(struct inode *ino, | |||
964 | */ | 994 | */ |
965 | spin_lock(&clp->cl_lock); | 995 | spin_lock(&clp->cl_lock); |
966 | BUG_ON(!list_empty(&lo->plh_layouts)); | 996 | BUG_ON(!list_empty(&lo->plh_layouts)); |
967 | list_add_tail(&lo->plh_layouts, &clp->cl_layouts); | 997 | list_add_tail(&lo->plh_layouts, &server->layouts); |
968 | spin_unlock(&clp->cl_lock); | 998 | spin_unlock(&clp->cl_lock); |
969 | } | 999 | } |
970 | 1000 | ||
@@ -973,7 +1003,8 @@ pnfs_update_layout(struct inode *ino, | |||
973 | arg.offset -= pg_offset; | 1003 | arg.offset -= pg_offset; |
974 | arg.length += pg_offset; | 1004 | arg.length += pg_offset; |
975 | } | 1005 | } |
976 | arg.length = PAGE_CACHE_ALIGN(arg.length); | 1006 | if (arg.length != NFS4_MAX_UINT64) |
1007 | arg.length = PAGE_CACHE_ALIGN(arg.length); | ||
977 | 1008 | ||
978 | lseg = send_layoutget(lo, ctx, &arg, gfp_flags); | 1009 | lseg = send_layoutget(lo, ctx, &arg, gfp_flags); |
979 | if (!lseg && first) { | 1010 | if (!lseg && first) { |
@@ -991,6 +1022,7 @@ out_unlock: | |||
991 | spin_unlock(&ino->i_lock); | 1022 | spin_unlock(&ino->i_lock); |
992 | goto out; | 1023 | goto out; |
993 | } | 1024 | } |
1025 | EXPORT_SYMBOL_GPL(pnfs_update_layout); | ||
994 | 1026 | ||
995 | int | 1027 | int |
996 | pnfs_layout_process(struct nfs4_layoutget *lgp) | 1028 | pnfs_layout_process(struct nfs4_layoutget *lgp) |
@@ -1048,35 +1080,71 @@ out_forget_reply: | |||
1048 | goto out; | 1080 | goto out; |
1049 | } | 1081 | } |
1050 | 1082 | ||
1083 | void | ||
1084 | pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) | ||
1085 | { | ||
1086 | BUG_ON(pgio->pg_lseg != NULL); | ||
1087 | |||
1088 | pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, | ||
1089 | req->wb_context, | ||
1090 | req_offset(req), | ||
1091 | req->wb_bytes, | ||
1092 | IOMODE_READ, | ||
1093 | GFP_KERNEL); | ||
1094 | /* If no lseg, fall back to read through mds */ | ||
1095 | if (pgio->pg_lseg == NULL) | ||
1096 | nfs_pageio_reset_read_mds(pgio); | ||
1097 | |||
1098 | } | ||
1099 | EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_read); | ||
1100 | |||
1101 | void | ||
1102 | pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) | ||
1103 | { | ||
1104 | BUG_ON(pgio->pg_lseg != NULL); | ||
1105 | |||
1106 | pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, | ||
1107 | req->wb_context, | ||
1108 | req_offset(req), | ||
1109 | req->wb_bytes, | ||
1110 | IOMODE_RW, | ||
1111 | GFP_NOFS); | ||
1112 | /* If no lseg, fall back to write through mds */ | ||
1113 | if (pgio->pg_lseg == NULL) | ||
1114 | nfs_pageio_reset_write_mds(pgio); | ||
1115 | } | ||
1116 | EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_write); | ||
1117 | |||
1051 | bool | 1118 | bool |
1052 | pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, | 1119 | pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode) |
1053 | struct nfs_page *req) | ||
1054 | { | 1120 | { |
1055 | enum pnfs_iomode access_type; | 1121 | struct nfs_server *server = NFS_SERVER(inode); |
1056 | gfp_t gfp_flags; | 1122 | struct pnfs_layoutdriver_type *ld = server->pnfs_curr_ld; |
1057 | 1123 | ||
1058 | /* We assume that pg_ioflags == 0 iff we're reading a page */ | 1124 | if (ld == NULL) |
1059 | if (pgio->pg_ioflags == 0) { | 1125 | return false; |
1060 | access_type = IOMODE_READ; | 1126 | nfs_pageio_init(pgio, inode, ld->pg_read_ops, server->rsize, 0); |
1061 | gfp_flags = GFP_KERNEL; | 1127 | return true; |
1062 | } else { | 1128 | } |
1063 | access_type = IOMODE_RW; | ||
1064 | gfp_flags = GFP_NOFS; | ||
1065 | } | ||
1066 | 1129 | ||
1067 | if (pgio->pg_lseg == NULL) { | 1130 | bool |
1068 | if (pgio->pg_count != prev->wb_bytes) | 1131 | pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode, int ioflags) |
1069 | return true; | 1132 | { |
1070 | /* This is first coelesce call for a series of nfs_pages */ | 1133 | struct nfs_server *server = NFS_SERVER(inode); |
1071 | pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, | 1134 | struct pnfs_layoutdriver_type *ld = server->pnfs_curr_ld; |
1072 | prev->wb_context, | 1135 | |
1073 | req_offset(prev), | 1136 | if (ld == NULL) |
1074 | pgio->pg_count, | 1137 | return false; |
1075 | access_type, | 1138 | nfs_pageio_init(pgio, inode, ld->pg_write_ops, server->wsize, ioflags); |
1076 | gfp_flags); | 1139 | return true; |
1077 | if (pgio->pg_lseg == NULL) | 1140 | } |
1078 | return true; | 1141 | |
1079 | } | 1142 | bool |
1143 | pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, | ||
1144 | struct nfs_page *req) | ||
1145 | { | ||
1146 | if (pgio->pg_lseg == NULL) | ||
1147 | return nfs_generic_pg_test(pgio, prev, req); | ||
1080 | 1148 | ||
1081 | /* | 1149 | /* |
1082 | * Test if a nfs_page is fully contained in the pnfs_layout_range. | 1150 | * Test if a nfs_page is fully contained in the pnfs_layout_range. |
@@ -1100,35 +1168,44 @@ EXPORT_SYMBOL_GPL(pnfs_generic_pg_test); | |||
1100 | /* | 1168 | /* |
1101 | * Called by non rpc-based layout drivers | 1169 | * Called by non rpc-based layout drivers |
1102 | */ | 1170 | */ |
1103 | int | 1171 | void pnfs_ld_write_done(struct nfs_write_data *data) |
1104 | pnfs_ld_write_done(struct nfs_write_data *data) | ||
1105 | { | 1172 | { |
1106 | int status; | 1173 | if (likely(!data->pnfs_error)) { |
1107 | |||
1108 | if (!data->pnfs_error) { | ||
1109 | pnfs_set_layoutcommit(data); | 1174 | pnfs_set_layoutcommit(data); |
1110 | data->mds_ops->rpc_call_done(&data->task, data); | 1175 | data->mds_ops->rpc_call_done(&data->task, data); |
1111 | data->mds_ops->rpc_release(data); | 1176 | } else { |
1112 | return 0; | 1177 | put_lseg(data->lseg); |
1178 | data->lseg = NULL; | ||
1179 | dprintk("pnfs write error = %d\n", data->pnfs_error); | ||
1113 | } | 1180 | } |
1114 | 1181 | data->mds_ops->rpc_release(data); | |
1115 | dprintk("%s: pnfs_error=%d, retry via MDS\n", __func__, | ||
1116 | data->pnfs_error); | ||
1117 | status = nfs_initiate_write(data, NFS_CLIENT(data->inode), | ||
1118 | data->mds_ops, NFS_FILE_SYNC); | ||
1119 | return status ? : -EAGAIN; | ||
1120 | } | 1182 | } |
1121 | EXPORT_SYMBOL_GPL(pnfs_ld_write_done); | 1183 | EXPORT_SYMBOL_GPL(pnfs_ld_write_done); |
1122 | 1184 | ||
1123 | enum pnfs_try_status | 1185 | static void |
1186 | pnfs_write_through_mds(struct nfs_pageio_descriptor *desc, | ||
1187 | struct nfs_write_data *data) | ||
1188 | { | ||
1189 | list_splice_tail_init(&data->pages, &desc->pg_list); | ||
1190 | if (data->req && list_empty(&data->req->wb_list)) | ||
1191 | nfs_list_add_request(data->req, &desc->pg_list); | ||
1192 | nfs_pageio_reset_write_mds(desc); | ||
1193 | desc->pg_recoalesce = 1; | ||
1194 | nfs_writedata_release(data); | ||
1195 | } | ||
1196 | |||
1197 | static enum pnfs_try_status | ||
1124 | pnfs_try_to_write_data(struct nfs_write_data *wdata, | 1198 | pnfs_try_to_write_data(struct nfs_write_data *wdata, |
1125 | const struct rpc_call_ops *call_ops, int how) | 1199 | const struct rpc_call_ops *call_ops, |
1200 | struct pnfs_layout_segment *lseg, | ||
1201 | int how) | ||
1126 | { | 1202 | { |
1127 | struct inode *inode = wdata->inode; | 1203 | struct inode *inode = wdata->inode; |
1128 | enum pnfs_try_status trypnfs; | 1204 | enum pnfs_try_status trypnfs; |
1129 | struct nfs_server *nfss = NFS_SERVER(inode); | 1205 | struct nfs_server *nfss = NFS_SERVER(inode); |
1130 | 1206 | ||
1131 | wdata->mds_ops = call_ops; | 1207 | wdata->mds_ops = call_ops; |
1208 | wdata->lseg = get_lseg(lseg); | ||
1132 | 1209 | ||
1133 | dprintk("%s: Writing ino:%lu %u@%llu (how %d)\n", __func__, | 1210 | dprintk("%s: Writing ino:%lu %u@%llu (how %d)\n", __func__, |
1134 | inode->i_ino, wdata->args.count, wdata->args.offset, how); | 1211 | inode->i_ino, wdata->args.count, wdata->args.offset, how); |
@@ -1144,41 +1221,87 @@ pnfs_try_to_write_data(struct nfs_write_data *wdata, | |||
1144 | return trypnfs; | 1221 | return trypnfs; |
1145 | } | 1222 | } |
1146 | 1223 | ||
1224 | static void | ||
1225 | pnfs_do_multiple_writes(struct nfs_pageio_descriptor *desc, struct list_head *head, int how) | ||
1226 | { | ||
1227 | struct nfs_write_data *data; | ||
1228 | const struct rpc_call_ops *call_ops = desc->pg_rpc_callops; | ||
1229 | struct pnfs_layout_segment *lseg = desc->pg_lseg; | ||
1230 | |||
1231 | desc->pg_lseg = NULL; | ||
1232 | while (!list_empty(head)) { | ||
1233 | enum pnfs_try_status trypnfs; | ||
1234 | |||
1235 | data = list_entry(head->next, struct nfs_write_data, list); | ||
1236 | list_del_init(&data->list); | ||
1237 | |||
1238 | trypnfs = pnfs_try_to_write_data(data, call_ops, lseg, how); | ||
1239 | if (trypnfs == PNFS_NOT_ATTEMPTED) | ||
1240 | pnfs_write_through_mds(desc, data); | ||
1241 | } | ||
1242 | put_lseg(lseg); | ||
1243 | } | ||
1244 | |||
1245 | int | ||
1246 | pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc) | ||
1247 | { | ||
1248 | LIST_HEAD(head); | ||
1249 | int ret; | ||
1250 | |||
1251 | ret = nfs_generic_flush(desc, &head); | ||
1252 | if (ret != 0) { | ||
1253 | put_lseg(desc->pg_lseg); | ||
1254 | desc->pg_lseg = NULL; | ||
1255 | return ret; | ||
1256 | } | ||
1257 | pnfs_do_multiple_writes(desc, &head, desc->pg_ioflags); | ||
1258 | return 0; | ||
1259 | } | ||
1260 | EXPORT_SYMBOL_GPL(pnfs_generic_pg_writepages); | ||
1261 | |||
1147 | /* | 1262 | /* |
1148 | * Called by non rpc-based layout drivers | 1263 | * Called by non rpc-based layout drivers |
1149 | */ | 1264 | */ |
1150 | int | 1265 | void pnfs_ld_read_done(struct nfs_read_data *data) |
1151 | pnfs_ld_read_done(struct nfs_read_data *data) | ||
1152 | { | 1266 | { |
1153 | int status; | 1267 | if (likely(!data->pnfs_error)) { |
1154 | |||
1155 | if (!data->pnfs_error) { | ||
1156 | __nfs4_read_done_cb(data); | 1268 | __nfs4_read_done_cb(data); |
1157 | data->mds_ops->rpc_call_done(&data->task, data); | 1269 | data->mds_ops->rpc_call_done(&data->task, data); |
1158 | data->mds_ops->rpc_release(data); | 1270 | } else { |
1159 | return 0; | 1271 | put_lseg(data->lseg); |
1272 | data->lseg = NULL; | ||
1273 | dprintk("pnfs write error = %d\n", data->pnfs_error); | ||
1160 | } | 1274 | } |
1161 | 1275 | data->mds_ops->rpc_release(data); | |
1162 | dprintk("%s: pnfs_error=%d, retry via MDS\n", __func__, | ||
1163 | data->pnfs_error); | ||
1164 | status = nfs_initiate_read(data, NFS_CLIENT(data->inode), | ||
1165 | data->mds_ops); | ||
1166 | return status ? : -EAGAIN; | ||
1167 | } | 1276 | } |
1168 | EXPORT_SYMBOL_GPL(pnfs_ld_read_done); | 1277 | EXPORT_SYMBOL_GPL(pnfs_ld_read_done); |
1169 | 1278 | ||
1279 | static void | ||
1280 | pnfs_read_through_mds(struct nfs_pageio_descriptor *desc, | ||
1281 | struct nfs_read_data *data) | ||
1282 | { | ||
1283 | list_splice_tail_init(&data->pages, &desc->pg_list); | ||
1284 | if (data->req && list_empty(&data->req->wb_list)) | ||
1285 | nfs_list_add_request(data->req, &desc->pg_list); | ||
1286 | nfs_pageio_reset_read_mds(desc); | ||
1287 | desc->pg_recoalesce = 1; | ||
1288 | nfs_readdata_release(data); | ||
1289 | } | ||
1290 | |||
1170 | /* | 1291 | /* |
1171 | * Call the appropriate parallel I/O subsystem read function. | 1292 | * Call the appropriate parallel I/O subsystem read function. |
1172 | */ | 1293 | */ |
1173 | enum pnfs_try_status | 1294 | static enum pnfs_try_status |
1174 | pnfs_try_to_read_data(struct nfs_read_data *rdata, | 1295 | pnfs_try_to_read_data(struct nfs_read_data *rdata, |
1175 | const struct rpc_call_ops *call_ops) | 1296 | const struct rpc_call_ops *call_ops, |
1297 | struct pnfs_layout_segment *lseg) | ||
1176 | { | 1298 | { |
1177 | struct inode *inode = rdata->inode; | 1299 | struct inode *inode = rdata->inode; |
1178 | struct nfs_server *nfss = NFS_SERVER(inode); | 1300 | struct nfs_server *nfss = NFS_SERVER(inode); |
1179 | enum pnfs_try_status trypnfs; | 1301 | enum pnfs_try_status trypnfs; |
1180 | 1302 | ||
1181 | rdata->mds_ops = call_ops; | 1303 | rdata->mds_ops = call_ops; |
1304 | rdata->lseg = get_lseg(lseg); | ||
1182 | 1305 | ||
1183 | dprintk("%s: Reading ino:%lu %u@%llu\n", | 1306 | dprintk("%s: Reading ino:%lu %u@%llu\n", |
1184 | __func__, inode->i_ino, rdata->args.count, rdata->args.offset); | 1307 | __func__, inode->i_ino, rdata->args.count, rdata->args.offset); |
@@ -1194,19 +1317,70 @@ pnfs_try_to_read_data(struct nfs_read_data *rdata, | |||
1194 | return trypnfs; | 1317 | return trypnfs; |
1195 | } | 1318 | } |
1196 | 1319 | ||
1320 | static void | ||
1321 | pnfs_do_multiple_reads(struct nfs_pageio_descriptor *desc, struct list_head *head) | ||
1322 | { | ||
1323 | struct nfs_read_data *data; | ||
1324 | const struct rpc_call_ops *call_ops = desc->pg_rpc_callops; | ||
1325 | struct pnfs_layout_segment *lseg = desc->pg_lseg; | ||
1326 | |||
1327 | desc->pg_lseg = NULL; | ||
1328 | while (!list_empty(head)) { | ||
1329 | enum pnfs_try_status trypnfs; | ||
1330 | |||
1331 | data = list_entry(head->next, struct nfs_read_data, list); | ||
1332 | list_del_init(&data->list); | ||
1333 | |||
1334 | trypnfs = pnfs_try_to_read_data(data, call_ops, lseg); | ||
1335 | if (trypnfs == PNFS_NOT_ATTEMPTED) | ||
1336 | pnfs_read_through_mds(desc, data); | ||
1337 | } | ||
1338 | put_lseg(lseg); | ||
1339 | } | ||
1340 | |||
1341 | int | ||
1342 | pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc) | ||
1343 | { | ||
1344 | LIST_HEAD(head); | ||
1345 | int ret; | ||
1346 | |||
1347 | ret = nfs_generic_pagein(desc, &head); | ||
1348 | if (ret != 0) { | ||
1349 | put_lseg(desc->pg_lseg); | ||
1350 | desc->pg_lseg = NULL; | ||
1351 | return ret; | ||
1352 | } | ||
1353 | pnfs_do_multiple_reads(desc, &head); | ||
1354 | return 0; | ||
1355 | } | ||
1356 | EXPORT_SYMBOL_GPL(pnfs_generic_pg_readpages); | ||
1357 | |||
1197 | /* | 1358 | /* |
1198 | * Currently there is only one (whole file) write lseg. | 1359 | * There can be multiple RW segments. |
1199 | */ | 1360 | */ |
1200 | static struct pnfs_layout_segment *pnfs_list_write_lseg(struct inode *inode) | 1361 | static void pnfs_list_write_lseg(struct inode *inode, struct list_head *listp) |
1201 | { | 1362 | { |
1202 | struct pnfs_layout_segment *lseg, *rv = NULL; | 1363 | struct pnfs_layout_segment *lseg; |
1203 | 1364 | ||
1204 | list_for_each_entry(lseg, &NFS_I(inode)->layout->plh_segs, pls_list) | 1365 | list_for_each_entry(lseg, &NFS_I(inode)->layout->plh_segs, pls_list) { |
1205 | if (lseg->pls_range.iomode == IOMODE_RW) | 1366 | if (lseg->pls_range.iomode == IOMODE_RW && |
1206 | rv = lseg; | 1367 | test_bit(NFS_LSEG_LAYOUTCOMMIT, &lseg->pls_flags)) |
1207 | return rv; | 1368 | list_add(&lseg->pls_lc_list, listp); |
1369 | } | ||
1208 | } | 1370 | } |
1209 | 1371 | ||
1372 | void pnfs_set_lo_fail(struct pnfs_layout_segment *lseg) | ||
1373 | { | ||
1374 | if (lseg->pls_range.iomode == IOMODE_RW) { | ||
1375 | dprintk("%s Setting layout IOMODE_RW fail bit\n", __func__); | ||
1376 | set_bit(lo_fail_bit(IOMODE_RW), &lseg->pls_layout->plh_flags); | ||
1377 | } else { | ||
1378 | dprintk("%s Setting layout IOMODE_READ fail bit\n", __func__); | ||
1379 | set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags); | ||
1380 | } | ||
1381 | } | ||
1382 | EXPORT_SYMBOL_GPL(pnfs_set_lo_fail); | ||
1383 | |||
1210 | void | 1384 | void |
1211 | pnfs_set_layoutcommit(struct nfs_write_data *wdata) | 1385 | pnfs_set_layoutcommit(struct nfs_write_data *wdata) |
1212 | { | 1386 | { |
@@ -1216,17 +1390,19 @@ pnfs_set_layoutcommit(struct nfs_write_data *wdata) | |||
1216 | 1390 | ||
1217 | spin_lock(&nfsi->vfs_inode.i_lock); | 1391 | spin_lock(&nfsi->vfs_inode.i_lock); |
1218 | if (!test_and_set_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) { | 1392 | if (!test_and_set_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) { |
1219 | /* references matched in nfs4_layoutcommit_release */ | ||
1220 | get_lseg(wdata->lseg); | ||
1221 | wdata->lseg->pls_lc_cred = | ||
1222 | get_rpccred(wdata->args.context->state->owner->so_cred); | ||
1223 | mark_as_dirty = true; | 1393 | mark_as_dirty = true; |
1224 | dprintk("%s: Set layoutcommit for inode %lu ", | 1394 | dprintk("%s: Set layoutcommit for inode %lu ", |
1225 | __func__, wdata->inode->i_ino); | 1395 | __func__, wdata->inode->i_ino); |
1226 | } | 1396 | } |
1227 | if (end_pos > wdata->lseg->pls_end_pos) | 1397 | if (!test_and_set_bit(NFS_LSEG_LAYOUTCOMMIT, &wdata->lseg->pls_flags)) { |
1228 | wdata->lseg->pls_end_pos = end_pos; | 1398 | /* references matched in nfs4_layoutcommit_release */ |
1399 | get_lseg(wdata->lseg); | ||
1400 | } | ||
1401 | if (end_pos > nfsi->layout->plh_lwb) | ||
1402 | nfsi->layout->plh_lwb = end_pos; | ||
1229 | spin_unlock(&nfsi->vfs_inode.i_lock); | 1403 | spin_unlock(&nfsi->vfs_inode.i_lock); |
1404 | dprintk("%s: lseg %p end_pos %llu\n", | ||
1405 | __func__, wdata->lseg, nfsi->layout->plh_lwb); | ||
1230 | 1406 | ||
1231 | /* if pnfs_layoutcommit_inode() runs between inode locks, the next one | 1407 | /* if pnfs_layoutcommit_inode() runs between inode locks, the next one |
1232 | * will be a noop because NFS_INO_LAYOUTCOMMIT will not be set */ | 1408 | * will be a noop because NFS_INO_LAYOUTCOMMIT will not be set */ |
@@ -1235,6 +1411,14 @@ pnfs_set_layoutcommit(struct nfs_write_data *wdata) | |||
1235 | } | 1411 | } |
1236 | EXPORT_SYMBOL_GPL(pnfs_set_layoutcommit); | 1412 | EXPORT_SYMBOL_GPL(pnfs_set_layoutcommit); |
1237 | 1413 | ||
1414 | void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data) | ||
1415 | { | ||
1416 | struct nfs_server *nfss = NFS_SERVER(data->args.inode); | ||
1417 | |||
1418 | if (nfss->pnfs_curr_ld->cleanup_layoutcommit) | ||
1419 | nfss->pnfs_curr_ld->cleanup_layoutcommit(data); | ||
1420 | } | ||
1421 | |||
1238 | /* | 1422 | /* |
1239 | * For the LAYOUT4_NFSV4_1_FILES layout type, NFS_DATA_SYNC WRITEs and | 1423 | * For the LAYOUT4_NFSV4_1_FILES layout type, NFS_DATA_SYNC WRITEs and |
1240 | * NFS_UNSTABLE WRITEs with a COMMIT to data servers must store enough | 1424 | * NFS_UNSTABLE WRITEs with a COMMIT to data servers must store enough |
@@ -1248,8 +1432,6 @@ pnfs_layoutcommit_inode(struct inode *inode, bool sync) | |||
1248 | { | 1432 | { |
1249 | struct nfs4_layoutcommit_data *data; | 1433 | struct nfs4_layoutcommit_data *data; |
1250 | struct nfs_inode *nfsi = NFS_I(inode); | 1434 | struct nfs_inode *nfsi = NFS_I(inode); |
1251 | struct pnfs_layout_segment *lseg; | ||
1252 | struct rpc_cred *cred; | ||
1253 | loff_t end_pos; | 1435 | loff_t end_pos; |
1254 | int status = 0; | 1436 | int status = 0; |
1255 | 1437 | ||
@@ -1266,30 +1448,25 @@ pnfs_layoutcommit_inode(struct inode *inode, bool sync) | |||
1266 | goto out; | 1448 | goto out; |
1267 | } | 1449 | } |
1268 | 1450 | ||
1451 | INIT_LIST_HEAD(&data->lseg_list); | ||
1269 | spin_lock(&inode->i_lock); | 1452 | spin_lock(&inode->i_lock); |
1270 | if (!test_and_clear_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) { | 1453 | if (!test_and_clear_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) { |
1271 | spin_unlock(&inode->i_lock); | 1454 | spin_unlock(&inode->i_lock); |
1272 | kfree(data); | 1455 | kfree(data); |
1273 | goto out; | 1456 | goto out; |
1274 | } | 1457 | } |
1275 | /* | ||
1276 | * Currently only one (whole file) write lseg which is referenced | ||
1277 | * in pnfs_set_layoutcommit and will be found. | ||
1278 | */ | ||
1279 | lseg = pnfs_list_write_lseg(inode); | ||
1280 | 1458 | ||
1281 | end_pos = lseg->pls_end_pos; | 1459 | pnfs_list_write_lseg(inode, &data->lseg_list); |
1282 | cred = lseg->pls_lc_cred; | 1460 | |
1283 | lseg->pls_end_pos = 0; | 1461 | end_pos = nfsi->layout->plh_lwb; |
1284 | lseg->pls_lc_cred = NULL; | 1462 | nfsi->layout->plh_lwb = 0; |
1285 | 1463 | ||
1286 | memcpy(&data->args.stateid.data, nfsi->layout->plh_stateid.data, | 1464 | memcpy(&data->args.stateid.data, nfsi->layout->plh_stateid.data, |
1287 | sizeof(nfsi->layout->plh_stateid.data)); | 1465 | sizeof(nfsi->layout->plh_stateid.data)); |
1288 | spin_unlock(&inode->i_lock); | 1466 | spin_unlock(&inode->i_lock); |
1289 | 1467 | ||
1290 | data->args.inode = inode; | 1468 | data->args.inode = inode; |
1291 | data->lseg = lseg; | 1469 | data->cred = get_rpccred(nfsi->layout->plh_lc_cred); |
1292 | data->cred = cred; | ||
1293 | nfs_fattr_init(&data->fattr); | 1470 | nfs_fattr_init(&data->fattr); |
1294 | data->args.bitmask = NFS_SERVER(inode)->cache_consistency_bitmask; | 1471 | data->args.bitmask = NFS_SERVER(inode)->cache_consistency_bitmask; |
1295 | data->res.fattr = &data->fattr; | 1472 | data->res.fattr = &data->fattr; |
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 96bf4e6f45b..1509530cb11 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h | |||
@@ -36,16 +36,16 @@ | |||
36 | enum { | 36 | enum { |
37 | NFS_LSEG_VALID = 0, /* cleared when lseg is recalled/returned */ | 37 | NFS_LSEG_VALID = 0, /* cleared when lseg is recalled/returned */ |
38 | NFS_LSEG_ROC, /* roc bit received from server */ | 38 | NFS_LSEG_ROC, /* roc bit received from server */ |
39 | NFS_LSEG_LAYOUTCOMMIT, /* layoutcommit bit set for layoutcommit */ | ||
39 | }; | 40 | }; |
40 | 41 | ||
41 | struct pnfs_layout_segment { | 42 | struct pnfs_layout_segment { |
42 | struct list_head pls_list; | 43 | struct list_head pls_list; |
44 | struct list_head pls_lc_list; | ||
43 | struct pnfs_layout_range pls_range; | 45 | struct pnfs_layout_range pls_range; |
44 | atomic_t pls_refcount; | 46 | atomic_t pls_refcount; |
45 | unsigned long pls_flags; | 47 | unsigned long pls_flags; |
46 | struct pnfs_layout_hdr *pls_layout; | 48 | struct pnfs_layout_hdr *pls_layout; |
47 | struct rpc_cred *pls_lc_cred; /* LAYOUTCOMMIT credential */ | ||
48 | loff_t pls_end_pos; /* LAYOUTCOMMIT write end */ | ||
49 | }; | 49 | }; |
50 | 50 | ||
51 | enum pnfs_try_status { | 51 | enum pnfs_try_status { |
@@ -80,6 +80,9 @@ struct pnfs_layoutdriver_type { | |||
80 | struct module *owner; | 80 | struct module *owner; |
81 | unsigned flags; | 81 | unsigned flags; |
82 | 82 | ||
83 | int (*set_layoutdriver) (struct nfs_server *, const struct nfs_fh *); | ||
84 | int (*clear_layoutdriver) (struct nfs_server *); | ||
85 | |||
83 | struct pnfs_layout_hdr * (*alloc_layout_hdr) (struct inode *inode, gfp_t gfp_flags); | 86 | struct pnfs_layout_hdr * (*alloc_layout_hdr) (struct inode *inode, gfp_t gfp_flags); |
84 | void (*free_layout_hdr) (struct pnfs_layout_hdr *); | 87 | void (*free_layout_hdr) (struct pnfs_layout_hdr *); |
85 | 88 | ||
@@ -87,7 +90,8 @@ struct pnfs_layoutdriver_type { | |||
87 | void (*free_lseg) (struct pnfs_layout_segment *lseg); | 90 | void (*free_lseg) (struct pnfs_layout_segment *lseg); |
88 | 91 | ||
89 | /* test for nfs page cache coalescing */ | 92 | /* test for nfs page cache coalescing */ |
90 | bool (*pg_test)(struct nfs_pageio_descriptor *, struct nfs_page *, struct nfs_page *); | 93 | const struct nfs_pageio_ops *pg_read_ops; |
94 | const struct nfs_pageio_ops *pg_write_ops; | ||
91 | 95 | ||
92 | /* Returns true if layoutdriver wants to divert this request to | 96 | /* Returns true if layoutdriver wants to divert this request to |
93 | * driver's commit routine. | 97 | * driver's commit routine. |
@@ -109,6 +113,8 @@ struct pnfs_layoutdriver_type { | |||
109 | struct xdr_stream *xdr, | 113 | struct xdr_stream *xdr, |
110 | const struct nfs4_layoutreturn_args *args); | 114 | const struct nfs4_layoutreturn_args *args); |
111 | 115 | ||
116 | void (*cleanup_layoutcommit) (struct nfs4_layoutcommit_data *data); | ||
117 | |||
112 | void (*encode_layoutcommit) (struct pnfs_layout_hdr *layoutid, | 118 | void (*encode_layoutcommit) (struct pnfs_layout_hdr *layoutid, |
113 | struct xdr_stream *xdr, | 119 | struct xdr_stream *xdr, |
114 | const struct nfs4_layoutcommit_args *args); | 120 | const struct nfs4_layoutcommit_args *args); |
@@ -124,6 +130,8 @@ struct pnfs_layout_hdr { | |||
124 | unsigned long plh_block_lgets; /* block LAYOUTGET if >0 */ | 130 | unsigned long plh_block_lgets; /* block LAYOUTGET if >0 */ |
125 | u32 plh_barrier; /* ignore lower seqids */ | 131 | u32 plh_barrier; /* ignore lower seqids */ |
126 | unsigned long plh_flags; | 132 | unsigned long plh_flags; |
133 | loff_t plh_lwb; /* last write byte for layoutcommit */ | ||
134 | struct rpc_cred *plh_lc_cred; /* layoutcommit cred */ | ||
127 | struct inode *plh_inode; | 135 | struct inode *plh_inode; |
128 | }; | 136 | }; |
129 | 137 | ||
@@ -136,10 +144,21 @@ struct pnfs_device { | |||
136 | unsigned int pglen; | 144 | unsigned int pglen; |
137 | }; | 145 | }; |
138 | 146 | ||
147 | #define NFS4_PNFS_GETDEVLIST_MAXNUM 16 | ||
148 | |||
149 | struct pnfs_devicelist { | ||
150 | unsigned int eof; | ||
151 | unsigned int num_devs; | ||
152 | struct nfs4_deviceid dev_id[NFS4_PNFS_GETDEVLIST_MAXNUM]; | ||
153 | }; | ||
154 | |||
139 | extern int pnfs_register_layoutdriver(struct pnfs_layoutdriver_type *); | 155 | extern int pnfs_register_layoutdriver(struct pnfs_layoutdriver_type *); |
140 | extern void pnfs_unregister_layoutdriver(struct pnfs_layoutdriver_type *); | 156 | extern void pnfs_unregister_layoutdriver(struct pnfs_layoutdriver_type *); |
141 | 157 | ||
142 | /* nfs4proc.c */ | 158 | /* nfs4proc.c */ |
159 | extern int nfs4_proc_getdevicelist(struct nfs_server *server, | ||
160 | const struct nfs_fh *fh, | ||
161 | struct pnfs_devicelist *devlist); | ||
143 | extern int nfs4_proc_getdeviceinfo(struct nfs_server *server, | 162 | extern int nfs4_proc_getdeviceinfo(struct nfs_server *server, |
144 | struct pnfs_device *dev); | 163 | struct pnfs_device *dev); |
145 | extern int nfs4_proc_layoutget(struct nfs4_layoutget *lgp); | 164 | extern int nfs4_proc_layoutget(struct nfs4_layoutget *lgp); |
@@ -148,17 +167,18 @@ extern int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp); | |||
148 | /* pnfs.c */ | 167 | /* pnfs.c */ |
149 | void get_layout_hdr(struct pnfs_layout_hdr *lo); | 168 | void get_layout_hdr(struct pnfs_layout_hdr *lo); |
150 | void put_lseg(struct pnfs_layout_segment *lseg); | 169 | void put_lseg(struct pnfs_layout_segment *lseg); |
151 | struct pnfs_layout_segment * | 170 | |
152 | pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx, | 171 | bool pnfs_pageio_init_read(struct nfs_pageio_descriptor *, struct inode *); |
153 | loff_t pos, u64 count, enum pnfs_iomode access_type, | 172 | bool pnfs_pageio_init_write(struct nfs_pageio_descriptor *, struct inode *, int); |
154 | gfp_t gfp_flags); | 173 | |
155 | void set_pnfs_layoutdriver(struct nfs_server *, u32 id); | 174 | void set_pnfs_layoutdriver(struct nfs_server *, const struct nfs_fh *, u32); |
156 | void unset_pnfs_layoutdriver(struct nfs_server *); | 175 | void unset_pnfs_layoutdriver(struct nfs_server *); |
157 | enum pnfs_try_status pnfs_try_to_write_data(struct nfs_write_data *, | 176 | void pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *, struct nfs_page *); |
158 | const struct rpc_call_ops *, int); | 177 | int pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc); |
159 | enum pnfs_try_status pnfs_try_to_read_data(struct nfs_read_data *, | 178 | void pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *, struct nfs_page *); |
160 | const struct rpc_call_ops *); | 179 | int pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc); |
161 | bool pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, struct nfs_page *req); | 180 | bool pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, struct nfs_page *req); |
181 | void pnfs_set_lo_fail(struct pnfs_layout_segment *lseg); | ||
162 | int pnfs_layout_process(struct nfs4_layoutget *lgp); | 182 | int pnfs_layout_process(struct nfs4_layoutget *lgp); |
163 | void pnfs_free_lseg_list(struct list_head *tmp_list); | 183 | void pnfs_free_lseg_list(struct list_head *tmp_list); |
164 | void pnfs_destroy_layout(struct nfs_inode *); | 184 | void pnfs_destroy_layout(struct nfs_inode *); |
@@ -178,10 +198,24 @@ void pnfs_roc_release(struct inode *ino); | |||
178 | void pnfs_roc_set_barrier(struct inode *ino, u32 barrier); | 198 | void pnfs_roc_set_barrier(struct inode *ino, u32 barrier); |
179 | bool pnfs_roc_drain(struct inode *ino, u32 *barrier); | 199 | bool pnfs_roc_drain(struct inode *ino, u32 *barrier); |
180 | void pnfs_set_layoutcommit(struct nfs_write_data *wdata); | 200 | void pnfs_set_layoutcommit(struct nfs_write_data *wdata); |
201 | void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data); | ||
181 | int pnfs_layoutcommit_inode(struct inode *inode, bool sync); | 202 | int pnfs_layoutcommit_inode(struct inode *inode, bool sync); |
182 | int _pnfs_return_layout(struct inode *); | 203 | int _pnfs_return_layout(struct inode *); |
183 | int pnfs_ld_write_done(struct nfs_write_data *); | 204 | void pnfs_ld_write_done(struct nfs_write_data *); |
184 | int pnfs_ld_read_done(struct nfs_read_data *); | 205 | void pnfs_ld_read_done(struct nfs_read_data *); |
206 | struct pnfs_layout_segment *pnfs_update_layout(struct inode *ino, | ||
207 | struct nfs_open_context *ctx, | ||
208 | loff_t pos, | ||
209 | u64 count, | ||
210 | enum pnfs_iomode iomode, | ||
211 | gfp_t gfp_flags); | ||
212 | |||
213 | void nfs4_deviceid_mark_client_invalid(struct nfs_client *clp); | ||
214 | |||
215 | /* nfs4_deviceid_flags */ | ||
216 | enum { | ||
217 | NFS_DEVICEID_INVALID = 0, /* set when MDS clientid recalled */ | ||
218 | }; | ||
185 | 219 | ||
186 | /* pnfs_dev.c */ | 220 | /* pnfs_dev.c */ |
187 | struct nfs4_deviceid_node { | 221 | struct nfs4_deviceid_node { |
@@ -189,13 +223,13 @@ struct nfs4_deviceid_node { | |||
189 | struct hlist_node tmpnode; | 223 | struct hlist_node tmpnode; |
190 | const struct pnfs_layoutdriver_type *ld; | 224 | const struct pnfs_layoutdriver_type *ld; |
191 | const struct nfs_client *nfs_client; | 225 | const struct nfs_client *nfs_client; |
226 | unsigned long flags; | ||
192 | struct nfs4_deviceid deviceid; | 227 | struct nfs4_deviceid deviceid; |
193 | atomic_t ref; | 228 | atomic_t ref; |
194 | }; | 229 | }; |
195 | 230 | ||
196 | void nfs4_print_deviceid(const struct nfs4_deviceid *dev_id); | 231 | void nfs4_print_deviceid(const struct nfs4_deviceid *dev_id); |
197 | struct nfs4_deviceid_node *nfs4_find_get_deviceid(const struct pnfs_layoutdriver_type *, const struct nfs_client *, const struct nfs4_deviceid *); | 232 | struct nfs4_deviceid_node *nfs4_find_get_deviceid(const struct pnfs_layoutdriver_type *, const struct nfs_client *, const struct nfs4_deviceid *); |
198 | struct nfs4_deviceid_node *nfs4_unhash_put_deviceid(const struct pnfs_layoutdriver_type *, const struct nfs_client *, const struct nfs4_deviceid *); | ||
199 | void nfs4_delete_deviceid(const struct pnfs_layoutdriver_type *, const struct nfs_client *, const struct nfs4_deviceid *); | 233 | void nfs4_delete_deviceid(const struct pnfs_layoutdriver_type *, const struct nfs_client *, const struct nfs4_deviceid *); |
200 | void nfs4_init_deviceid_node(struct nfs4_deviceid_node *, | 234 | void nfs4_init_deviceid_node(struct nfs4_deviceid_node *, |
201 | const struct pnfs_layoutdriver_type *, | 235 | const struct pnfs_layoutdriver_type *, |
@@ -293,15 +327,6 @@ static inline int pnfs_return_layout(struct inode *ino) | |||
293 | return 0; | 327 | return 0; |
294 | } | 328 | } |
295 | 329 | ||
296 | static inline void pnfs_pageio_init(struct nfs_pageio_descriptor *pgio, | ||
297 | struct inode *inode) | ||
298 | { | ||
299 | struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld; | ||
300 | |||
301 | if (ld) | ||
302 | pgio->pg_test = ld->pg_test; | ||
303 | } | ||
304 | |||
305 | #else /* CONFIG_NFS_V4_1 */ | 330 | #else /* CONFIG_NFS_V4_1 */ |
306 | 331 | ||
307 | static inline void pnfs_destroy_all_layouts(struct nfs_client *clp) | 332 | static inline void pnfs_destroy_all_layouts(struct nfs_client *clp) |
@@ -322,28 +347,6 @@ static inline void put_lseg(struct pnfs_layout_segment *lseg) | |||
322 | { | 347 | { |
323 | } | 348 | } |
324 | 349 | ||
325 | static inline struct pnfs_layout_segment * | ||
326 | pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx, | ||
327 | loff_t pos, u64 count, enum pnfs_iomode access_type, | ||
328 | gfp_t gfp_flags) | ||
329 | { | ||
330 | return NULL; | ||
331 | } | ||
332 | |||
333 | static inline enum pnfs_try_status | ||
334 | pnfs_try_to_read_data(struct nfs_read_data *data, | ||
335 | const struct rpc_call_ops *call_ops) | ||
336 | { | ||
337 | return PNFS_NOT_ATTEMPTED; | ||
338 | } | ||
339 | |||
340 | static inline enum pnfs_try_status | ||
341 | pnfs_try_to_write_data(struct nfs_write_data *data, | ||
342 | const struct rpc_call_ops *call_ops, int how) | ||
343 | { | ||
344 | return PNFS_NOT_ATTEMPTED; | ||
345 | } | ||
346 | |||
347 | static inline int pnfs_return_layout(struct inode *ino) | 350 | static inline int pnfs_return_layout(struct inode *ino) |
348 | { | 351 | { |
349 | return 0; | 352 | return 0; |
@@ -377,7 +380,8 @@ pnfs_roc_drain(struct inode *ino, u32 *barrier) | |||
377 | return false; | 380 | return false; |
378 | } | 381 | } |
379 | 382 | ||
380 | static inline void set_pnfs_layoutdriver(struct nfs_server *s, u32 id) | 383 | static inline void set_pnfs_layoutdriver(struct nfs_server *s, |
384 | const struct nfs_fh *mntfh, u32 id) | ||
381 | { | 385 | { |
382 | } | 386 | } |
383 | 387 | ||
@@ -385,9 +389,14 @@ static inline void unset_pnfs_layoutdriver(struct nfs_server *s) | |||
385 | { | 389 | { |
386 | } | 390 | } |
387 | 391 | ||
388 | static inline void pnfs_pageio_init(struct nfs_pageio_descriptor *pgio, | 392 | static inline bool pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode) |
389 | struct inode *inode) | ||
390 | { | 393 | { |
394 | return false; | ||
395 | } | ||
396 | |||
397 | static inline bool pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode, int ioflags) | ||
398 | { | ||
399 | return false; | ||
391 | } | 400 | } |
392 | 401 | ||
393 | static inline void | 402 | static inline void |
diff --git a/fs/nfs/pnfs_dev.c b/fs/nfs/pnfs_dev.c index f0f8e1e22f6..6fda5228ef5 100644 --- a/fs/nfs/pnfs_dev.c +++ b/fs/nfs/pnfs_dev.c | |||
@@ -100,8 +100,8 @@ _find_get_deviceid(const struct pnfs_layoutdriver_type *ld, | |||
100 | 100 | ||
101 | rcu_read_lock(); | 101 | rcu_read_lock(); |
102 | d = _lookup_deviceid(ld, clp, id, hash); | 102 | d = _lookup_deviceid(ld, clp, id, hash); |
103 | if (d && !atomic_inc_not_zero(&d->ref)) | 103 | if (d != NULL) |
104 | d = NULL; | 104 | atomic_inc(&d->ref); |
105 | rcu_read_unlock(); | 105 | rcu_read_unlock(); |
106 | return d; | 106 | return d; |
107 | } | 107 | } |
@@ -115,15 +115,15 @@ nfs4_find_get_deviceid(const struct pnfs_layoutdriver_type *ld, | |||
115 | EXPORT_SYMBOL_GPL(nfs4_find_get_deviceid); | 115 | EXPORT_SYMBOL_GPL(nfs4_find_get_deviceid); |
116 | 116 | ||
117 | /* | 117 | /* |
118 | * Unhash and put deviceid | 118 | * Remove a deviceid from cache |
119 | * | 119 | * |
120 | * @clp nfs_client associated with deviceid | 120 | * @clp nfs_client associated with deviceid |
121 | * @id the deviceid to unhash | 121 | * @id the deviceid to unhash |
122 | * | 122 | * |
123 | * @ret the unhashed node, if found and dereferenced to zero, NULL otherwise. | 123 | * @ret the unhashed node, if found and dereferenced to zero, NULL otherwise. |
124 | */ | 124 | */ |
125 | struct nfs4_deviceid_node * | 125 | void |
126 | nfs4_unhash_put_deviceid(const struct pnfs_layoutdriver_type *ld, | 126 | nfs4_delete_deviceid(const struct pnfs_layoutdriver_type *ld, |
127 | const struct nfs_client *clp, const struct nfs4_deviceid *id) | 127 | const struct nfs_client *clp, const struct nfs4_deviceid *id) |
128 | { | 128 | { |
129 | struct nfs4_deviceid_node *d; | 129 | struct nfs4_deviceid_node *d; |
@@ -134,7 +134,7 @@ nfs4_unhash_put_deviceid(const struct pnfs_layoutdriver_type *ld, | |||
134 | rcu_read_unlock(); | 134 | rcu_read_unlock(); |
135 | if (!d) { | 135 | if (!d) { |
136 | spin_unlock(&nfs4_deviceid_lock); | 136 | spin_unlock(&nfs4_deviceid_lock); |
137 | return NULL; | 137 | return; |
138 | } | 138 | } |
139 | hlist_del_init_rcu(&d->node); | 139 | hlist_del_init_rcu(&d->node); |
140 | spin_unlock(&nfs4_deviceid_lock); | 140 | spin_unlock(&nfs4_deviceid_lock); |
@@ -142,28 +142,7 @@ nfs4_unhash_put_deviceid(const struct pnfs_layoutdriver_type *ld, | |||
142 | 142 | ||
143 | /* balance the initial ref set in pnfs_insert_deviceid */ | 143 | /* balance the initial ref set in pnfs_insert_deviceid */ |
144 | if (atomic_dec_and_test(&d->ref)) | 144 | if (atomic_dec_and_test(&d->ref)) |
145 | return d; | 145 | d->ld->free_deviceid_node(d); |
146 | |||
147 | return NULL; | ||
148 | } | ||
149 | EXPORT_SYMBOL_GPL(nfs4_unhash_put_deviceid); | ||
150 | |||
151 | /* | ||
152 | * Delete a deviceid from cache | ||
153 | * | ||
154 | * @clp struct nfs_client qualifying the deviceid | ||
155 | * @id deviceid to delete | ||
156 | */ | ||
157 | void | ||
158 | nfs4_delete_deviceid(const struct pnfs_layoutdriver_type *ld, | ||
159 | const struct nfs_client *clp, const struct nfs4_deviceid *id) | ||
160 | { | ||
161 | struct nfs4_deviceid_node *d; | ||
162 | |||
163 | d = nfs4_unhash_put_deviceid(ld, clp, id); | ||
164 | if (!d) | ||
165 | return; | ||
166 | d->ld->free_deviceid_node(d); | ||
167 | } | 146 | } |
168 | EXPORT_SYMBOL_GPL(nfs4_delete_deviceid); | 147 | EXPORT_SYMBOL_GPL(nfs4_delete_deviceid); |
169 | 148 | ||
@@ -177,6 +156,7 @@ nfs4_init_deviceid_node(struct nfs4_deviceid_node *d, | |||
177 | INIT_HLIST_NODE(&d->tmpnode); | 156 | INIT_HLIST_NODE(&d->tmpnode); |
178 | d->ld = ld; | 157 | d->ld = ld; |
179 | d->nfs_client = nfs_client; | 158 | d->nfs_client = nfs_client; |
159 | d->flags = 0; | ||
180 | d->deviceid = *id; | 160 | d->deviceid = *id; |
181 | atomic_set(&d->ref, 1); | 161 | atomic_set(&d->ref, 1); |
182 | } | 162 | } |
@@ -221,16 +201,15 @@ EXPORT_SYMBOL_GPL(nfs4_insert_deviceid_node); | |||
221 | * | 201 | * |
222 | * @d deviceid node to put | 202 | * @d deviceid node to put |
223 | * | 203 | * |
224 | * @ret true iff the node was deleted | 204 | * return true iff the node was deleted |
205 | * Note that since the test for d->ref == 0 is sufficient to establish | ||
206 | * that the node is no longer hashed in the global device id cache. | ||
225 | */ | 207 | */ |
226 | bool | 208 | bool |
227 | nfs4_put_deviceid_node(struct nfs4_deviceid_node *d) | 209 | nfs4_put_deviceid_node(struct nfs4_deviceid_node *d) |
228 | { | 210 | { |
229 | if (!atomic_dec_and_lock(&d->ref, &nfs4_deviceid_lock)) | 211 | if (!atomic_dec_and_test(&d->ref)) |
230 | return false; | 212 | return false; |
231 | hlist_del_init_rcu(&d->node); | ||
232 | spin_unlock(&nfs4_deviceid_lock); | ||
233 | synchronize_rcu(); | ||
234 | d->ld->free_deviceid_node(d); | 213 | d->ld->free_deviceid_node(d); |
235 | return true; | 214 | return true; |
236 | } | 215 | } |
@@ -275,3 +254,22 @@ nfs4_deviceid_purge_client(const struct nfs_client *clp) | |||
275 | for (h = 0; h < NFS4_DEVICE_ID_HASH_SIZE; h++) | 254 | for (h = 0; h < NFS4_DEVICE_ID_HASH_SIZE; h++) |
276 | _deviceid_purge_client(clp, h); | 255 | _deviceid_purge_client(clp, h); |
277 | } | 256 | } |
257 | |||
258 | /* | ||
259 | * Stop use of all deviceids associated with an nfs_client | ||
260 | */ | ||
261 | void | ||
262 | nfs4_deviceid_mark_client_invalid(struct nfs_client *clp) | ||
263 | { | ||
264 | struct nfs4_deviceid_node *d; | ||
265 | struct hlist_node *n; | ||
266 | int i; | ||
267 | |||
268 | rcu_read_lock(); | ||
269 | for (i = 0; i < NFS4_DEVICE_ID_HASH_SIZE; i ++){ | ||
270 | hlist_for_each_entry_rcu(d, n, &nfs4_deviceid_cache[i], node) | ||
271 | if (d->nfs_client == clp) | ||
272 | set_bit(NFS_DEVICEID_INVALID, &d->flags); | ||
273 | } | ||
274 | rcu_read_unlock(); | ||
275 | } | ||
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index ac40b8535d7..f48125da198 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c | |||
@@ -710,6 +710,7 @@ const struct nfs_rpc_ops nfs_v2_clientops = { | |||
710 | .dentry_ops = &nfs_dentry_operations, | 710 | .dentry_ops = &nfs_dentry_operations, |
711 | .dir_inode_ops = &nfs_dir_inode_operations, | 711 | .dir_inode_ops = &nfs_dir_inode_operations, |
712 | .file_inode_ops = &nfs_file_inode_operations, | 712 | .file_inode_ops = &nfs_file_inode_operations, |
713 | .file_ops = &nfs_file_operations, | ||
713 | .getroot = nfs_proc_get_root, | 714 | .getroot = nfs_proc_get_root, |
714 | .getattr = nfs_proc_getattr, | 715 | .getattr = nfs_proc_getattr, |
715 | .setattr = nfs_proc_setattr, | 716 | .setattr = nfs_proc_setattr, |
diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 20a7f952e24..bfc20b16024 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c | |||
@@ -30,8 +30,7 @@ | |||
30 | 30 | ||
31 | #define NFSDBG_FACILITY NFSDBG_PAGECACHE | 31 | #define NFSDBG_FACILITY NFSDBG_PAGECACHE |
32 | 32 | ||
33 | static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc); | 33 | static const struct nfs_pageio_ops nfs_pageio_read_ops; |
34 | static int nfs_pagein_one(struct nfs_pageio_descriptor *desc); | ||
35 | static const struct rpc_call_ops nfs_read_partial_ops; | 34 | static const struct rpc_call_ops nfs_read_partial_ops; |
36 | static const struct rpc_call_ops nfs_read_full_ops; | 35 | static const struct rpc_call_ops nfs_read_full_ops; |
37 | 36 | ||
@@ -68,7 +67,7 @@ void nfs_readdata_free(struct nfs_read_data *p) | |||
68 | mempool_free(p, nfs_rdata_mempool); | 67 | mempool_free(p, nfs_rdata_mempool); |
69 | } | 68 | } |
70 | 69 | ||
71 | static void nfs_readdata_release(struct nfs_read_data *rdata) | 70 | void nfs_readdata_release(struct nfs_read_data *rdata) |
72 | { | 71 | { |
73 | put_lseg(rdata->lseg); | 72 | put_lseg(rdata->lseg); |
74 | put_nfs_open_context(rdata->args.context); | 73 | put_nfs_open_context(rdata->args.context); |
@@ -113,6 +112,27 @@ static void nfs_readpage_truncate_uninitialised_page(struct nfs_read_data *data) | |||
113 | } | 112 | } |
114 | } | 113 | } |
115 | 114 | ||
115 | static void nfs_pageio_init_read_mds(struct nfs_pageio_descriptor *pgio, | ||
116 | struct inode *inode) | ||
117 | { | ||
118 | nfs_pageio_init(pgio, inode, &nfs_pageio_read_ops, | ||
119 | NFS_SERVER(inode)->rsize, 0); | ||
120 | } | ||
121 | |||
122 | void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio) | ||
123 | { | ||
124 | pgio->pg_ops = &nfs_pageio_read_ops; | ||
125 | pgio->pg_bsize = NFS_SERVER(pgio->pg_inode)->rsize; | ||
126 | } | ||
127 | EXPORT_SYMBOL_GPL(nfs_pageio_reset_read_mds); | ||
128 | |||
129 | static void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, | ||
130 | struct inode *inode) | ||
131 | { | ||
132 | if (!pnfs_pageio_init_read(pgio, inode)) | ||
133 | nfs_pageio_init_read_mds(pgio, inode); | ||
134 | } | ||
135 | |||
116 | int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode, | 136 | int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode, |
117 | struct page *page) | 137 | struct page *page) |
118 | { | 138 | { |
@@ -131,20 +151,15 @@ int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode, | |||
131 | if (len < PAGE_CACHE_SIZE) | 151 | if (len < PAGE_CACHE_SIZE) |
132 | zero_user_segment(page, len, PAGE_CACHE_SIZE); | 152 | zero_user_segment(page, len, PAGE_CACHE_SIZE); |
133 | 153 | ||
134 | nfs_pageio_init(&pgio, inode, NULL, 0, 0); | 154 | nfs_pageio_init_read(&pgio, inode); |
135 | nfs_list_add_request(new, &pgio.pg_list); | 155 | nfs_pageio_add_request(&pgio, new); |
136 | pgio.pg_count = len; | 156 | nfs_pageio_complete(&pgio); |
137 | |||
138 | if (NFS_SERVER(inode)->rsize < PAGE_CACHE_SIZE) | ||
139 | nfs_pagein_multi(&pgio); | ||
140 | else | ||
141 | nfs_pagein_one(&pgio); | ||
142 | return 0; | 157 | return 0; |
143 | } | 158 | } |
144 | 159 | ||
145 | static void nfs_readpage_release(struct nfs_page *req) | 160 | static void nfs_readpage_release(struct nfs_page *req) |
146 | { | 161 | { |
147 | struct inode *d_inode = req->wb_context->path.dentry->d_inode; | 162 | struct inode *d_inode = req->wb_context->dentry->d_inode; |
148 | 163 | ||
149 | if (PageUptodate(req->wb_page)) | 164 | if (PageUptodate(req->wb_page)) |
150 | nfs_readpage_to_fscache(d_inode, req->wb_page, 0); | 165 | nfs_readpage_to_fscache(d_inode, req->wb_page, 0); |
@@ -152,8 +167,8 @@ static void nfs_readpage_release(struct nfs_page *req) | |||
152 | unlock_page(req->wb_page); | 167 | unlock_page(req->wb_page); |
153 | 168 | ||
154 | dprintk("NFS: read done (%s/%Ld %d@%Ld)\n", | 169 | dprintk("NFS: read done (%s/%Ld %d@%Ld)\n", |
155 | req->wb_context->path.dentry->d_inode->i_sb->s_id, | 170 | req->wb_context->dentry->d_inode->i_sb->s_id, |
156 | (long long)NFS_FILEID(req->wb_context->path.dentry->d_inode), | 171 | (long long)NFS_FILEID(req->wb_context->dentry->d_inode), |
157 | req->wb_bytes, | 172 | req->wb_bytes, |
158 | (long long)req_offset(req)); | 173 | (long long)req_offset(req)); |
159 | nfs_release_request(req); | 174 | nfs_release_request(req); |
@@ -202,17 +217,14 @@ EXPORT_SYMBOL_GPL(nfs_initiate_read); | |||
202 | /* | 217 | /* |
203 | * Set up the NFS read request struct | 218 | * Set up the NFS read request struct |
204 | */ | 219 | */ |
205 | static int nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data, | 220 | static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data, |
206 | const struct rpc_call_ops *call_ops, | 221 | unsigned int count, unsigned int offset) |
207 | unsigned int count, unsigned int offset, | ||
208 | struct pnfs_layout_segment *lseg) | ||
209 | { | 222 | { |
210 | struct inode *inode = req->wb_context->path.dentry->d_inode; | 223 | struct inode *inode = req->wb_context->dentry->d_inode; |
211 | 224 | ||
212 | data->req = req; | 225 | data->req = req; |
213 | data->inode = inode; | 226 | data->inode = inode; |
214 | data->cred = req->wb_context->cred; | 227 | data->cred = req->wb_context->cred; |
215 | data->lseg = get_lseg(lseg); | ||
216 | 228 | ||
217 | data->args.fh = NFS_FH(inode); | 229 | data->args.fh = NFS_FH(inode); |
218 | data->args.offset = req_offset(req) + offset; | 230 | data->args.offset = req_offset(req) + offset; |
@@ -226,14 +238,36 @@ static int nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data, | |||
226 | data->res.count = count; | 238 | data->res.count = count; |
227 | data->res.eof = 0; | 239 | data->res.eof = 0; |
228 | nfs_fattr_init(&data->fattr); | 240 | nfs_fattr_init(&data->fattr); |
241 | } | ||
229 | 242 | ||
230 | if (data->lseg && | 243 | static int nfs_do_read(struct nfs_read_data *data, |
231 | (pnfs_try_to_read_data(data, call_ops) == PNFS_ATTEMPTED)) | 244 | const struct rpc_call_ops *call_ops) |
232 | return 0; | 245 | { |
246 | struct inode *inode = data->args.context->dentry->d_inode; | ||
233 | 247 | ||
234 | return nfs_initiate_read(data, NFS_CLIENT(inode), call_ops); | 248 | return nfs_initiate_read(data, NFS_CLIENT(inode), call_ops); |
235 | } | 249 | } |
236 | 250 | ||
251 | static int | ||
252 | nfs_do_multiple_reads(struct list_head *head, | ||
253 | const struct rpc_call_ops *call_ops) | ||
254 | { | ||
255 | struct nfs_read_data *data; | ||
256 | int ret = 0; | ||
257 | |||
258 | while (!list_empty(head)) { | ||
259 | int ret2; | ||
260 | |||
261 | data = list_entry(head->next, struct nfs_read_data, list); | ||
262 | list_del_init(&data->list); | ||
263 | |||
264 | ret2 = nfs_do_read(data, call_ops); | ||
265 | if (ret == 0) | ||
266 | ret = ret2; | ||
267 | } | ||
268 | return ret; | ||
269 | } | ||
270 | |||
237 | static void | 271 | static void |
238 | nfs_async_read_error(struct list_head *head) | 272 | nfs_async_read_error(struct list_head *head) |
239 | { | 273 | { |
@@ -260,20 +294,19 @@ nfs_async_read_error(struct list_head *head) | |||
260 | * won't see the new data until our attribute cache is updated. This is more | 294 | * won't see the new data until our attribute cache is updated. This is more |
261 | * or less conventional NFS client behavior. | 295 | * or less conventional NFS client behavior. |
262 | */ | 296 | */ |
263 | static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc) | 297 | static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc, struct list_head *res) |
264 | { | 298 | { |
265 | struct nfs_page *req = nfs_list_entry(desc->pg_list.next); | 299 | struct nfs_page *req = nfs_list_entry(desc->pg_list.next); |
266 | struct page *page = req->wb_page; | 300 | struct page *page = req->wb_page; |
267 | struct nfs_read_data *data; | 301 | struct nfs_read_data *data; |
268 | size_t rsize = NFS_SERVER(desc->pg_inode)->rsize, nbytes; | 302 | size_t rsize = desc->pg_bsize, nbytes; |
269 | unsigned int offset; | 303 | unsigned int offset; |
270 | int requests = 0; | 304 | int requests = 0; |
271 | int ret = 0; | 305 | int ret = 0; |
272 | struct pnfs_layout_segment *lseg; | ||
273 | LIST_HEAD(list); | ||
274 | 306 | ||
275 | nfs_list_remove_request(req); | 307 | nfs_list_remove_request(req); |
276 | 308 | ||
309 | offset = 0; | ||
277 | nbytes = desc->pg_count; | 310 | nbytes = desc->pg_count; |
278 | do { | 311 | do { |
279 | size_t len = min(nbytes,rsize); | 312 | size_t len = min(nbytes,rsize); |
@@ -281,45 +314,21 @@ static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc) | |||
281 | data = nfs_readdata_alloc(1); | 314 | data = nfs_readdata_alloc(1); |
282 | if (!data) | 315 | if (!data) |
283 | goto out_bad; | 316 | goto out_bad; |
284 | list_add(&data->pages, &list); | 317 | data->pagevec[0] = page; |
318 | nfs_read_rpcsetup(req, data, len, offset); | ||
319 | list_add(&data->list, res); | ||
285 | requests++; | 320 | requests++; |
286 | nbytes -= len; | 321 | nbytes -= len; |
322 | offset += len; | ||
287 | } while(nbytes != 0); | 323 | } while(nbytes != 0); |
288 | atomic_set(&req->wb_complete, requests); | 324 | atomic_set(&req->wb_complete, requests); |
289 | |||
290 | BUG_ON(desc->pg_lseg != NULL); | ||
291 | lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, | ||
292 | req_offset(req), desc->pg_count, | ||
293 | IOMODE_READ, GFP_KERNEL); | ||
294 | ClearPageError(page); | 325 | ClearPageError(page); |
295 | offset = 0; | 326 | desc->pg_rpc_callops = &nfs_read_partial_ops; |
296 | nbytes = desc->pg_count; | ||
297 | do { | ||
298 | int ret2; | ||
299 | |||
300 | data = list_entry(list.next, struct nfs_read_data, pages); | ||
301 | list_del_init(&data->pages); | ||
302 | |||
303 | data->pagevec[0] = page; | ||
304 | |||
305 | if (nbytes < rsize) | ||
306 | rsize = nbytes; | ||
307 | ret2 = nfs_read_rpcsetup(req, data, &nfs_read_partial_ops, | ||
308 | rsize, offset, lseg); | ||
309 | if (ret == 0) | ||
310 | ret = ret2; | ||
311 | offset += rsize; | ||
312 | nbytes -= rsize; | ||
313 | } while (nbytes != 0); | ||
314 | put_lseg(lseg); | ||
315 | desc->pg_lseg = NULL; | ||
316 | |||
317 | return ret; | 327 | return ret; |
318 | |||
319 | out_bad: | 328 | out_bad: |
320 | while (!list_empty(&list)) { | 329 | while (!list_empty(res)) { |
321 | data = list_entry(list.next, struct nfs_read_data, pages); | 330 | data = list_entry(res->next, struct nfs_read_data, list); |
322 | list_del(&data->pages); | 331 | list_del(&data->list); |
323 | nfs_readdata_free(data); | 332 | nfs_readdata_free(data); |
324 | } | 333 | } |
325 | SetPageError(page); | 334 | SetPageError(page); |
@@ -327,19 +336,19 @@ out_bad: | |||
327 | return -ENOMEM; | 336 | return -ENOMEM; |
328 | } | 337 | } |
329 | 338 | ||
330 | static int nfs_pagein_one(struct nfs_pageio_descriptor *desc) | 339 | static int nfs_pagein_one(struct nfs_pageio_descriptor *desc, struct list_head *res) |
331 | { | 340 | { |
332 | struct nfs_page *req; | 341 | struct nfs_page *req; |
333 | struct page **pages; | 342 | struct page **pages; |
334 | struct nfs_read_data *data; | 343 | struct nfs_read_data *data; |
335 | struct list_head *head = &desc->pg_list; | 344 | struct list_head *head = &desc->pg_list; |
336 | struct pnfs_layout_segment *lseg = desc->pg_lseg; | 345 | int ret = 0; |
337 | int ret = -ENOMEM; | ||
338 | 346 | ||
339 | data = nfs_readdata_alloc(nfs_page_array_len(desc->pg_base, | 347 | data = nfs_readdata_alloc(nfs_page_array_len(desc->pg_base, |
340 | desc->pg_count)); | 348 | desc->pg_count)); |
341 | if (!data) { | 349 | if (!data) { |
342 | nfs_async_read_error(head); | 350 | nfs_async_read_error(head); |
351 | ret = -ENOMEM; | ||
343 | goto out; | 352 | goto out; |
344 | } | 353 | } |
345 | 354 | ||
@@ -352,19 +361,37 @@ static int nfs_pagein_one(struct nfs_pageio_descriptor *desc) | |||
352 | *pages++ = req->wb_page; | 361 | *pages++ = req->wb_page; |
353 | } | 362 | } |
354 | req = nfs_list_entry(data->pages.next); | 363 | req = nfs_list_entry(data->pages.next); |
355 | if ((!lseg) && list_is_singular(&data->pages)) | ||
356 | lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, | ||
357 | req_offset(req), desc->pg_count, | ||
358 | IOMODE_READ, GFP_KERNEL); | ||
359 | 364 | ||
360 | ret = nfs_read_rpcsetup(req, data, &nfs_read_full_ops, desc->pg_count, | 365 | nfs_read_rpcsetup(req, data, desc->pg_count, 0); |
361 | 0, lseg); | 366 | list_add(&data->list, res); |
367 | desc->pg_rpc_callops = &nfs_read_full_ops; | ||
362 | out: | 368 | out: |
363 | put_lseg(lseg); | ||
364 | desc->pg_lseg = NULL; | ||
365 | return ret; | 369 | return ret; |
366 | } | 370 | } |
367 | 371 | ||
372 | int nfs_generic_pagein(struct nfs_pageio_descriptor *desc, struct list_head *head) | ||
373 | { | ||
374 | if (desc->pg_bsize < PAGE_CACHE_SIZE) | ||
375 | return nfs_pagein_multi(desc, head); | ||
376 | return nfs_pagein_one(desc, head); | ||
377 | } | ||
378 | |||
379 | static int nfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc) | ||
380 | { | ||
381 | LIST_HEAD(head); | ||
382 | int ret; | ||
383 | |||
384 | ret = nfs_generic_pagein(desc, &head); | ||
385 | if (ret == 0) | ||
386 | ret = nfs_do_multiple_reads(&head, desc->pg_rpc_callops); | ||
387 | return ret; | ||
388 | } | ||
389 | |||
390 | static const struct nfs_pageio_ops nfs_pageio_read_ops = { | ||
391 | .pg_test = nfs_generic_pg_test, | ||
392 | .pg_doio = nfs_generic_pg_readpages, | ||
393 | }; | ||
394 | |||
368 | /* | 395 | /* |
369 | * This is the callback from RPC telling us whether a reply was | 396 | * This is the callback from RPC telling us whether a reply was |
370 | * received or some error occurred (timeout or socket shutdown). | 397 | * received or some error occurred (timeout or socket shutdown). |
@@ -514,13 +541,23 @@ static void nfs_readpage_result_full(struct rpc_task *task, void *calldata) | |||
514 | static void nfs_readpage_release_full(void *calldata) | 541 | static void nfs_readpage_release_full(void *calldata) |
515 | { | 542 | { |
516 | struct nfs_read_data *data = calldata; | 543 | struct nfs_read_data *data = calldata; |
544 | struct nfs_pageio_descriptor pgio; | ||
517 | 545 | ||
546 | if (data->pnfs_error) { | ||
547 | nfs_pageio_init_read_mds(&pgio, data->inode); | ||
548 | pgio.pg_recoalesce = 1; | ||
549 | } | ||
518 | while (!list_empty(&data->pages)) { | 550 | while (!list_empty(&data->pages)) { |
519 | struct nfs_page *req = nfs_list_entry(data->pages.next); | 551 | struct nfs_page *req = nfs_list_entry(data->pages.next); |
520 | 552 | ||
521 | nfs_list_remove_request(req); | 553 | nfs_list_remove_request(req); |
522 | nfs_readpage_release(req); | 554 | if (!data->pnfs_error) |
555 | nfs_readpage_release(req); | ||
556 | else | ||
557 | nfs_pageio_add_request(&pgio, req); | ||
523 | } | 558 | } |
559 | if (data->pnfs_error) | ||
560 | nfs_pageio_complete(&pgio); | ||
524 | nfs_readdata_release(calldata); | 561 | nfs_readdata_release(calldata); |
525 | } | 562 | } |
526 | 563 | ||
@@ -635,8 +672,6 @@ int nfs_readpages(struct file *filp, struct address_space *mapping, | |||
635 | .pgio = &pgio, | 672 | .pgio = &pgio, |
636 | }; | 673 | }; |
637 | struct inode *inode = mapping->host; | 674 | struct inode *inode = mapping->host; |
638 | struct nfs_server *server = NFS_SERVER(inode); | ||
639 | size_t rsize = server->rsize; | ||
640 | unsigned long npages; | 675 | unsigned long npages; |
641 | int ret = -ESTALE; | 676 | int ret = -ESTALE; |
642 | 677 | ||
@@ -664,10 +699,7 @@ int nfs_readpages(struct file *filp, struct address_space *mapping, | |||
664 | if (ret == 0) | 699 | if (ret == 0) |
665 | goto read_complete; /* all pages were read */ | 700 | goto read_complete; /* all pages were read */ |
666 | 701 | ||
667 | if (rsize < PAGE_CACHE_SIZE) | 702 | nfs_pageio_init_read(&pgio, inode); |
668 | nfs_pageio_init(&pgio, inode, nfs_pagein_multi, rsize, 0); | ||
669 | else | ||
670 | nfs_pageio_init(&pgio, inode, nfs_pagein_one, rsize, 0); | ||
671 | 703 | ||
672 | ret = read_cache_pages(mapping, pages, readpage_async_filler, &desc); | 704 | ret = read_cache_pages(mapping, pages, readpage_async_filler, &desc); |
673 | 705 | ||
diff --git a/fs/nfs/super.c b/fs/nfs/super.c index ce40e5c568b..c4daf4eaad9 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c | |||
@@ -904,10 +904,24 @@ static struct nfs_parsed_mount_data *nfs_alloc_parsed_mount_data(unsigned int ve | |||
904 | data->auth_flavor_len = 1; | 904 | data->auth_flavor_len = 1; |
905 | data->version = version; | 905 | data->version = version; |
906 | data->minorversion = 0; | 906 | data->minorversion = 0; |
907 | security_init_mnt_opts(&data->lsm_opts); | ||
907 | } | 908 | } |
908 | return data; | 909 | return data; |
909 | } | 910 | } |
910 | 911 | ||
912 | static void nfs_free_parsed_mount_data(struct nfs_parsed_mount_data *data) | ||
913 | { | ||
914 | if (data) { | ||
915 | kfree(data->client_address); | ||
916 | kfree(data->mount_server.hostname); | ||
917 | kfree(data->nfs_server.export_path); | ||
918 | kfree(data->nfs_server.hostname); | ||
919 | kfree(data->fscache_uniq); | ||
920 | security_free_mnt_opts(&data->lsm_opts); | ||
921 | kfree(data); | ||
922 | } | ||
923 | } | ||
924 | |||
911 | /* | 925 | /* |
912 | * Sanity-check a server address provided by the mount command. | 926 | * Sanity-check a server address provided by the mount command. |
913 | * | 927 | * |
@@ -2035,9 +2049,6 @@ static inline void nfs_initialise_sb(struct super_block *sb) | |||
2035 | sb->s_blocksize = nfs_block_bits(server->wsize, | 2049 | sb->s_blocksize = nfs_block_bits(server->wsize, |
2036 | &sb->s_blocksize_bits); | 2050 | &sb->s_blocksize_bits); |
2037 | 2051 | ||
2038 | if (server->flags & NFS_MOUNT_NOAC) | ||
2039 | sb->s_flags |= MS_SYNCHRONOUS; | ||
2040 | |||
2041 | sb->s_bdi = &server->backing_dev_info; | 2052 | sb->s_bdi = &server->backing_dev_info; |
2042 | 2053 | ||
2043 | nfs_super_set_maxbytes(sb, server->maxfilesize); | 2054 | nfs_super_set_maxbytes(sb, server->maxfilesize); |
@@ -2218,9 +2229,7 @@ static struct dentry *nfs_fs_mount(struct file_system_type *fs_type, | |||
2218 | data = nfs_alloc_parsed_mount_data(NFS_DEFAULT_VERSION); | 2229 | data = nfs_alloc_parsed_mount_data(NFS_DEFAULT_VERSION); |
2219 | mntfh = nfs_alloc_fhandle(); | 2230 | mntfh = nfs_alloc_fhandle(); |
2220 | if (data == NULL || mntfh == NULL) | 2231 | if (data == NULL || mntfh == NULL) |
2221 | goto out_free_fh; | 2232 | goto out; |
2222 | |||
2223 | security_init_mnt_opts(&data->lsm_opts); | ||
2224 | 2233 | ||
2225 | /* Validate the mount data */ | 2234 | /* Validate the mount data */ |
2226 | error = nfs_validate_mount_data(raw_data, data, mntfh, dev_name); | 2235 | error = nfs_validate_mount_data(raw_data, data, mntfh, dev_name); |
@@ -2232,8 +2241,6 @@ static struct dentry *nfs_fs_mount(struct file_system_type *fs_type, | |||
2232 | #ifdef CONFIG_NFS_V4 | 2241 | #ifdef CONFIG_NFS_V4 |
2233 | if (data->version == 4) { | 2242 | if (data->version == 4) { |
2234 | mntroot = nfs4_try_mount(flags, dev_name, data); | 2243 | mntroot = nfs4_try_mount(flags, dev_name, data); |
2235 | kfree(data->client_address); | ||
2236 | kfree(data->nfs_server.export_path); | ||
2237 | goto out; | 2244 | goto out; |
2238 | } | 2245 | } |
2239 | #endif /* CONFIG_NFS_V4 */ | 2246 | #endif /* CONFIG_NFS_V4 */ |
@@ -2249,6 +2256,10 @@ static struct dentry *nfs_fs_mount(struct file_system_type *fs_type, | |||
2249 | if (server->flags & NFS_MOUNT_UNSHARED) | 2256 | if (server->flags & NFS_MOUNT_UNSHARED) |
2250 | compare_super = NULL; | 2257 | compare_super = NULL; |
2251 | 2258 | ||
2259 | /* -o noac implies -o sync */ | ||
2260 | if (server->flags & NFS_MOUNT_NOAC) | ||
2261 | sb_mntdata.mntflags |= MS_SYNCHRONOUS; | ||
2262 | |||
2252 | /* Get a superblock - note that we may end up sharing one that already exists */ | 2263 | /* Get a superblock - note that we may end up sharing one that already exists */ |
2253 | s = sget(fs_type, compare_super, nfs_set_super, &sb_mntdata); | 2264 | s = sget(fs_type, compare_super, nfs_set_super, &sb_mntdata); |
2254 | if (IS_ERR(s)) { | 2265 | if (IS_ERR(s)) { |
@@ -2284,13 +2295,8 @@ static struct dentry *nfs_fs_mount(struct file_system_type *fs_type, | |||
2284 | s->s_flags |= MS_ACTIVE; | 2295 | s->s_flags |= MS_ACTIVE; |
2285 | 2296 | ||
2286 | out: | 2297 | out: |
2287 | kfree(data->nfs_server.hostname); | 2298 | nfs_free_parsed_mount_data(data); |
2288 | kfree(data->mount_server.hostname); | ||
2289 | kfree(data->fscache_uniq); | ||
2290 | security_free_mnt_opts(&data->lsm_opts); | ||
2291 | out_free_fh: | ||
2292 | nfs_free_fhandle(mntfh); | 2299 | nfs_free_fhandle(mntfh); |
2293 | kfree(data); | ||
2294 | return mntroot; | 2300 | return mntroot; |
2295 | 2301 | ||
2296 | out_err_nosb: | 2302 | out_err_nosb: |
@@ -2361,6 +2367,10 @@ nfs_xdev_mount(struct file_system_type *fs_type, int flags, | |||
2361 | if (server->flags & NFS_MOUNT_UNSHARED) | 2367 | if (server->flags & NFS_MOUNT_UNSHARED) |
2362 | compare_super = NULL; | 2368 | compare_super = NULL; |
2363 | 2369 | ||
2370 | /* -o noac implies -o sync */ | ||
2371 | if (server->flags & NFS_MOUNT_NOAC) | ||
2372 | sb_mntdata.mntflags |= MS_SYNCHRONOUS; | ||
2373 | |||
2364 | /* Get a superblock - note that we may end up sharing one that already exists */ | 2374 | /* Get a superblock - note that we may end up sharing one that already exists */ |
2365 | s = sget(&nfs_fs_type, compare_super, nfs_set_super, &sb_mntdata); | 2375 | s = sget(&nfs_fs_type, compare_super, nfs_set_super, &sb_mntdata); |
2366 | if (IS_ERR(s)) { | 2376 | if (IS_ERR(s)) { |
@@ -2613,9 +2623,7 @@ nfs4_remote_mount(struct file_system_type *fs_type, int flags, | |||
2613 | 2623 | ||
2614 | mntfh = nfs_alloc_fhandle(); | 2624 | mntfh = nfs_alloc_fhandle(); |
2615 | if (data == NULL || mntfh == NULL) | 2625 | if (data == NULL || mntfh == NULL) |
2616 | goto out_free_fh; | 2626 | goto out; |
2617 | |||
2618 | security_init_mnt_opts(&data->lsm_opts); | ||
2619 | 2627 | ||
2620 | /* Get a volume representation */ | 2628 | /* Get a volume representation */ |
2621 | server = nfs4_create_server(data, mntfh); | 2629 | server = nfs4_create_server(data, mntfh); |
@@ -2628,6 +2636,10 @@ nfs4_remote_mount(struct file_system_type *fs_type, int flags, | |||
2628 | if (server->flags & NFS4_MOUNT_UNSHARED) | 2636 | if (server->flags & NFS4_MOUNT_UNSHARED) |
2629 | compare_super = NULL; | 2637 | compare_super = NULL; |
2630 | 2638 | ||
2639 | /* -o noac implies -o sync */ | ||
2640 | if (server->flags & NFS_MOUNT_NOAC) | ||
2641 | sb_mntdata.mntflags |= MS_SYNCHRONOUS; | ||
2642 | |||
2631 | /* Get a superblock - note that we may end up sharing one that already exists */ | 2643 | /* Get a superblock - note that we may end up sharing one that already exists */ |
2632 | s = sget(&nfs4_fs_type, compare_super, nfs_set_super, &sb_mntdata); | 2644 | s = sget(&nfs4_fs_type, compare_super, nfs_set_super, &sb_mntdata); |
2633 | if (IS_ERR(s)) { | 2645 | if (IS_ERR(s)) { |
@@ -2663,13 +2675,10 @@ nfs4_remote_mount(struct file_system_type *fs_type, int flags, | |||
2663 | 2675 | ||
2664 | s->s_flags |= MS_ACTIVE; | 2676 | s->s_flags |= MS_ACTIVE; |
2665 | 2677 | ||
2666 | security_free_mnt_opts(&data->lsm_opts); | ||
2667 | nfs_free_fhandle(mntfh); | 2678 | nfs_free_fhandle(mntfh); |
2668 | return mntroot; | 2679 | return mntroot; |
2669 | 2680 | ||
2670 | out: | 2681 | out: |
2671 | security_free_mnt_opts(&data->lsm_opts); | ||
2672 | out_free_fh: | ||
2673 | nfs_free_fhandle(mntfh); | 2682 | nfs_free_fhandle(mntfh); |
2674 | return ERR_PTR(error); | 2683 | return ERR_PTR(error); |
2675 | 2684 | ||
@@ -2773,16 +2782,12 @@ static void nfs_referral_loop_unprotect(void) | |||
2773 | static struct dentry *nfs_follow_remote_path(struct vfsmount *root_mnt, | 2782 | static struct dentry *nfs_follow_remote_path(struct vfsmount *root_mnt, |
2774 | const char *export_path) | 2783 | const char *export_path) |
2775 | { | 2784 | { |
2776 | struct nameidata *nd = NULL; | ||
2777 | struct mnt_namespace *ns_private; | 2785 | struct mnt_namespace *ns_private; |
2778 | struct super_block *s; | 2786 | struct super_block *s; |
2779 | struct dentry *dentry; | 2787 | struct dentry *dentry; |
2788 | struct path path; | ||
2780 | int ret; | 2789 | int ret; |
2781 | 2790 | ||
2782 | nd = kmalloc(sizeof(*nd), GFP_KERNEL); | ||
2783 | if (nd == NULL) | ||
2784 | return ERR_PTR(-ENOMEM); | ||
2785 | |||
2786 | ns_private = create_mnt_ns(root_mnt); | 2791 | ns_private = create_mnt_ns(root_mnt); |
2787 | ret = PTR_ERR(ns_private); | 2792 | ret = PTR_ERR(ns_private); |
2788 | if (IS_ERR(ns_private)) | 2793 | if (IS_ERR(ns_private)) |
@@ -2793,7 +2798,7 @@ static struct dentry *nfs_follow_remote_path(struct vfsmount *root_mnt, | |||
2793 | goto out_put_mnt_ns; | 2798 | goto out_put_mnt_ns; |
2794 | 2799 | ||
2795 | ret = vfs_path_lookup(root_mnt->mnt_root, root_mnt, | 2800 | ret = vfs_path_lookup(root_mnt->mnt_root, root_mnt, |
2796 | export_path, LOOKUP_FOLLOW, nd); | 2801 | export_path, LOOKUP_FOLLOW|LOOKUP_AUTOMOUNT, &path); |
2797 | 2802 | ||
2798 | nfs_referral_loop_unprotect(); | 2803 | nfs_referral_loop_unprotect(); |
2799 | put_mnt_ns(ns_private); | 2804 | put_mnt_ns(ns_private); |
@@ -2801,12 +2806,11 @@ static struct dentry *nfs_follow_remote_path(struct vfsmount *root_mnt, | |||
2801 | if (ret != 0) | 2806 | if (ret != 0) |
2802 | goto out_err; | 2807 | goto out_err; |
2803 | 2808 | ||
2804 | s = nd->path.mnt->mnt_sb; | 2809 | s = path.mnt->mnt_sb; |
2805 | atomic_inc(&s->s_active); | 2810 | atomic_inc(&s->s_active); |
2806 | dentry = dget(nd->path.dentry); | 2811 | dentry = dget(path.dentry); |
2807 | 2812 | ||
2808 | path_put(&nd->path); | 2813 | path_put(&path); |
2809 | kfree(nd); | ||
2810 | down_write(&s->s_umount); | 2814 | down_write(&s->s_umount); |
2811 | return dentry; | 2815 | return dentry; |
2812 | out_put_mnt_ns: | 2816 | out_put_mnt_ns: |
@@ -2814,7 +2818,6 @@ out_put_mnt_ns: | |||
2814 | out_mntput: | 2818 | out_mntput: |
2815 | mntput(root_mnt); | 2819 | mntput(root_mnt); |
2816 | out_err: | 2820 | out_err: |
2817 | kfree(nd); | ||
2818 | return ERR_PTR(ret); | 2821 | return ERR_PTR(ret); |
2819 | } | 2822 | } |
2820 | 2823 | ||
@@ -2855,7 +2858,7 @@ static struct dentry *nfs4_mount(struct file_system_type *fs_type, | |||
2855 | 2858 | ||
2856 | data = nfs_alloc_parsed_mount_data(4); | 2859 | data = nfs_alloc_parsed_mount_data(4); |
2857 | if (data == NULL) | 2860 | if (data == NULL) |
2858 | goto out_free_data; | 2861 | goto out; |
2859 | 2862 | ||
2860 | /* Validate the mount data */ | 2863 | /* Validate the mount data */ |
2861 | error = nfs4_validate_mount_data(raw_data, data, dev_name); | 2864 | error = nfs4_validate_mount_data(raw_data, data, dev_name); |
@@ -2869,12 +2872,7 @@ static struct dentry *nfs4_mount(struct file_system_type *fs_type, | |||
2869 | error = PTR_ERR(res); | 2872 | error = PTR_ERR(res); |
2870 | 2873 | ||
2871 | out: | 2874 | out: |
2872 | kfree(data->client_address); | 2875 | nfs_free_parsed_mount_data(data); |
2873 | kfree(data->nfs_server.export_path); | ||
2874 | kfree(data->nfs_server.hostname); | ||
2875 | kfree(data->fscache_uniq); | ||
2876 | out_free_data: | ||
2877 | kfree(data); | ||
2878 | dprintk("<-- nfs4_mount() = %d%s\n", error, | 2876 | dprintk("<-- nfs4_mount() = %d%s\n", error, |
2879 | error != 0 ? " [error]" : ""); | 2877 | error != 0 ? " [error]" : ""); |
2880 | return res; | 2878 | return res; |
@@ -2922,6 +2920,10 @@ nfs4_xdev_mount(struct file_system_type *fs_type, int flags, | |||
2922 | if (server->flags & NFS4_MOUNT_UNSHARED) | 2920 | if (server->flags & NFS4_MOUNT_UNSHARED) |
2923 | compare_super = NULL; | 2921 | compare_super = NULL; |
2924 | 2922 | ||
2923 | /* -o noac implies -o sync */ | ||
2924 | if (server->flags & NFS_MOUNT_NOAC) | ||
2925 | sb_mntdata.mntflags |= MS_SYNCHRONOUS; | ||
2926 | |||
2925 | /* Get a superblock - note that we may end up sharing one that already exists */ | 2927 | /* Get a superblock - note that we may end up sharing one that already exists */ |
2926 | s = sget(&nfs4_fs_type, compare_super, nfs_set_super, &sb_mntdata); | 2928 | s = sget(&nfs4_fs_type, compare_super, nfs_set_super, &sb_mntdata); |
2927 | if (IS_ERR(s)) { | 2929 | if (IS_ERR(s)) { |
@@ -3009,6 +3011,10 @@ nfs4_remote_referral_mount(struct file_system_type *fs_type, int flags, | |||
3009 | if (server->flags & NFS4_MOUNT_UNSHARED) | 3011 | if (server->flags & NFS4_MOUNT_UNSHARED) |
3010 | compare_super = NULL; | 3012 | compare_super = NULL; |
3011 | 3013 | ||
3014 | /* -o noac implies -o sync */ | ||
3015 | if (server->flags & NFS_MOUNT_NOAC) | ||
3016 | sb_mntdata.mntflags |= MS_SYNCHRONOUS; | ||
3017 | |||
3012 | /* Get a superblock - note that we may end up sharing one that already exists */ | 3018 | /* Get a superblock - note that we may end up sharing one that already exists */ |
3013 | s = sget(&nfs4_fs_type, compare_super, nfs_set_super, &sb_mntdata); | 3019 | s = sget(&nfs4_fs_type, compare_super, nfs_set_super, &sb_mntdata); |
3014 | if (IS_ERR(s)) { | 3020 | if (IS_ERR(s)) { |
diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c index 8d6864c2a5f..b2fbbde58e4 100644 --- a/fs/nfs/unlink.c +++ b/fs/nfs/unlink.c | |||
@@ -147,7 +147,7 @@ static int nfs_do_call_unlink(struct dentry *parent, struct inode *dir, struct n | |||
147 | 147 | ||
148 | alias = d_lookup(parent, &data->args.name); | 148 | alias = d_lookup(parent, &data->args.name); |
149 | if (alias != NULL) { | 149 | if (alias != NULL) { |
150 | int ret = 0; | 150 | int ret; |
151 | void *devname_garbage = NULL; | 151 | void *devname_garbage = NULL; |
152 | 152 | ||
153 | /* | 153 | /* |
@@ -155,14 +155,16 @@ static int nfs_do_call_unlink(struct dentry *parent, struct inode *dir, struct n | |||
155 | * the sillyrename information to the aliased dentry. | 155 | * the sillyrename information to the aliased dentry. |
156 | */ | 156 | */ |
157 | nfs_free_dname(data); | 157 | nfs_free_dname(data); |
158 | ret = nfs_copy_dname(alias, data); | ||
158 | spin_lock(&alias->d_lock); | 159 | spin_lock(&alias->d_lock); |
159 | if (alias->d_inode != NULL && | 160 | if (ret == 0 && alias->d_inode != NULL && |
160 | !(alias->d_flags & DCACHE_NFSFS_RENAMED)) { | 161 | !(alias->d_flags & DCACHE_NFSFS_RENAMED)) { |
161 | devname_garbage = alias->d_fsdata; | 162 | devname_garbage = alias->d_fsdata; |
162 | alias->d_fsdata = data; | 163 | alias->d_fsdata = data; |
163 | alias->d_flags |= DCACHE_NFSFS_RENAMED; | 164 | alias->d_flags |= DCACHE_NFSFS_RENAMED; |
164 | ret = 1; | 165 | ret = 1; |
165 | } | 166 | } else |
167 | ret = 0; | ||
166 | spin_unlock(&alias->d_lock); | 168 | spin_unlock(&alias->d_lock); |
167 | nfs_dec_sillycount(dir); | 169 | nfs_dec_sillycount(dir); |
168 | dput(alias); | 170 | dput(alias); |
@@ -171,8 +173,7 @@ static int nfs_do_call_unlink(struct dentry *parent, struct inode *dir, struct n | |||
171 | * point dentry is definitely not a root, so we won't need | 173 | * point dentry is definitely not a root, so we won't need |
172 | * that anymore. | 174 | * that anymore. |
173 | */ | 175 | */ |
174 | if (devname_garbage) | 176 | kfree(devname_garbage); |
175 | kfree(devname_garbage); | ||
176 | return ret; | 177 | return ret; |
177 | } | 178 | } |
178 | data->dir = igrab(dir); | 179 | data->dir = igrab(dir); |
@@ -204,8 +205,6 @@ static int nfs_call_unlink(struct dentry *dentry, struct nfs_unlinkdata *data) | |||
204 | if (parent == NULL) | 205 | if (parent == NULL) |
205 | goto out_free; | 206 | goto out_free; |
206 | dir = parent->d_inode; | 207 | dir = parent->d_inode; |
207 | if (nfs_copy_dname(dentry, data) != 0) | ||
208 | goto out_dput; | ||
209 | /* Non-exclusive lock protects against concurrent lookup() calls */ | 208 | /* Non-exclusive lock protects against concurrent lookup() calls */ |
210 | spin_lock(&dir->i_lock); | 209 | spin_lock(&dir->i_lock); |
211 | if (atomic_inc_not_zero(&NFS_I(dir)->silly_count) == 0) { | 210 | if (atomic_inc_not_zero(&NFS_I(dir)->silly_count) == 0) { |
@@ -366,6 +365,8 @@ static void nfs_async_rename_done(struct rpc_task *task, void *calldata) | |||
366 | struct nfs_renamedata *data = calldata; | 365 | struct nfs_renamedata *data = calldata; |
367 | struct inode *old_dir = data->old_dir; | 366 | struct inode *old_dir = data->old_dir; |
368 | struct inode *new_dir = data->new_dir; | 367 | struct inode *new_dir = data->new_dir; |
368 | struct dentry *old_dentry = data->old_dentry; | ||
369 | struct dentry *new_dentry = data->new_dentry; | ||
369 | 370 | ||
370 | if (!NFS_PROTO(old_dir)->rename_done(task, old_dir, new_dir)) { | 371 | if (!NFS_PROTO(old_dir)->rename_done(task, old_dir, new_dir)) { |
371 | nfs_restart_rpc(task, NFS_SERVER(old_dir)->nfs_client); | 372 | nfs_restart_rpc(task, NFS_SERVER(old_dir)->nfs_client); |
@@ -373,12 +374,12 @@ static void nfs_async_rename_done(struct rpc_task *task, void *calldata) | |||
373 | } | 374 | } |
374 | 375 | ||
375 | if (task->tk_status != 0) { | 376 | if (task->tk_status != 0) { |
376 | nfs_cancel_async_unlink(data->old_dentry); | 377 | nfs_cancel_async_unlink(old_dentry); |
377 | return; | 378 | return; |
378 | } | 379 | } |
379 | 380 | ||
380 | nfs_set_verifier(data->old_dentry, nfs_save_change_attribute(old_dir)); | 381 | d_drop(old_dentry); |
381 | d_move(data->old_dentry, data->new_dentry); | 382 | d_drop(new_dentry); |
382 | } | 383 | } |
383 | 384 | ||
384 | /** | 385 | /** |
@@ -501,6 +502,14 @@ nfs_async_rename(struct inode *old_dir, struct inode *new_dir, | |||
501 | * and only performs the unlink once the last reference to it is put. | 502 | * and only performs the unlink once the last reference to it is put. |
502 | * | 503 | * |
503 | * The final cleanup is done during dentry_iput. | 504 | * The final cleanup is done during dentry_iput. |
505 | * | ||
506 | * (Note: NFSv4 is stateful, and has opens, so in theory an NFSv4 server | ||
507 | * could take responsibility for keeping open files referenced. The server | ||
508 | * would also need to ensure that opened-but-deleted files were kept over | ||
509 | * reboots. However, we may not assume a server does so. (RFC 5661 | ||
510 | * does provide an OPEN4_RESULT_PRESERVE_UNLINKED flag that a server can | ||
511 | * use to advertise that it does this; some day we may take advantage of | ||
512 | * it.)) | ||
504 | */ | 513 | */ |
505 | int | 514 | int |
506 | nfs_sillyrename(struct inode *dir, struct dentry *dentry) | 515 | nfs_sillyrename(struct inode *dir, struct dentry *dentry) |
@@ -560,6 +569,14 @@ nfs_sillyrename(struct inode *dir, struct dentry *dentry) | |||
560 | if (error) | 569 | if (error) |
561 | goto out_dput; | 570 | goto out_dput; |
562 | 571 | ||
572 | /* populate unlinkdata with the right dname */ | ||
573 | error = nfs_copy_dname(sdentry, | ||
574 | (struct nfs_unlinkdata *)dentry->d_fsdata); | ||
575 | if (error) { | ||
576 | nfs_cancel_async_unlink(dentry); | ||
577 | goto out_dput; | ||
578 | } | ||
579 | |||
563 | /* run the rename task, undo unlink if it fails */ | 580 | /* run the rename task, undo unlink if it fails */ |
564 | task = nfs_async_rename(dir, dir, dentry, sdentry); | 581 | task = nfs_async_rename(dir, dir, dentry, sdentry); |
565 | if (IS_ERR(task)) { | 582 | if (IS_ERR(task)) { |
diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 72716805968..106fd0634ab 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c | |||
@@ -97,7 +97,7 @@ void nfs_writedata_free(struct nfs_write_data *p) | |||
97 | mempool_free(p, nfs_wdata_mempool); | 97 | mempool_free(p, nfs_wdata_mempool); |
98 | } | 98 | } |
99 | 99 | ||
100 | static void nfs_writedata_release(struct nfs_write_data *wdata) | 100 | void nfs_writedata_release(struct nfs_write_data *wdata) |
101 | { | 101 | { |
102 | put_lseg(wdata->lseg); | 102 | put_lseg(wdata->lseg); |
103 | put_nfs_open_context(wdata->args.context); | 103 | put_nfs_open_context(wdata->args.context); |
@@ -409,7 +409,7 @@ out: | |||
409 | */ | 409 | */ |
410 | static void nfs_inode_remove_request(struct nfs_page *req) | 410 | static void nfs_inode_remove_request(struct nfs_page *req) |
411 | { | 411 | { |
412 | struct inode *inode = req->wb_context->path.dentry->d_inode; | 412 | struct inode *inode = req->wb_context->dentry->d_inode; |
413 | struct nfs_inode *nfsi = NFS_I(inode); | 413 | struct nfs_inode *nfsi = NFS_I(inode); |
414 | 414 | ||
415 | BUG_ON (!NFS_WBACK_BUSY(req)); | 415 | BUG_ON (!NFS_WBACK_BUSY(req)); |
@@ -428,7 +428,6 @@ static void | |||
428 | nfs_mark_request_dirty(struct nfs_page *req) | 428 | nfs_mark_request_dirty(struct nfs_page *req) |
429 | { | 429 | { |
430 | __set_page_dirty_nobuffers(req->wb_page); | 430 | __set_page_dirty_nobuffers(req->wb_page); |
431 | __mark_inode_dirty(req->wb_page->mapping->host, I_DIRTY_DATASYNC); | ||
432 | } | 431 | } |
433 | 432 | ||
434 | #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) | 433 | #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) |
@@ -438,7 +437,7 @@ nfs_mark_request_dirty(struct nfs_page *req) | |||
438 | static void | 437 | static void |
439 | nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg) | 438 | nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg) |
440 | { | 439 | { |
441 | struct inode *inode = req->wb_context->path.dentry->d_inode; | 440 | struct inode *inode = req->wb_context->dentry->d_inode; |
442 | struct nfs_inode *nfsi = NFS_I(inode); | 441 | struct nfs_inode *nfsi = NFS_I(inode); |
443 | 442 | ||
444 | spin_lock(&inode->i_lock); | 443 | spin_lock(&inode->i_lock); |
@@ -762,6 +761,8 @@ int nfs_updatepage(struct file *file, struct page *page, | |||
762 | status = nfs_writepage_setup(ctx, page, offset, count); | 761 | status = nfs_writepage_setup(ctx, page, offset, count); |
763 | if (status < 0) | 762 | if (status < 0) |
764 | nfs_set_pageerror(page); | 763 | nfs_set_pageerror(page); |
764 | else | ||
765 | __set_page_dirty_nobuffers(page); | ||
765 | 766 | ||
766 | dprintk("NFS: nfs_updatepage returns %d (isize %lld)\n", | 767 | dprintk("NFS: nfs_updatepage returns %d (isize %lld)\n", |
767 | status, (long long)i_size_read(inode)); | 768 | status, (long long)i_size_read(inode)); |
@@ -845,22 +846,19 @@ EXPORT_SYMBOL_GPL(nfs_initiate_write); | |||
845 | /* | 846 | /* |
846 | * Set up the argument/result storage required for the RPC call. | 847 | * Set up the argument/result storage required for the RPC call. |
847 | */ | 848 | */ |
848 | static int nfs_write_rpcsetup(struct nfs_page *req, | 849 | static void nfs_write_rpcsetup(struct nfs_page *req, |
849 | struct nfs_write_data *data, | 850 | struct nfs_write_data *data, |
850 | const struct rpc_call_ops *call_ops, | ||
851 | unsigned int count, unsigned int offset, | 851 | unsigned int count, unsigned int offset, |
852 | struct pnfs_layout_segment *lseg, | ||
853 | int how) | 852 | int how) |
854 | { | 853 | { |
855 | struct inode *inode = req->wb_context->path.dentry->d_inode; | 854 | struct inode *inode = req->wb_context->dentry->d_inode; |
856 | 855 | ||
857 | /* Set up the RPC argument and reply structs | 856 | /* Set up the RPC argument and reply structs |
858 | * NB: take care not to mess about with data->commit et al. */ | 857 | * NB: take care not to mess about with data->commit et al. */ |
859 | 858 | ||
860 | data->req = req; | 859 | data->req = req; |
861 | data->inode = inode = req->wb_context->path.dentry->d_inode; | 860 | data->inode = inode = req->wb_context->dentry->d_inode; |
862 | data->cred = req->wb_context->cred; | 861 | data->cred = req->wb_context->cred; |
863 | data->lseg = get_lseg(lseg); | ||
864 | 862 | ||
865 | data->args.fh = NFS_FH(inode); | 863 | data->args.fh = NFS_FH(inode); |
866 | data->args.offset = req_offset(req) + offset; | 864 | data->args.offset = req_offset(req) + offset; |
@@ -872,24 +870,51 @@ static int nfs_write_rpcsetup(struct nfs_page *req, | |||
872 | data->args.context = get_nfs_open_context(req->wb_context); | 870 | data->args.context = get_nfs_open_context(req->wb_context); |
873 | data->args.lock_context = req->wb_lock_context; | 871 | data->args.lock_context = req->wb_lock_context; |
874 | data->args.stable = NFS_UNSTABLE; | 872 | data->args.stable = NFS_UNSTABLE; |
875 | if (how & (FLUSH_STABLE | FLUSH_COND_STABLE)) { | 873 | switch (how & (FLUSH_STABLE | FLUSH_COND_STABLE)) { |
876 | data->args.stable = NFS_DATA_SYNC; | 874 | case 0: |
877 | if (!nfs_need_commit(NFS_I(inode))) | 875 | break; |
878 | data->args.stable = NFS_FILE_SYNC; | 876 | case FLUSH_COND_STABLE: |
877 | if (nfs_need_commit(NFS_I(inode))) | ||
878 | break; | ||
879 | default: | ||
880 | data->args.stable = NFS_FILE_SYNC; | ||
879 | } | 881 | } |
880 | 882 | ||
881 | data->res.fattr = &data->fattr; | 883 | data->res.fattr = &data->fattr; |
882 | data->res.count = count; | 884 | data->res.count = count; |
883 | data->res.verf = &data->verf; | 885 | data->res.verf = &data->verf; |
884 | nfs_fattr_init(&data->fattr); | 886 | nfs_fattr_init(&data->fattr); |
887 | } | ||
885 | 888 | ||
886 | if (data->lseg && | 889 | static int nfs_do_write(struct nfs_write_data *data, |
887 | (pnfs_try_to_write_data(data, call_ops, how) == PNFS_ATTEMPTED)) | 890 | const struct rpc_call_ops *call_ops, |
888 | return 0; | 891 | int how) |
892 | { | ||
893 | struct inode *inode = data->args.context->dentry->d_inode; | ||
889 | 894 | ||
890 | return nfs_initiate_write(data, NFS_CLIENT(inode), call_ops, how); | 895 | return nfs_initiate_write(data, NFS_CLIENT(inode), call_ops, how); |
891 | } | 896 | } |
892 | 897 | ||
898 | static int nfs_do_multiple_writes(struct list_head *head, | ||
899 | const struct rpc_call_ops *call_ops, | ||
900 | int how) | ||
901 | { | ||
902 | struct nfs_write_data *data; | ||
903 | int ret = 0; | ||
904 | |||
905 | while (!list_empty(head)) { | ||
906 | int ret2; | ||
907 | |||
908 | data = list_entry(head->next, struct nfs_write_data, list); | ||
909 | list_del_init(&data->list); | ||
910 | |||
911 | ret2 = nfs_do_write(data, call_ops, how); | ||
912 | if (ret == 0) | ||
913 | ret = ret2; | ||
914 | } | ||
915 | return ret; | ||
916 | } | ||
917 | |||
893 | /* If a nfs_flush_* function fails, it should remove reqs from @head and | 918 | /* If a nfs_flush_* function fails, it should remove reqs from @head and |
894 | * call this on each, which will prepare them to be retried on next | 919 | * call this on each, which will prepare them to be retried on next |
895 | * writeback using standard nfs. | 920 | * writeback using standard nfs. |
@@ -907,17 +932,15 @@ static void nfs_redirty_request(struct nfs_page *req) | |||
907 | * Generate multiple small requests to write out a single | 932 | * Generate multiple small requests to write out a single |
908 | * contiguous dirty area on one page. | 933 | * contiguous dirty area on one page. |
909 | */ | 934 | */ |
910 | static int nfs_flush_multi(struct nfs_pageio_descriptor *desc) | 935 | static int nfs_flush_multi(struct nfs_pageio_descriptor *desc, struct list_head *res) |
911 | { | 936 | { |
912 | struct nfs_page *req = nfs_list_entry(desc->pg_list.next); | 937 | struct nfs_page *req = nfs_list_entry(desc->pg_list.next); |
913 | struct page *page = req->wb_page; | 938 | struct page *page = req->wb_page; |
914 | struct nfs_write_data *data; | 939 | struct nfs_write_data *data; |
915 | size_t wsize = NFS_SERVER(desc->pg_inode)->wsize, nbytes; | 940 | size_t wsize = desc->pg_bsize, nbytes; |
916 | unsigned int offset; | 941 | unsigned int offset; |
917 | int requests = 0; | 942 | int requests = 0; |
918 | int ret = 0; | 943 | int ret = 0; |
919 | struct pnfs_layout_segment *lseg; | ||
920 | LIST_HEAD(list); | ||
921 | 944 | ||
922 | nfs_list_remove_request(req); | 945 | nfs_list_remove_request(req); |
923 | 946 | ||
@@ -927,6 +950,7 @@ static int nfs_flush_multi(struct nfs_pageio_descriptor *desc) | |||
927 | desc->pg_ioflags &= ~FLUSH_COND_STABLE; | 950 | desc->pg_ioflags &= ~FLUSH_COND_STABLE; |
928 | 951 | ||
929 | 952 | ||
953 | offset = 0; | ||
930 | nbytes = desc->pg_count; | 954 | nbytes = desc->pg_count; |
931 | do { | 955 | do { |
932 | size_t len = min(nbytes, wsize); | 956 | size_t len = min(nbytes, wsize); |
@@ -934,45 +958,21 @@ static int nfs_flush_multi(struct nfs_pageio_descriptor *desc) | |||
934 | data = nfs_writedata_alloc(1); | 958 | data = nfs_writedata_alloc(1); |
935 | if (!data) | 959 | if (!data) |
936 | goto out_bad; | 960 | goto out_bad; |
937 | list_add(&data->pages, &list); | 961 | data->pagevec[0] = page; |
962 | nfs_write_rpcsetup(req, data, len, offset, desc->pg_ioflags); | ||
963 | list_add(&data->list, res); | ||
938 | requests++; | 964 | requests++; |
939 | nbytes -= len; | 965 | nbytes -= len; |
966 | offset += len; | ||
940 | } while (nbytes != 0); | 967 | } while (nbytes != 0); |
941 | atomic_set(&req->wb_complete, requests); | 968 | atomic_set(&req->wb_complete, requests); |
942 | 969 | desc->pg_rpc_callops = &nfs_write_partial_ops; | |
943 | BUG_ON(desc->pg_lseg); | ||
944 | lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, | ||
945 | req_offset(req), desc->pg_count, | ||
946 | IOMODE_RW, GFP_NOFS); | ||
947 | ClearPageError(page); | ||
948 | offset = 0; | ||
949 | nbytes = desc->pg_count; | ||
950 | do { | ||
951 | int ret2; | ||
952 | |||
953 | data = list_entry(list.next, struct nfs_write_data, pages); | ||
954 | list_del_init(&data->pages); | ||
955 | |||
956 | data->pagevec[0] = page; | ||
957 | |||
958 | if (nbytes < wsize) | ||
959 | wsize = nbytes; | ||
960 | ret2 = nfs_write_rpcsetup(req, data, &nfs_write_partial_ops, | ||
961 | wsize, offset, lseg, desc->pg_ioflags); | ||
962 | if (ret == 0) | ||
963 | ret = ret2; | ||
964 | offset += wsize; | ||
965 | nbytes -= wsize; | ||
966 | } while (nbytes != 0); | ||
967 | |||
968 | put_lseg(lseg); | ||
969 | desc->pg_lseg = NULL; | ||
970 | return ret; | 970 | return ret; |
971 | 971 | ||
972 | out_bad: | 972 | out_bad: |
973 | while (!list_empty(&list)) { | 973 | while (!list_empty(res)) { |
974 | data = list_entry(list.next, struct nfs_write_data, pages); | 974 | data = list_entry(res->next, struct nfs_write_data, list); |
975 | list_del(&data->pages); | 975 | list_del(&data->list); |
976 | nfs_writedata_free(data); | 976 | nfs_writedata_free(data); |
977 | } | 977 | } |
978 | nfs_redirty_request(req); | 978 | nfs_redirty_request(req); |
@@ -987,14 +987,13 @@ out_bad: | |||
987 | * This is the case if nfs_updatepage detects a conflicting request | 987 | * This is the case if nfs_updatepage detects a conflicting request |
988 | * that has been written but not committed. | 988 | * that has been written but not committed. |
989 | */ | 989 | */ |
990 | static int nfs_flush_one(struct nfs_pageio_descriptor *desc) | 990 | static int nfs_flush_one(struct nfs_pageio_descriptor *desc, struct list_head *res) |
991 | { | 991 | { |
992 | struct nfs_page *req; | 992 | struct nfs_page *req; |
993 | struct page **pages; | 993 | struct page **pages; |
994 | struct nfs_write_data *data; | 994 | struct nfs_write_data *data; |
995 | struct list_head *head = &desc->pg_list; | 995 | struct list_head *head = &desc->pg_list; |
996 | struct pnfs_layout_segment *lseg = desc->pg_lseg; | 996 | int ret = 0; |
997 | int ret; | ||
998 | 997 | ||
999 | data = nfs_writedata_alloc(nfs_page_array_len(desc->pg_base, | 998 | data = nfs_writedata_alloc(nfs_page_array_len(desc->pg_base, |
1000 | desc->pg_count)); | 999 | desc->pg_count)); |
@@ -1016,32 +1015,62 @@ static int nfs_flush_one(struct nfs_pageio_descriptor *desc) | |||
1016 | *pages++ = req->wb_page; | 1015 | *pages++ = req->wb_page; |
1017 | } | 1016 | } |
1018 | req = nfs_list_entry(data->pages.next); | 1017 | req = nfs_list_entry(data->pages.next); |
1019 | if ((!lseg) && list_is_singular(&data->pages)) | ||
1020 | lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, | ||
1021 | req_offset(req), desc->pg_count, | ||
1022 | IOMODE_RW, GFP_NOFS); | ||
1023 | 1018 | ||
1024 | if ((desc->pg_ioflags & FLUSH_COND_STABLE) && | 1019 | if ((desc->pg_ioflags & FLUSH_COND_STABLE) && |
1025 | (desc->pg_moreio || NFS_I(desc->pg_inode)->ncommit)) | 1020 | (desc->pg_moreio || NFS_I(desc->pg_inode)->ncommit)) |
1026 | desc->pg_ioflags &= ~FLUSH_COND_STABLE; | 1021 | desc->pg_ioflags &= ~FLUSH_COND_STABLE; |
1027 | 1022 | ||
1028 | /* Set up the argument struct */ | 1023 | /* Set up the argument struct */ |
1029 | ret = nfs_write_rpcsetup(req, data, &nfs_write_full_ops, desc->pg_count, 0, lseg, desc->pg_ioflags); | 1024 | nfs_write_rpcsetup(req, data, desc->pg_count, 0, desc->pg_ioflags); |
1025 | list_add(&data->list, res); | ||
1026 | desc->pg_rpc_callops = &nfs_write_full_ops; | ||
1030 | out: | 1027 | out: |
1031 | put_lseg(lseg); /* Cleans any gotten in ->pg_test */ | ||
1032 | desc->pg_lseg = NULL; | ||
1033 | return ret; | 1028 | return ret; |
1034 | } | 1029 | } |
1035 | 1030 | ||
1036 | static void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, | 1031 | int nfs_generic_flush(struct nfs_pageio_descriptor *desc, struct list_head *head) |
1032 | { | ||
1033 | if (desc->pg_bsize < PAGE_CACHE_SIZE) | ||
1034 | return nfs_flush_multi(desc, head); | ||
1035 | return nfs_flush_one(desc, head); | ||
1036 | } | ||
1037 | |||
1038 | static int nfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc) | ||
1039 | { | ||
1040 | LIST_HEAD(head); | ||
1041 | int ret; | ||
1042 | |||
1043 | ret = nfs_generic_flush(desc, &head); | ||
1044 | if (ret == 0) | ||
1045 | ret = nfs_do_multiple_writes(&head, desc->pg_rpc_callops, | ||
1046 | desc->pg_ioflags); | ||
1047 | return ret; | ||
1048 | } | ||
1049 | |||
1050 | static const struct nfs_pageio_ops nfs_pageio_write_ops = { | ||
1051 | .pg_test = nfs_generic_pg_test, | ||
1052 | .pg_doio = nfs_generic_pg_writepages, | ||
1053 | }; | ||
1054 | |||
1055 | static void nfs_pageio_init_write_mds(struct nfs_pageio_descriptor *pgio, | ||
1037 | struct inode *inode, int ioflags) | 1056 | struct inode *inode, int ioflags) |
1038 | { | 1057 | { |
1039 | size_t wsize = NFS_SERVER(inode)->wsize; | 1058 | nfs_pageio_init(pgio, inode, &nfs_pageio_write_ops, |
1059 | NFS_SERVER(inode)->wsize, ioflags); | ||
1060 | } | ||
1040 | 1061 | ||
1041 | if (wsize < PAGE_CACHE_SIZE) | 1062 | void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio) |
1042 | nfs_pageio_init(pgio, inode, nfs_flush_multi, wsize, ioflags); | 1063 | { |
1043 | else | 1064 | pgio->pg_ops = &nfs_pageio_write_ops; |
1044 | nfs_pageio_init(pgio, inode, nfs_flush_one, wsize, ioflags); | 1065 | pgio->pg_bsize = NFS_SERVER(pgio->pg_inode)->wsize; |
1066 | } | ||
1067 | EXPORT_SYMBOL_GPL(nfs_pageio_reset_write_mds); | ||
1068 | |||
1069 | static void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, | ||
1070 | struct inode *inode, int ioflags) | ||
1071 | { | ||
1072 | if (!pnfs_pageio_init_write(pgio, inode, ioflags)) | ||
1073 | nfs_pageio_init_write_mds(pgio, inode, ioflags); | ||
1045 | } | 1074 | } |
1046 | 1075 | ||
1047 | /* | 1076 | /* |
@@ -1053,9 +1082,9 @@ static void nfs_writeback_done_partial(struct rpc_task *task, void *calldata) | |||
1053 | 1082 | ||
1054 | dprintk("NFS: %5u write(%s/%lld %d@%lld)", | 1083 | dprintk("NFS: %5u write(%s/%lld %d@%lld)", |
1055 | task->tk_pid, | 1084 | task->tk_pid, |
1056 | data->req->wb_context->path.dentry->d_inode->i_sb->s_id, | 1085 | data->req->wb_context->dentry->d_inode->i_sb->s_id, |
1057 | (long long) | 1086 | (long long) |
1058 | NFS_FILEID(data->req->wb_context->path.dentry->d_inode), | 1087 | NFS_FILEID(data->req->wb_context->dentry->d_inode), |
1059 | data->req->wb_bytes, (long long)req_offset(data->req)); | 1088 | data->req->wb_bytes, (long long)req_offset(data->req)); |
1060 | 1089 | ||
1061 | nfs_writeback_done(task, data); | 1090 | nfs_writeback_done(task, data); |
@@ -1137,7 +1166,13 @@ static void nfs_writeback_done_full(struct rpc_task *task, void *calldata) | |||
1137 | static void nfs_writeback_release_full(void *calldata) | 1166 | static void nfs_writeback_release_full(void *calldata) |
1138 | { | 1167 | { |
1139 | struct nfs_write_data *data = calldata; | 1168 | struct nfs_write_data *data = calldata; |
1140 | int status = data->task.tk_status; | 1169 | int ret, status = data->task.tk_status; |
1170 | struct nfs_pageio_descriptor pgio; | ||
1171 | |||
1172 | if (data->pnfs_error) { | ||
1173 | nfs_pageio_init_write_mds(&pgio, data->inode, FLUSH_STABLE); | ||
1174 | pgio.pg_recoalesce = 1; | ||
1175 | } | ||
1141 | 1176 | ||
1142 | /* Update attributes as result of writeback. */ | 1177 | /* Update attributes as result of writeback. */ |
1143 | while (!list_empty(&data->pages)) { | 1178 | while (!list_empty(&data->pages)) { |
@@ -1148,11 +1183,16 @@ static void nfs_writeback_release_full(void *calldata) | |||
1148 | 1183 | ||
1149 | dprintk("NFS: %5u write (%s/%lld %d@%lld)", | 1184 | dprintk("NFS: %5u write (%s/%lld %d@%lld)", |
1150 | data->task.tk_pid, | 1185 | data->task.tk_pid, |
1151 | req->wb_context->path.dentry->d_inode->i_sb->s_id, | 1186 | req->wb_context->dentry->d_inode->i_sb->s_id, |
1152 | (long long)NFS_FILEID(req->wb_context->path.dentry->d_inode), | 1187 | (long long)NFS_FILEID(req->wb_context->dentry->d_inode), |
1153 | req->wb_bytes, | 1188 | req->wb_bytes, |
1154 | (long long)req_offset(req)); | 1189 | (long long)req_offset(req)); |
1155 | 1190 | ||
1191 | if (data->pnfs_error) { | ||
1192 | dprintk(", pnfs error = %d\n", data->pnfs_error); | ||
1193 | goto next; | ||
1194 | } | ||
1195 | |||
1156 | if (status < 0) { | 1196 | if (status < 0) { |
1157 | nfs_set_pageerror(page); | 1197 | nfs_set_pageerror(page); |
1158 | nfs_context_set_write_error(req->wb_context, status); | 1198 | nfs_context_set_write_error(req->wb_context, status); |
@@ -1172,7 +1212,19 @@ remove_request: | |||
1172 | next: | 1212 | next: |
1173 | nfs_clear_page_tag_locked(req); | 1213 | nfs_clear_page_tag_locked(req); |
1174 | nfs_end_page_writeback(page); | 1214 | nfs_end_page_writeback(page); |
1215 | if (data->pnfs_error) { | ||
1216 | lock_page(page); | ||
1217 | nfs_pageio_cond_complete(&pgio, page->index); | ||
1218 | ret = nfs_page_async_flush(&pgio, page, 0); | ||
1219 | if (ret) { | ||
1220 | nfs_set_pageerror(page); | ||
1221 | dprintk("rewrite to MDS error = %d\n", ret); | ||
1222 | } | ||
1223 | unlock_page(page); | ||
1224 | } | ||
1175 | } | 1225 | } |
1226 | if (data->pnfs_error) | ||
1227 | nfs_pageio_complete(&pgio); | ||
1176 | nfs_writedata_release(calldata); | 1228 | nfs_writedata_release(calldata); |
1177 | } | 1229 | } |
1178 | 1230 | ||
@@ -1347,7 +1399,7 @@ void nfs_init_commit(struct nfs_write_data *data, | |||
1347 | struct pnfs_layout_segment *lseg) | 1399 | struct pnfs_layout_segment *lseg) |
1348 | { | 1400 | { |
1349 | struct nfs_page *first = nfs_list_entry(head->next); | 1401 | struct nfs_page *first = nfs_list_entry(head->next); |
1350 | struct inode *inode = first->wb_context->path.dentry->d_inode; | 1402 | struct inode *inode = first->wb_context->dentry->d_inode; |
1351 | 1403 | ||
1352 | /* Set up the RPC argument and reply structs | 1404 | /* Set up the RPC argument and reply structs |
1353 | * NB: take care not to mess about with data->commit et al. */ | 1405 | * NB: take care not to mess about with data->commit et al. */ |
@@ -1435,8 +1487,8 @@ void nfs_commit_release_pages(struct nfs_write_data *data) | |||
1435 | nfs_clear_request_commit(req); | 1487 | nfs_clear_request_commit(req); |
1436 | 1488 | ||
1437 | dprintk("NFS: commit (%s/%lld %d@%lld)", | 1489 | dprintk("NFS: commit (%s/%lld %d@%lld)", |
1438 | req->wb_context->path.dentry->d_inode->i_sb->s_id, | 1490 | req->wb_context->dentry->d_sb->s_id, |
1439 | (long long)NFS_FILEID(req->wb_context->path.dentry->d_inode), | 1491 | (long long)NFS_FILEID(req->wb_context->dentry->d_inode), |
1440 | req->wb_bytes, | 1492 | req->wb_bytes, |
1441 | (long long)req_offset(req)); | 1493 | (long long)req_offset(req)); |
1442 | if (status < 0) { | 1494 | if (status < 0) { |
@@ -1525,6 +1577,10 @@ static int nfs_commit_unstable_pages(struct inode *inode, struct writeback_contr | |||
1525 | int flags = FLUSH_SYNC; | 1577 | int flags = FLUSH_SYNC; |
1526 | int ret = 0; | 1578 | int ret = 0; |
1527 | 1579 | ||
1580 | /* no commits means nothing needs to be done */ | ||
1581 | if (!nfsi->ncommit) | ||
1582 | return ret; | ||
1583 | |||
1528 | if (wbc->sync_mode == WB_SYNC_NONE) { | 1584 | if (wbc->sync_mode == WB_SYNC_NONE) { |
1529 | /* Don't commit yet if this is a non-blocking flush and there | 1585 | /* Don't commit yet if this is a non-blocking flush and there |
1530 | * are a lot of outstanding writes for this mapping. | 1586 | * are a lot of outstanding writes for this mapping. |
@@ -1566,8 +1622,7 @@ int nfs_write_inode(struct inode *inode, struct writeback_control *wbc) | |||
1566 | int status; | 1622 | int status; |
1567 | bool sync = true; | 1623 | bool sync = true; |
1568 | 1624 | ||
1569 | if (wbc->sync_mode == WB_SYNC_NONE || wbc->nonblocking || | 1625 | if (wbc->sync_mode == WB_SYNC_NONE) |
1570 | wbc->for_background) | ||
1571 | sync = false; | 1626 | sync = false; |
1572 | 1627 | ||
1573 | status = pnfs_layoutcommit_inode(inode, sync); | 1628 | status = pnfs_layoutcommit_inode(inode, sync); |
@@ -1659,34 +1714,20 @@ out_error: | |||
1659 | int nfs_migrate_page(struct address_space *mapping, struct page *newpage, | 1714 | int nfs_migrate_page(struct address_space *mapping, struct page *newpage, |
1660 | struct page *page) | 1715 | struct page *page) |
1661 | { | 1716 | { |
1662 | struct nfs_page *req; | 1717 | /* |
1663 | int ret; | 1718 | * If PagePrivate is set, then the page is currently associated with |
1719 | * an in-progress read or write request. Don't try to migrate it. | ||
1720 | * | ||
1721 | * FIXME: we could do this in principle, but we'll need a way to ensure | ||
1722 | * that we can safely release the inode reference while holding | ||
1723 | * the page lock. | ||
1724 | */ | ||
1725 | if (PagePrivate(page)) | ||
1726 | return -EBUSY; | ||
1664 | 1727 | ||
1665 | nfs_fscache_release_page(page, GFP_KERNEL); | 1728 | nfs_fscache_release_page(page, GFP_KERNEL); |
1666 | 1729 | ||
1667 | req = nfs_find_and_lock_request(page, false); | 1730 | return migrate_page(mapping, newpage, page); |
1668 | ret = PTR_ERR(req); | ||
1669 | if (IS_ERR(req)) | ||
1670 | goto out; | ||
1671 | |||
1672 | ret = migrate_page(mapping, newpage, page); | ||
1673 | if (!req) | ||
1674 | goto out; | ||
1675 | if (ret) | ||
1676 | goto out_unlock; | ||
1677 | page_cache_get(newpage); | ||
1678 | spin_lock(&mapping->host->i_lock); | ||
1679 | req->wb_page = newpage; | ||
1680 | SetPagePrivate(newpage); | ||
1681 | set_page_private(newpage, (unsigned long)req); | ||
1682 | ClearPagePrivate(page); | ||
1683 | set_page_private(page, 0); | ||
1684 | spin_unlock(&mapping->host->i_lock); | ||
1685 | page_cache_release(page); | ||
1686 | out_unlock: | ||
1687 | nfs_clear_page_tag_locked(req); | ||
1688 | out: | ||
1689 | return ret; | ||
1690 | } | 1731 | } |
1691 | #endif | 1732 | #endif |
1692 | 1733 | ||