diff options
47 files changed, 2498 insertions, 3289 deletions
diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c index daa8e7514eae..9106f42c472c 100644 --- a/fs/lockd/mon.c +++ b/fs/lockd/mon.c | |||
@@ -159,6 +159,12 @@ static int nsm_mon_unmon(struct nsm_handle *nsm, u32 proc, struct nsm_res *res, | |||
159 | 159 | ||
160 | msg.rpc_proc = &clnt->cl_procinfo[proc]; | 160 | msg.rpc_proc = &clnt->cl_procinfo[proc]; |
161 | status = rpc_call_sync(clnt, &msg, RPC_TASK_SOFTCONN); | 161 | status = rpc_call_sync(clnt, &msg, RPC_TASK_SOFTCONN); |
162 | if (status == -ECONNREFUSED) { | ||
163 | dprintk("lockd: NSM upcall RPC failed, status=%d, forcing rebind\n", | ||
164 | status); | ||
165 | rpc_force_rebind(clnt); | ||
166 | status = rpc_call_sync(clnt, &msg, RPC_TASK_SOFTCONN); | ||
167 | } | ||
162 | if (status < 0) | 168 | if (status < 0) |
163 | dprintk("lockd: NSM upcall RPC failed, status=%d\n", | 169 | dprintk("lockd: NSM upcall RPC failed, status=%d\n", |
164 | status); | 170 | status); |
diff --git a/fs/nfs/blocklayout/Makefile b/fs/nfs/blocklayout/Makefile index d5815505c020..3ca14c36d08b 100644 --- a/fs/nfs/blocklayout/Makefile +++ b/fs/nfs/blocklayout/Makefile | |||
@@ -2,4 +2,5 @@ | |||
2 | # Makefile for the pNFS block layout driver kernel module | 2 | # Makefile for the pNFS block layout driver kernel module |
3 | # | 3 | # |
4 | obj-$(CONFIG_PNFS_BLOCK) += blocklayoutdriver.o | 4 | obj-$(CONFIG_PNFS_BLOCK) += blocklayoutdriver.o |
5 | blocklayoutdriver-objs := blocklayout.o extents.o blocklayoutdev.o blocklayoutdm.o | 5 | |
6 | blocklayoutdriver-y += blocklayout.o dev.o extent_tree.o rpc_pipefs.o | ||
diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c index cbb1797149d5..5228f201d3d5 100644 --- a/fs/nfs/blocklayout/blocklayout.c +++ b/fs/nfs/blocklayout/blocklayout.c | |||
@@ -35,7 +35,6 @@ | |||
35 | #include <linux/mount.h> | 35 | #include <linux/mount.h> |
36 | #include <linux/namei.h> | 36 | #include <linux/namei.h> |
37 | #include <linux/bio.h> /* struct bio */ | 37 | #include <linux/bio.h> /* struct bio */ |
38 | #include <linux/buffer_head.h> /* various write calls */ | ||
39 | #include <linux/prefetch.h> | 38 | #include <linux/prefetch.h> |
40 | #include <linux/pagevec.h> | 39 | #include <linux/pagevec.h> |
41 | 40 | ||
@@ -50,40 +49,16 @@ MODULE_LICENSE("GPL"); | |||
50 | MODULE_AUTHOR("Andy Adamson <andros@citi.umich.edu>"); | 49 | MODULE_AUTHOR("Andy Adamson <andros@citi.umich.edu>"); |
51 | MODULE_DESCRIPTION("The NFSv4.1 pNFS Block layout driver"); | 50 | MODULE_DESCRIPTION("The NFSv4.1 pNFS Block layout driver"); |
52 | 51 | ||
53 | static void print_page(struct page *page) | 52 | static bool is_hole(struct pnfs_block_extent *be) |
54 | { | 53 | { |
55 | dprintk("PRINTPAGE page %p\n", page); | 54 | switch (be->be_state) { |
56 | dprintk(" PagePrivate %d\n", PagePrivate(page)); | 55 | case PNFS_BLOCK_NONE_DATA: |
57 | dprintk(" PageUptodate %d\n", PageUptodate(page)); | 56 | return true; |
58 | dprintk(" PageError %d\n", PageError(page)); | 57 | case PNFS_BLOCK_INVALID_DATA: |
59 | dprintk(" PageDirty %d\n", PageDirty(page)); | 58 | return be->be_tag ? false : true; |
60 | dprintk(" PageReferenced %d\n", PageReferenced(page)); | 59 | default: |
61 | dprintk(" PageLocked %d\n", PageLocked(page)); | 60 | return false; |
62 | dprintk(" PageWriteback %d\n", PageWriteback(page)); | 61 | } |
63 | dprintk(" PageMappedToDisk %d\n", PageMappedToDisk(page)); | ||
64 | dprintk("\n"); | ||
65 | } | ||
66 | |||
67 | /* Given the be associated with isect, determine if page data needs to be | ||
68 | * initialized. | ||
69 | */ | ||
70 | static int is_hole(struct pnfs_block_extent *be, sector_t isect) | ||
71 | { | ||
72 | if (be->be_state == PNFS_BLOCK_NONE_DATA) | ||
73 | return 1; | ||
74 | else if (be->be_state != PNFS_BLOCK_INVALID_DATA) | ||
75 | return 0; | ||
76 | else | ||
77 | return !bl_is_sector_init(be->be_inval, isect); | ||
78 | } | ||
79 | |||
80 | /* Given the be associated with isect, determine if page data can be | ||
81 | * written to disk. | ||
82 | */ | ||
83 | static int is_writable(struct pnfs_block_extent *be, sector_t isect) | ||
84 | { | ||
85 | return (be->be_state == PNFS_BLOCK_READWRITE_DATA || | ||
86 | be->be_state == PNFS_BLOCK_INVALID_DATA); | ||
87 | } | 62 | } |
88 | 63 | ||
89 | /* The data we are handed might be spread across several bios. We need | 64 | /* The data we are handed might be spread across several bios. We need |
@@ -91,9 +66,8 @@ static int is_writable(struct pnfs_block_extent *be, sector_t isect) | |||
91 | */ | 66 | */ |
92 | struct parallel_io { | 67 | struct parallel_io { |
93 | struct kref refcnt; | 68 | struct kref refcnt; |
94 | void (*pnfs_callback) (void *data, int num_se); | 69 | void (*pnfs_callback) (void *data); |
95 | void *data; | 70 | void *data; |
96 | int bse_count; | ||
97 | }; | 71 | }; |
98 | 72 | ||
99 | static inline struct parallel_io *alloc_parallel(void *data) | 73 | static inline struct parallel_io *alloc_parallel(void *data) |
@@ -104,7 +78,6 @@ static inline struct parallel_io *alloc_parallel(void *data) | |||
104 | if (rv) { | 78 | if (rv) { |
105 | rv->data = data; | 79 | rv->data = data; |
106 | kref_init(&rv->refcnt); | 80 | kref_init(&rv->refcnt); |
107 | rv->bse_count = 0; | ||
108 | } | 81 | } |
109 | return rv; | 82 | return rv; |
110 | } | 83 | } |
@@ -119,7 +92,7 @@ static void destroy_parallel(struct kref *kref) | |||
119 | struct parallel_io *p = container_of(kref, struct parallel_io, refcnt); | 92 | struct parallel_io *p = container_of(kref, struct parallel_io, refcnt); |
120 | 93 | ||
121 | dprintk("%s enter\n", __func__); | 94 | dprintk("%s enter\n", __func__); |
122 | p->pnfs_callback(p->data, p->bse_count); | 95 | p->pnfs_callback(p->data); |
123 | kfree(p); | 96 | kfree(p); |
124 | } | 97 | } |
125 | 98 | ||
@@ -141,10 +114,9 @@ bl_submit_bio(int rw, struct bio *bio) | |||
141 | return NULL; | 114 | return NULL; |
142 | } | 115 | } |
143 | 116 | ||
144 | static struct bio *bl_alloc_init_bio(int npg, sector_t isect, | 117 | static struct bio * |
145 | struct pnfs_block_extent *be, | 118 | bl_alloc_init_bio(int npg, struct block_device *bdev, sector_t disk_sector, |
146 | void (*end_io)(struct bio *, int err), | 119 | void (*end_io)(struct bio *, int err), struct parallel_io *par) |
147 | struct parallel_io *par) | ||
148 | { | 120 | { |
149 | struct bio *bio; | 121 | struct bio *bio; |
150 | 122 | ||
@@ -156,58 +128,64 @@ static struct bio *bl_alloc_init_bio(int npg, sector_t isect, | |||
156 | } | 128 | } |
157 | 129 | ||
158 | if (bio) { | 130 | if (bio) { |
159 | bio->bi_iter.bi_sector = isect - be->be_f_offset + | 131 | bio->bi_iter.bi_sector = disk_sector; |
160 | be->be_v_offset; | 132 | bio->bi_bdev = bdev; |
161 | bio->bi_bdev = be->be_mdev; | ||
162 | bio->bi_end_io = end_io; | 133 | bio->bi_end_io = end_io; |
163 | bio->bi_private = par; | 134 | bio->bi_private = par; |
164 | } | 135 | } |
165 | return bio; | 136 | return bio; |
166 | } | 137 | } |
167 | 138 | ||
168 | static struct bio *do_add_page_to_bio(struct bio *bio, int npg, int rw, | 139 | static struct bio * |
169 | sector_t isect, struct page *page, | 140 | do_add_page_to_bio(struct bio *bio, int npg, int rw, sector_t isect, |
170 | struct pnfs_block_extent *be, | 141 | struct page *page, struct pnfs_block_dev_map *map, |
171 | void (*end_io)(struct bio *, int err), | 142 | struct pnfs_block_extent *be, |
172 | struct parallel_io *par, | 143 | void (*end_io)(struct bio *, int err), |
173 | unsigned int offset, int len) | 144 | struct parallel_io *par, unsigned int offset, int *len) |
174 | { | 145 | { |
175 | isect = isect + (offset >> SECTOR_SHIFT); | 146 | struct pnfs_block_dev *dev = |
147 | container_of(be->be_device, struct pnfs_block_dev, node); | ||
148 | u64 disk_addr, end; | ||
149 | |||
176 | dprintk("%s: npg %d rw %d isect %llu offset %u len %d\n", __func__, | 150 | dprintk("%s: npg %d rw %d isect %llu offset %u len %d\n", __func__, |
177 | npg, rw, (unsigned long long)isect, offset, len); | 151 | npg, rw, (unsigned long long)isect, offset, *len); |
152 | |||
153 | /* translate to device offset */ | ||
154 | isect += be->be_v_offset; | ||
155 | isect -= be->be_f_offset; | ||
156 | |||
157 | /* translate to physical disk offset */ | ||
158 | disk_addr = (u64)isect << SECTOR_SHIFT; | ||
159 | if (disk_addr < map->start || disk_addr >= map->start + map->len) { | ||
160 | if (!dev->map(dev, disk_addr, map)) | ||
161 | return ERR_PTR(-EIO); | ||
162 | bio = bl_submit_bio(rw, bio); | ||
163 | } | ||
164 | disk_addr += map->disk_offset; | ||
165 | disk_addr -= map->start; | ||
166 | |||
167 | /* limit length to what the device mapping allows */ | ||
168 | end = disk_addr + *len; | ||
169 | if (end >= map->start + map->len) | ||
170 | *len = map->start + map->len - disk_addr; | ||
171 | |||
178 | retry: | 172 | retry: |
179 | if (!bio) { | 173 | if (!bio) { |
180 | bio = bl_alloc_init_bio(npg, isect, be, end_io, par); | 174 | bio = bl_alloc_init_bio(npg, map->bdev, |
175 | disk_addr >> SECTOR_SHIFT, end_io, par); | ||
181 | if (!bio) | 176 | if (!bio) |
182 | return ERR_PTR(-ENOMEM); | 177 | return ERR_PTR(-ENOMEM); |
183 | } | 178 | } |
184 | if (bio_add_page(bio, page, len, offset) < len) { | 179 | if (bio_add_page(bio, page, *len, offset) < *len) { |
185 | bio = bl_submit_bio(rw, bio); | 180 | bio = bl_submit_bio(rw, bio); |
186 | goto retry; | 181 | goto retry; |
187 | } | 182 | } |
188 | return bio; | 183 | return bio; |
189 | } | 184 | } |
190 | 185 | ||
191 | static struct bio *bl_add_page_to_bio(struct bio *bio, int npg, int rw, | ||
192 | sector_t isect, struct page *page, | ||
193 | struct pnfs_block_extent *be, | ||
194 | void (*end_io)(struct bio *, int err), | ||
195 | struct parallel_io *par) | ||
196 | { | ||
197 | return do_add_page_to_bio(bio, npg, rw, isect, page, be, | ||
198 | end_io, par, 0, PAGE_CACHE_SIZE); | ||
199 | } | ||
200 | |||
201 | /* This is basically copied from mpage_end_io_read */ | ||
202 | static void bl_end_io_read(struct bio *bio, int err) | 186 | static void bl_end_io_read(struct bio *bio, int err) |
203 | { | 187 | { |
204 | struct parallel_io *par = bio->bi_private; | 188 | struct parallel_io *par = bio->bi_private; |
205 | struct bio_vec *bvec; | ||
206 | int i; | ||
207 | |||
208 | if (!err) | ||
209 | bio_for_each_segment_all(bvec, bio, i) | ||
210 | SetPageUptodate(bvec->bv_page); | ||
211 | 189 | ||
212 | if (err) { | 190 | if (err) { |
213 | struct nfs_pgio_header *header = par->data; | 191 | struct nfs_pgio_header *header = par->data; |
@@ -216,6 +194,7 @@ static void bl_end_io_read(struct bio *bio, int err) | |||
216 | header->pnfs_error = -EIO; | 194 | header->pnfs_error = -EIO; |
217 | pnfs_set_lo_fail(header->lseg); | 195 | pnfs_set_lo_fail(header->lseg); |
218 | } | 196 | } |
197 | |||
219 | bio_put(bio); | 198 | bio_put(bio); |
220 | put_parallel(par); | 199 | put_parallel(par); |
221 | } | 200 | } |
@@ -231,7 +210,7 @@ static void bl_read_cleanup(struct work_struct *work) | |||
231 | } | 210 | } |
232 | 211 | ||
233 | static void | 212 | static void |
234 | bl_end_par_io_read(void *data, int unused) | 213 | bl_end_par_io_read(void *data) |
235 | { | 214 | { |
236 | struct nfs_pgio_header *hdr = data; | 215 | struct nfs_pgio_header *hdr = data; |
237 | 216 | ||
@@ -241,88 +220,78 @@ bl_end_par_io_read(void *data, int unused) | |||
241 | } | 220 | } |
242 | 221 | ||
243 | static enum pnfs_try_status | 222 | static enum pnfs_try_status |
244 | bl_read_pagelist(struct nfs_pgio_header *hdr) | 223 | bl_read_pagelist(struct nfs_pgio_header *header) |
245 | { | 224 | { |
246 | struct nfs_pgio_header *header = hdr; | 225 | struct pnfs_block_layout *bl = BLK_LSEG2EXT(header->lseg); |
247 | int i, hole; | 226 | struct pnfs_block_dev_map map = { .start = NFS4_MAX_UINT64 }; |
248 | struct bio *bio = NULL; | 227 | struct bio *bio = NULL; |
249 | struct pnfs_block_extent *be = NULL, *cow_read = NULL; | 228 | struct pnfs_block_extent be; |
250 | sector_t isect, extent_length = 0; | 229 | sector_t isect, extent_length = 0; |
251 | struct parallel_io *par; | 230 | struct parallel_io *par; |
252 | loff_t f_offset = hdr->args.offset; | 231 | loff_t f_offset = header->args.offset; |
253 | size_t bytes_left = hdr->args.count; | 232 | size_t bytes_left = header->args.count; |
254 | unsigned int pg_offset, pg_len; | 233 | unsigned int pg_offset, pg_len; |
255 | struct page **pages = hdr->args.pages; | 234 | struct page **pages = header->args.pages; |
256 | int pg_index = hdr->args.pgbase >> PAGE_CACHE_SHIFT; | 235 | int pg_index = header->args.pgbase >> PAGE_CACHE_SHIFT; |
257 | const bool is_dio = (header->dreq != NULL); | 236 | const bool is_dio = (header->dreq != NULL); |
237 | struct blk_plug plug; | ||
238 | int i; | ||
258 | 239 | ||
259 | dprintk("%s enter nr_pages %u offset %lld count %u\n", __func__, | 240 | dprintk("%s enter nr_pages %u offset %lld count %u\n", __func__, |
260 | hdr->page_array.npages, f_offset, | 241 | header->page_array.npages, f_offset, |
261 | (unsigned int)hdr->args.count); | 242 | (unsigned int)header->args.count); |
262 | 243 | ||
263 | par = alloc_parallel(hdr); | 244 | par = alloc_parallel(header); |
264 | if (!par) | 245 | if (!par) |
265 | goto use_mds; | 246 | return PNFS_NOT_ATTEMPTED; |
266 | par->pnfs_callback = bl_end_par_io_read; | 247 | par->pnfs_callback = bl_end_par_io_read; |
267 | /* At this point, we can no longer jump to use_mds */ | 248 | |
249 | blk_start_plug(&plug); | ||
268 | 250 | ||
269 | isect = (sector_t) (f_offset >> SECTOR_SHIFT); | 251 | isect = (sector_t) (f_offset >> SECTOR_SHIFT); |
270 | /* Code assumes extents are page-aligned */ | 252 | /* Code assumes extents are page-aligned */ |
271 | for (i = pg_index; i < hdr->page_array.npages; i++) { | 253 | for (i = pg_index; i < header->page_array.npages; i++) { |
272 | if (!extent_length) { | 254 | if (extent_length <= 0) { |
273 | /* We've used up the previous extent */ | 255 | /* We've used up the previous extent */ |
274 | bl_put_extent(be); | ||
275 | bl_put_extent(cow_read); | ||
276 | bio = bl_submit_bio(READ, bio); | 256 | bio = bl_submit_bio(READ, bio); |
257 | |||
277 | /* Get the next one */ | 258 | /* Get the next one */ |
278 | be = bl_find_get_extent(BLK_LSEG2EXT(header->lseg), | 259 | if (!ext_tree_lookup(bl, isect, &be, false)) { |
279 | isect, &cow_read); | ||
280 | if (!be) { | ||
281 | header->pnfs_error = -EIO; | 260 | header->pnfs_error = -EIO; |
282 | goto out; | 261 | goto out; |
283 | } | 262 | } |
284 | extent_length = be->be_length - | 263 | extent_length = be.be_length - (isect - be.be_f_offset); |
285 | (isect - be->be_f_offset); | ||
286 | if (cow_read) { | ||
287 | sector_t cow_length = cow_read->be_length - | ||
288 | (isect - cow_read->be_f_offset); | ||
289 | extent_length = min(extent_length, cow_length); | ||
290 | } | ||
291 | } | 264 | } |
292 | 265 | ||
266 | pg_offset = f_offset & ~PAGE_CACHE_MASK; | ||
293 | if (is_dio) { | 267 | if (is_dio) { |
294 | pg_offset = f_offset & ~PAGE_CACHE_MASK; | ||
295 | if (pg_offset + bytes_left > PAGE_CACHE_SIZE) | 268 | if (pg_offset + bytes_left > PAGE_CACHE_SIZE) |
296 | pg_len = PAGE_CACHE_SIZE - pg_offset; | 269 | pg_len = PAGE_CACHE_SIZE - pg_offset; |
297 | else | 270 | else |
298 | pg_len = bytes_left; | 271 | pg_len = bytes_left; |
299 | |||
300 | f_offset += pg_len; | ||
301 | bytes_left -= pg_len; | ||
302 | isect += (pg_offset >> SECTOR_SHIFT); | ||
303 | } else { | 272 | } else { |
304 | pg_offset = 0; | 273 | BUG_ON(pg_offset != 0); |
305 | pg_len = PAGE_CACHE_SIZE; | 274 | pg_len = PAGE_CACHE_SIZE; |
306 | } | 275 | } |
307 | 276 | ||
308 | hole = is_hole(be, isect); | 277 | isect += (pg_offset >> SECTOR_SHIFT); |
309 | if (hole && !cow_read) { | 278 | extent_length -= (pg_offset >> SECTOR_SHIFT); |
279 | |||
280 | if (is_hole(&be)) { | ||
310 | bio = bl_submit_bio(READ, bio); | 281 | bio = bl_submit_bio(READ, bio); |
311 | /* Fill hole w/ zeroes w/o accessing device */ | 282 | /* Fill hole w/ zeroes w/o accessing device */ |
312 | dprintk("%s Zeroing page for hole\n", __func__); | 283 | dprintk("%s Zeroing page for hole\n", __func__); |
313 | zero_user_segment(pages[i], pg_offset, pg_len); | 284 | zero_user_segment(pages[i], pg_offset, pg_len); |
314 | print_page(pages[i]); | ||
315 | SetPageUptodate(pages[i]); | ||
316 | } else { | ||
317 | struct pnfs_block_extent *be_read; | ||
318 | 285 | ||
319 | be_read = (hole && cow_read) ? cow_read : be; | 286 | /* invalidate map */ |
287 | map.start = NFS4_MAX_UINT64; | ||
288 | } else { | ||
320 | bio = do_add_page_to_bio(bio, | 289 | bio = do_add_page_to_bio(bio, |
321 | hdr->page_array.npages - i, | 290 | header->page_array.npages - i, |
322 | READ, | 291 | READ, |
323 | isect, pages[i], be_read, | 292 | isect, pages[i], &map, &be, |
324 | bl_end_io_read, par, | 293 | bl_end_io_read, par, |
325 | pg_offset, pg_len); | 294 | pg_offset, &pg_len); |
326 | if (IS_ERR(bio)) { | 295 | if (IS_ERR(bio)) { |
327 | header->pnfs_error = PTR_ERR(bio); | 296 | header->pnfs_error = PTR_ERR(bio); |
328 | bio = NULL; | 297 | bio = NULL; |
@@ -330,75 +299,21 @@ bl_read_pagelist(struct nfs_pgio_header *hdr) | |||
330 | } | 299 | } |
331 | } | 300 | } |
332 | isect += (pg_len >> SECTOR_SHIFT); | 301 | isect += (pg_len >> SECTOR_SHIFT); |
333 | extent_length -= PAGE_CACHE_SECTORS; | 302 | extent_length -= (pg_len >> SECTOR_SHIFT); |
303 | f_offset += pg_len; | ||
304 | bytes_left -= pg_len; | ||
334 | } | 305 | } |
335 | if ((isect << SECTOR_SHIFT) >= header->inode->i_size) { | 306 | if ((isect << SECTOR_SHIFT) >= header->inode->i_size) { |
336 | hdr->res.eof = 1; | 307 | header->res.eof = 1; |
337 | hdr->res.count = header->inode->i_size - hdr->args.offset; | 308 | header->res.count = header->inode->i_size - header->args.offset; |
338 | } else { | 309 | } else { |
339 | hdr->res.count = (isect << SECTOR_SHIFT) - hdr->args.offset; | 310 | header->res.count = (isect << SECTOR_SHIFT) - header->args.offset; |
340 | } | 311 | } |
341 | out: | 312 | out: |
342 | bl_put_extent(be); | ||
343 | bl_put_extent(cow_read); | ||
344 | bl_submit_bio(READ, bio); | 313 | bl_submit_bio(READ, bio); |
314 | blk_finish_plug(&plug); | ||
345 | put_parallel(par); | 315 | put_parallel(par); |
346 | return PNFS_ATTEMPTED; | 316 | return PNFS_ATTEMPTED; |
347 | |||
348 | use_mds: | ||
349 | dprintk("Giving up and using normal NFS\n"); | ||
350 | return PNFS_NOT_ATTEMPTED; | ||
351 | } | ||
352 | |||
353 | static void mark_extents_written(struct pnfs_block_layout *bl, | ||
354 | __u64 offset, __u32 count) | ||
355 | { | ||
356 | sector_t isect, end; | ||
357 | struct pnfs_block_extent *be; | ||
358 | struct pnfs_block_short_extent *se; | ||
359 | |||
360 | dprintk("%s(%llu, %u)\n", __func__, offset, count); | ||
361 | if (count == 0) | ||
362 | return; | ||
363 | isect = (offset & (long)(PAGE_CACHE_MASK)) >> SECTOR_SHIFT; | ||
364 | end = (offset + count + PAGE_CACHE_SIZE - 1) & (long)(PAGE_CACHE_MASK); | ||
365 | end >>= SECTOR_SHIFT; | ||
366 | while (isect < end) { | ||
367 | sector_t len; | ||
368 | be = bl_find_get_extent(bl, isect, NULL); | ||
369 | BUG_ON(!be); /* FIXME */ | ||
370 | len = min(end, be->be_f_offset + be->be_length) - isect; | ||
371 | if (be->be_state == PNFS_BLOCK_INVALID_DATA) { | ||
372 | se = bl_pop_one_short_extent(be->be_inval); | ||
373 | BUG_ON(!se); | ||
374 | bl_mark_for_commit(be, isect, len, se); | ||
375 | } | ||
376 | isect += len; | ||
377 | bl_put_extent(be); | ||
378 | } | ||
379 | } | ||
380 | |||
381 | static void bl_end_io_write_zero(struct bio *bio, int err) | ||
382 | { | ||
383 | struct parallel_io *par = bio->bi_private; | ||
384 | struct bio_vec *bvec; | ||
385 | int i; | ||
386 | |||
387 | bio_for_each_segment_all(bvec, bio, i) { | ||
388 | /* This is the zeroing page we added */ | ||
389 | end_page_writeback(bvec->bv_page); | ||
390 | page_cache_release(bvec->bv_page); | ||
391 | } | ||
392 | |||
393 | if (unlikely(err)) { | ||
394 | struct nfs_pgio_header *header = par->data; | ||
395 | |||
396 | if (!header->pnfs_error) | ||
397 | header->pnfs_error = -EIO; | ||
398 | pnfs_set_lo_fail(header->lseg); | ||
399 | } | ||
400 | bio_put(bio); | ||
401 | put_parallel(par); | ||
402 | } | 317 | } |
403 | 318 | ||
404 | static void bl_end_io_write(struct bio *bio, int err) | 319 | static void bl_end_io_write(struct bio *bio, int err) |
@@ -421,533 +336,118 @@ static void bl_end_io_write(struct bio *bio, int err) | |||
421 | */ | 336 | */ |
422 | static void bl_write_cleanup(struct work_struct *work) | 337 | static void bl_write_cleanup(struct work_struct *work) |
423 | { | 338 | { |
424 | struct rpc_task *task; | 339 | struct rpc_task *task = container_of(work, struct rpc_task, u.tk_work); |
425 | struct nfs_pgio_header *hdr; | 340 | struct nfs_pgio_header *hdr = |
341 | container_of(task, struct nfs_pgio_header, task); | ||
342 | |||
426 | dprintk("%s enter\n", __func__); | 343 | dprintk("%s enter\n", __func__); |
427 | task = container_of(work, struct rpc_task, u.tk_work); | 344 | |
428 | hdr = container_of(task, struct nfs_pgio_header, task); | ||
429 | if (likely(!hdr->pnfs_error)) { | 345 | if (likely(!hdr->pnfs_error)) { |
430 | /* Marks for LAYOUTCOMMIT */ | 346 | struct pnfs_block_layout *bl = BLK_LSEG2EXT(hdr->lseg); |
431 | mark_extents_written(BLK_LSEG2EXT(hdr->lseg), | 347 | u64 start = hdr->args.offset & (loff_t)PAGE_CACHE_MASK; |
432 | hdr->args.offset, hdr->args.count); | 348 | u64 end = (hdr->args.offset + hdr->args.count + |
349 | PAGE_CACHE_SIZE - 1) & (loff_t)PAGE_CACHE_MASK; | ||
350 | |||
351 | ext_tree_mark_written(bl, start >> SECTOR_SHIFT, | ||
352 | (end - start) >> SECTOR_SHIFT); | ||
433 | } | 353 | } |
354 | |||
434 | pnfs_ld_write_done(hdr); | 355 | pnfs_ld_write_done(hdr); |
435 | } | 356 | } |
436 | 357 | ||
437 | /* Called when last of bios associated with a bl_write_pagelist call finishes */ | 358 | /* Called when last of bios associated with a bl_write_pagelist call finishes */ |
438 | static void bl_end_par_io_write(void *data, int num_se) | 359 | static void bl_end_par_io_write(void *data) |
439 | { | 360 | { |
440 | struct nfs_pgio_header *hdr = data; | 361 | struct nfs_pgio_header *hdr = data; |
441 | 362 | ||
442 | if (unlikely(hdr->pnfs_error)) { | ||
443 | bl_free_short_extents(&BLK_LSEG2EXT(hdr->lseg)->bl_inval, | ||
444 | num_se); | ||
445 | } | ||
446 | |||
447 | hdr->task.tk_status = hdr->pnfs_error; | 363 | hdr->task.tk_status = hdr->pnfs_error; |
448 | hdr->verf.committed = NFS_FILE_SYNC; | 364 | hdr->verf.committed = NFS_FILE_SYNC; |
449 | INIT_WORK(&hdr->task.u.tk_work, bl_write_cleanup); | 365 | INIT_WORK(&hdr->task.u.tk_work, bl_write_cleanup); |
450 | schedule_work(&hdr->task.u.tk_work); | 366 | schedule_work(&hdr->task.u.tk_work); |
451 | } | 367 | } |
452 | 368 | ||
453 | /* FIXME STUB - mark intersection of layout and page as bad, so is not | ||
454 | * used again. | ||
455 | */ | ||
456 | static void mark_bad_read(void) | ||
457 | { | ||
458 | return; | ||
459 | } | ||
460 | |||
461 | /* | ||
462 | * map_block: map a requested I/0 block (isect) into an offset in the LVM | ||
463 | * block_device | ||
464 | */ | ||
465 | static void | ||
466 | map_block(struct buffer_head *bh, sector_t isect, struct pnfs_block_extent *be) | ||
467 | { | ||
468 | dprintk("%s enter be=%p\n", __func__, be); | ||
469 | |||
470 | set_buffer_mapped(bh); | ||
471 | bh->b_bdev = be->be_mdev; | ||
472 | bh->b_blocknr = (isect - be->be_f_offset + be->be_v_offset) >> | ||
473 | (be->be_mdev->bd_inode->i_blkbits - SECTOR_SHIFT); | ||
474 | |||
475 | dprintk("%s isect %llu, bh->b_blocknr %ld, using bsize %Zd\n", | ||
476 | __func__, (unsigned long long)isect, (long)bh->b_blocknr, | ||
477 | bh->b_size); | ||
478 | return; | ||
479 | } | ||
480 | |||
481 | static void | ||
482 | bl_read_single_end_io(struct bio *bio, int error) | ||
483 | { | ||
484 | struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; | ||
485 | struct page *page = bvec->bv_page; | ||
486 | |||
487 | /* Only one page in bvec */ | ||
488 | unlock_page(page); | ||
489 | } | ||
490 | |||
491 | static int | ||
492 | bl_do_readpage_sync(struct page *page, struct pnfs_block_extent *be, | ||
493 | unsigned int offset, unsigned int len) | ||
494 | { | ||
495 | struct bio *bio; | ||
496 | struct page *shadow_page; | ||
497 | sector_t isect; | ||
498 | char *kaddr, *kshadow_addr; | ||
499 | int ret = 0; | ||
500 | |||
501 | dprintk("%s: offset %u len %u\n", __func__, offset, len); | ||
502 | |||
503 | shadow_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM); | ||
504 | if (shadow_page == NULL) | ||
505 | return -ENOMEM; | ||
506 | |||
507 | bio = bio_alloc(GFP_NOIO, 1); | ||
508 | if (bio == NULL) | ||
509 | return -ENOMEM; | ||
510 | |||
511 | isect = (page->index << PAGE_CACHE_SECTOR_SHIFT) + | ||
512 | (offset / SECTOR_SIZE); | ||
513 | |||
514 | bio->bi_iter.bi_sector = isect - be->be_f_offset + be->be_v_offset; | ||
515 | bio->bi_bdev = be->be_mdev; | ||
516 | bio->bi_end_io = bl_read_single_end_io; | ||
517 | |||
518 | lock_page(shadow_page); | ||
519 | if (bio_add_page(bio, shadow_page, | ||
520 | SECTOR_SIZE, round_down(offset, SECTOR_SIZE)) == 0) { | ||
521 | unlock_page(shadow_page); | ||
522 | bio_put(bio); | ||
523 | return -EIO; | ||
524 | } | ||
525 | |||
526 | submit_bio(READ, bio); | ||
527 | wait_on_page_locked(shadow_page); | ||
528 | if (unlikely(!test_bit(BIO_UPTODATE, &bio->bi_flags))) { | ||
529 | ret = -EIO; | ||
530 | } else { | ||
531 | kaddr = kmap_atomic(page); | ||
532 | kshadow_addr = kmap_atomic(shadow_page); | ||
533 | memcpy(kaddr + offset, kshadow_addr + offset, len); | ||
534 | kunmap_atomic(kshadow_addr); | ||
535 | kunmap_atomic(kaddr); | ||
536 | } | ||
537 | __free_page(shadow_page); | ||
538 | bio_put(bio); | ||
539 | |||
540 | return ret; | ||
541 | } | ||
542 | |||
543 | static int | ||
544 | bl_read_partial_page_sync(struct page *page, struct pnfs_block_extent *be, | ||
545 | unsigned int dirty_offset, unsigned int dirty_len, | ||
546 | bool full_page) | ||
547 | { | ||
548 | int ret = 0; | ||
549 | unsigned int start, end; | ||
550 | |||
551 | if (full_page) { | ||
552 | start = 0; | ||
553 | end = PAGE_CACHE_SIZE; | ||
554 | } else { | ||
555 | start = round_down(dirty_offset, SECTOR_SIZE); | ||
556 | end = round_up(dirty_offset + dirty_len, SECTOR_SIZE); | ||
557 | } | ||
558 | |||
559 | dprintk("%s: offset %u len %d\n", __func__, dirty_offset, dirty_len); | ||
560 | if (!be) { | ||
561 | zero_user_segments(page, start, dirty_offset, | ||
562 | dirty_offset + dirty_len, end); | ||
563 | if (start == 0 && end == PAGE_CACHE_SIZE && | ||
564 | trylock_page(page)) { | ||
565 | SetPageUptodate(page); | ||
566 | unlock_page(page); | ||
567 | } | ||
568 | return ret; | ||
569 | } | ||
570 | |||
571 | if (start != dirty_offset) | ||
572 | ret = bl_do_readpage_sync(page, be, start, dirty_offset - start); | ||
573 | |||
574 | if (!ret && (dirty_offset + dirty_len < end)) | ||
575 | ret = bl_do_readpage_sync(page, be, dirty_offset + dirty_len, | ||
576 | end - dirty_offset - dirty_len); | ||
577 | |||
578 | return ret; | ||
579 | } | ||
580 | |||
581 | /* Given an unmapped page, zero it or read in page for COW, page is locked | ||
582 | * by caller. | ||
583 | */ | ||
584 | static int | ||
585 | init_page_for_write(struct page *page, struct pnfs_block_extent *cow_read) | ||
586 | { | ||
587 | struct buffer_head *bh = NULL; | ||
588 | int ret = 0; | ||
589 | sector_t isect; | ||
590 | |||
591 | dprintk("%s enter, %p\n", __func__, page); | ||
592 | BUG_ON(PageUptodate(page)); | ||
593 | if (!cow_read) { | ||
594 | zero_user_segment(page, 0, PAGE_SIZE); | ||
595 | SetPageUptodate(page); | ||
596 | goto cleanup; | ||
597 | } | ||
598 | |||
599 | bh = alloc_page_buffers(page, PAGE_CACHE_SIZE, 0); | ||
600 | if (!bh) { | ||
601 | ret = -ENOMEM; | ||
602 | goto cleanup; | ||
603 | } | ||
604 | |||
605 | isect = (sector_t) page->index << PAGE_CACHE_SECTOR_SHIFT; | ||
606 | map_block(bh, isect, cow_read); | ||
607 | if (!bh_uptodate_or_lock(bh)) | ||
608 | ret = bh_submit_read(bh); | ||
609 | if (ret) | ||
610 | goto cleanup; | ||
611 | SetPageUptodate(page); | ||
612 | |||
613 | cleanup: | ||
614 | if (bh) | ||
615 | free_buffer_head(bh); | ||
616 | if (ret) { | ||
617 | /* Need to mark layout with bad read...should now | ||
618 | * just use nfs4 for reads and writes. | ||
619 | */ | ||
620 | mark_bad_read(); | ||
621 | } | ||
622 | return ret; | ||
623 | } | ||
624 | |||
625 | /* Find or create a zeroing page marked being writeback. | ||
626 | * Return ERR_PTR on error, NULL to indicate skip this page and page itself | ||
627 | * to indicate write out. | ||
628 | */ | ||
629 | static struct page * | ||
630 | bl_find_get_zeroing_page(struct inode *inode, pgoff_t index, | ||
631 | struct pnfs_block_extent *cow_read) | ||
632 | { | ||
633 | struct page *page; | ||
634 | int locked = 0; | ||
635 | page = find_get_page(inode->i_mapping, index); | ||
636 | if (page) | ||
637 | goto check_page; | ||
638 | |||
639 | page = find_or_create_page(inode->i_mapping, index, GFP_NOFS); | ||
640 | if (unlikely(!page)) { | ||
641 | dprintk("%s oom\n", __func__); | ||
642 | return ERR_PTR(-ENOMEM); | ||
643 | } | ||
644 | locked = 1; | ||
645 | |||
646 | check_page: | ||
647 | /* PageDirty: Other will write this out | ||
648 | * PageWriteback: Other is writing this out | ||
649 | * PageUptodate: It was read before | ||
650 | */ | ||
651 | if (PageDirty(page) || PageWriteback(page)) { | ||
652 | print_page(page); | ||
653 | if (locked) | ||
654 | unlock_page(page); | ||
655 | page_cache_release(page); | ||
656 | return NULL; | ||
657 | } | ||
658 | |||
659 | if (!locked) { | ||
660 | lock_page(page); | ||
661 | locked = 1; | ||
662 | goto check_page; | ||
663 | } | ||
664 | if (!PageUptodate(page)) { | ||
665 | /* New page, readin or zero it */ | ||
666 | init_page_for_write(page, cow_read); | ||
667 | } | ||
668 | set_page_writeback(page); | ||
669 | unlock_page(page); | ||
670 | |||
671 | return page; | ||
672 | } | ||
673 | |||
674 | static enum pnfs_try_status | 369 | static enum pnfs_try_status |
675 | bl_write_pagelist(struct nfs_pgio_header *header, int sync) | 370 | bl_write_pagelist(struct nfs_pgio_header *header, int sync) |
676 | { | 371 | { |
677 | int i, ret, npg_zero, pg_index, last = 0; | 372 | struct pnfs_block_layout *bl = BLK_LSEG2EXT(header->lseg); |
373 | struct pnfs_block_dev_map map = { .start = NFS4_MAX_UINT64 }; | ||
678 | struct bio *bio = NULL; | 374 | struct bio *bio = NULL; |
679 | struct pnfs_block_extent *be = NULL, *cow_read = NULL; | 375 | struct pnfs_block_extent be; |
680 | sector_t isect, last_isect = 0, extent_length = 0; | 376 | sector_t isect, extent_length = 0; |
681 | struct parallel_io *par = NULL; | 377 | struct parallel_io *par = NULL; |
682 | loff_t offset = header->args.offset; | 378 | loff_t offset = header->args.offset; |
683 | size_t count = header->args.count; | 379 | size_t count = header->args.count; |
684 | unsigned int pg_offset, pg_len, saved_len; | ||
685 | struct page **pages = header->args.pages; | 380 | struct page **pages = header->args.pages; |
686 | struct page *page; | 381 | int pg_index = pg_index = header->args.pgbase >> PAGE_CACHE_SHIFT; |
687 | pgoff_t index; | 382 | unsigned int pg_len; |
688 | u64 temp; | 383 | struct blk_plug plug; |
689 | int npg_per_block = | 384 | int i; |
690 | NFS_SERVER(header->inode)->pnfs_blksize >> PAGE_CACHE_SHIFT; | ||
691 | 385 | ||
692 | dprintk("%s enter, %Zu@%lld\n", __func__, count, offset); | 386 | dprintk("%s enter, %Zu@%lld\n", __func__, count, offset); |
693 | 387 | ||
694 | if (header->dreq != NULL && | ||
695 | (!IS_ALIGNED(offset, NFS_SERVER(header->inode)->pnfs_blksize) || | ||
696 | !IS_ALIGNED(count, NFS_SERVER(header->inode)->pnfs_blksize))) { | ||
697 | dprintk("pnfsblock nonblock aligned DIO writes. Resend MDS\n"); | ||
698 | goto out_mds; | ||
699 | } | ||
700 | /* At this point, header->page_aray is a (sequential) list of nfs_pages. | 388 | /* At this point, header->page_aray is a (sequential) list of nfs_pages. |
701 | * We want to write each, and if there is an error set pnfs_error | 389 | * We want to write each, and if there is an error set pnfs_error |
702 | * to have it redone using nfs. | 390 | * to have it redone using nfs. |
703 | */ | 391 | */ |
704 | par = alloc_parallel(header); | 392 | par = alloc_parallel(header); |
705 | if (!par) | 393 | if (!par) |
706 | goto out_mds; | 394 | return PNFS_NOT_ATTEMPTED; |
707 | par->pnfs_callback = bl_end_par_io_write; | 395 | par->pnfs_callback = bl_end_par_io_write; |
708 | /* At this point, have to be more careful with error handling */ | ||
709 | 396 | ||
710 | isect = (sector_t) ((offset & (long)PAGE_CACHE_MASK) >> SECTOR_SHIFT); | 397 | blk_start_plug(&plug); |
711 | be = bl_find_get_extent(BLK_LSEG2EXT(header->lseg), isect, &cow_read); | ||
712 | if (!be || !is_writable(be, isect)) { | ||
713 | dprintk("%s no matching extents!\n", __func__); | ||
714 | goto out_mds; | ||
715 | } | ||
716 | 398 | ||
717 | /* First page inside INVALID extent */ | 399 | /* we always write out the whole page */ |
718 | if (be->be_state == PNFS_BLOCK_INVALID_DATA) { | 400 | offset = offset & (loff_t)PAGE_CACHE_MASK; |
719 | if (likely(!bl_push_one_short_extent(be->be_inval))) | 401 | isect = offset >> SECTOR_SHIFT; |
720 | par->bse_count++; | ||
721 | else | ||
722 | goto out_mds; | ||
723 | temp = offset >> PAGE_CACHE_SHIFT; | ||
724 | npg_zero = do_div(temp, npg_per_block); | ||
725 | isect = (sector_t) (((offset - npg_zero * PAGE_CACHE_SIZE) & | ||
726 | (long)PAGE_CACHE_MASK) >> SECTOR_SHIFT); | ||
727 | extent_length = be->be_length - (isect - be->be_f_offset); | ||
728 | |||
729 | fill_invalid_ext: | ||
730 | dprintk("%s need to zero %d pages\n", __func__, npg_zero); | ||
731 | for (;npg_zero > 0; npg_zero--) { | ||
732 | if (bl_is_sector_init(be->be_inval, isect)) { | ||
733 | dprintk("isect %llu already init\n", | ||
734 | (unsigned long long)isect); | ||
735 | goto next_page; | ||
736 | } | ||
737 | /* page ref released in bl_end_io_write_zero */ | ||
738 | index = isect >> PAGE_CACHE_SECTOR_SHIFT; | ||
739 | dprintk("%s zero %dth page: index %lu isect %llu\n", | ||
740 | __func__, npg_zero, index, | ||
741 | (unsigned long long)isect); | ||
742 | page = bl_find_get_zeroing_page(header->inode, index, | ||
743 | cow_read); | ||
744 | if (unlikely(IS_ERR(page))) { | ||
745 | header->pnfs_error = PTR_ERR(page); | ||
746 | goto out; | ||
747 | } else if (page == NULL) | ||
748 | goto next_page; | ||
749 | |||
750 | ret = bl_mark_sectors_init(be->be_inval, isect, | ||
751 | PAGE_CACHE_SECTORS); | ||
752 | if (unlikely(ret)) { | ||
753 | dprintk("%s bl_mark_sectors_init fail %d\n", | ||
754 | __func__, ret); | ||
755 | end_page_writeback(page); | ||
756 | page_cache_release(page); | ||
757 | header->pnfs_error = ret; | ||
758 | goto out; | ||
759 | } | ||
760 | if (likely(!bl_push_one_short_extent(be->be_inval))) | ||
761 | par->bse_count++; | ||
762 | else { | ||
763 | end_page_writeback(page); | ||
764 | page_cache_release(page); | ||
765 | header->pnfs_error = -ENOMEM; | ||
766 | goto out; | ||
767 | } | ||
768 | /* FIXME: This should be done in bi_end_io */ | ||
769 | mark_extents_written(BLK_LSEG2EXT(header->lseg), | ||
770 | page->index << PAGE_CACHE_SHIFT, | ||
771 | PAGE_CACHE_SIZE); | ||
772 | |||
773 | bio = bl_add_page_to_bio(bio, npg_zero, WRITE, | ||
774 | isect, page, be, | ||
775 | bl_end_io_write_zero, par); | ||
776 | if (IS_ERR(bio)) { | ||
777 | header->pnfs_error = PTR_ERR(bio); | ||
778 | bio = NULL; | ||
779 | goto out; | ||
780 | } | ||
781 | next_page: | ||
782 | isect += PAGE_CACHE_SECTORS; | ||
783 | extent_length -= PAGE_CACHE_SECTORS; | ||
784 | } | ||
785 | if (last) | ||
786 | goto write_done; | ||
787 | } | ||
788 | bio = bl_submit_bio(WRITE, bio); | ||
789 | 402 | ||
790 | /* Middle pages */ | ||
791 | pg_index = header->args.pgbase >> PAGE_CACHE_SHIFT; | ||
792 | for (i = pg_index; i < header->page_array.npages; i++) { | 403 | for (i = pg_index; i < header->page_array.npages; i++) { |
793 | if (!extent_length) { | 404 | if (extent_length <= 0) { |
794 | /* We've used up the previous extent */ | 405 | /* We've used up the previous extent */ |
795 | bl_put_extent(be); | ||
796 | bl_put_extent(cow_read); | ||
797 | bio = bl_submit_bio(WRITE, bio); | 406 | bio = bl_submit_bio(WRITE, bio); |
798 | /* Get the next one */ | 407 | /* Get the next one */ |
799 | be = bl_find_get_extent(BLK_LSEG2EXT(header->lseg), | 408 | if (!ext_tree_lookup(bl, isect, &be, true)) { |
800 | isect, &cow_read); | ||
801 | if (!be || !is_writable(be, isect)) { | ||
802 | header->pnfs_error = -EINVAL; | 409 | header->pnfs_error = -EINVAL; |
803 | goto out; | 410 | goto out; |
804 | } | 411 | } |
805 | if (be->be_state == PNFS_BLOCK_INVALID_DATA) { | ||
806 | if (likely(!bl_push_one_short_extent( | ||
807 | be->be_inval))) | ||
808 | par->bse_count++; | ||
809 | else { | ||
810 | header->pnfs_error = -ENOMEM; | ||
811 | goto out; | ||
812 | } | ||
813 | } | ||
814 | extent_length = be->be_length - | ||
815 | (isect - be->be_f_offset); | ||
816 | } | ||
817 | |||
818 | dprintk("%s offset %lld count %Zu\n", __func__, offset, count); | ||
819 | pg_offset = offset & ~PAGE_CACHE_MASK; | ||
820 | if (pg_offset + count > PAGE_CACHE_SIZE) | ||
821 | pg_len = PAGE_CACHE_SIZE - pg_offset; | ||
822 | else | ||
823 | pg_len = count; | ||
824 | |||
825 | saved_len = pg_len; | ||
826 | if (be->be_state == PNFS_BLOCK_INVALID_DATA && | ||
827 | !bl_is_sector_init(be->be_inval, isect)) { | ||
828 | ret = bl_read_partial_page_sync(pages[i], cow_read, | ||
829 | pg_offset, pg_len, true); | ||
830 | if (ret) { | ||
831 | dprintk("%s bl_read_partial_page_sync fail %d\n", | ||
832 | __func__, ret); | ||
833 | header->pnfs_error = ret; | ||
834 | goto out; | ||
835 | } | ||
836 | |||
837 | ret = bl_mark_sectors_init(be->be_inval, isect, | ||
838 | PAGE_CACHE_SECTORS); | ||
839 | if (unlikely(ret)) { | ||
840 | dprintk("%s bl_mark_sectors_init fail %d\n", | ||
841 | __func__, ret); | ||
842 | header->pnfs_error = ret; | ||
843 | goto out; | ||
844 | } | ||
845 | 412 | ||
846 | /* Expand to full page write */ | 413 | extent_length = be.be_length - (isect - be.be_f_offset); |
847 | pg_offset = 0; | ||
848 | pg_len = PAGE_CACHE_SIZE; | ||
849 | } else if ((pg_offset & (SECTOR_SIZE - 1)) || | ||
850 | (pg_len & (SECTOR_SIZE - 1))){ | ||
851 | /* ahh, nasty case. We have to do sync full sector | ||
852 | * read-modify-write cycles. | ||
853 | */ | ||
854 | unsigned int saved_offset = pg_offset; | ||
855 | ret = bl_read_partial_page_sync(pages[i], be, pg_offset, | ||
856 | pg_len, false); | ||
857 | pg_offset = round_down(pg_offset, SECTOR_SIZE); | ||
858 | pg_len = round_up(saved_offset + pg_len, SECTOR_SIZE) | ||
859 | - pg_offset; | ||
860 | } | 414 | } |
861 | 415 | ||
862 | 416 | pg_len = PAGE_CACHE_SIZE; | |
863 | bio = do_add_page_to_bio(bio, header->page_array.npages - i, | 417 | bio = do_add_page_to_bio(bio, header->page_array.npages - i, |
864 | WRITE, | 418 | WRITE, isect, pages[i], &map, &be, |
865 | isect, pages[i], be, | ||
866 | bl_end_io_write, par, | 419 | bl_end_io_write, par, |
867 | pg_offset, pg_len); | 420 | 0, &pg_len); |
868 | if (IS_ERR(bio)) { | 421 | if (IS_ERR(bio)) { |
869 | header->pnfs_error = PTR_ERR(bio); | 422 | header->pnfs_error = PTR_ERR(bio); |
870 | bio = NULL; | 423 | bio = NULL; |
871 | goto out; | 424 | goto out; |
872 | } | 425 | } |
873 | offset += saved_len; | ||
874 | count -= saved_len; | ||
875 | isect += PAGE_CACHE_SECTORS; | ||
876 | last_isect = isect; | ||
877 | extent_length -= PAGE_CACHE_SECTORS; | ||
878 | } | ||
879 | 426 | ||
880 | /* Last page inside INVALID extent */ | 427 | offset += pg_len; |
881 | if (be->be_state == PNFS_BLOCK_INVALID_DATA) { | 428 | count -= pg_len; |
882 | bio = bl_submit_bio(WRITE, bio); | 429 | isect += (pg_len >> SECTOR_SHIFT); |
883 | temp = last_isect >> PAGE_CACHE_SECTOR_SHIFT; | 430 | extent_length -= (pg_len >> SECTOR_SHIFT); |
884 | npg_zero = npg_per_block - do_div(temp, npg_per_block); | ||
885 | if (npg_zero < npg_per_block) { | ||
886 | last = 1; | ||
887 | goto fill_invalid_ext; | ||
888 | } | ||
889 | } | 431 | } |
890 | 432 | ||
891 | write_done: | ||
892 | header->res.count = header->args.count; | 433 | header->res.count = header->args.count; |
893 | out: | 434 | out: |
894 | bl_put_extent(be); | ||
895 | bl_put_extent(cow_read); | ||
896 | bl_submit_bio(WRITE, bio); | 435 | bl_submit_bio(WRITE, bio); |
436 | blk_finish_plug(&plug); | ||
897 | put_parallel(par); | 437 | put_parallel(par); |
898 | return PNFS_ATTEMPTED; | 438 | return PNFS_ATTEMPTED; |
899 | out_mds: | ||
900 | bl_put_extent(be); | ||
901 | bl_put_extent(cow_read); | ||
902 | kfree(par); | ||
903 | return PNFS_NOT_ATTEMPTED; | ||
904 | } | ||
905 | |||
906 | /* FIXME - range ignored */ | ||
907 | static void | ||
908 | release_extents(struct pnfs_block_layout *bl, struct pnfs_layout_range *range) | ||
909 | { | ||
910 | int i; | ||
911 | struct pnfs_block_extent *be; | ||
912 | |||
913 | spin_lock(&bl->bl_ext_lock); | ||
914 | for (i = 0; i < EXTENT_LISTS; i++) { | ||
915 | while (!list_empty(&bl->bl_extents[i])) { | ||
916 | be = list_first_entry(&bl->bl_extents[i], | ||
917 | struct pnfs_block_extent, | ||
918 | be_node); | ||
919 | list_del(&be->be_node); | ||
920 | bl_put_extent(be); | ||
921 | } | ||
922 | } | ||
923 | spin_unlock(&bl->bl_ext_lock); | ||
924 | } | ||
925 | |||
926 | static void | ||
927 | release_inval_marks(struct pnfs_inval_markings *marks) | ||
928 | { | ||
929 | struct pnfs_inval_tracking *pos, *temp; | ||
930 | struct pnfs_block_short_extent *se, *stemp; | ||
931 | |||
932 | list_for_each_entry_safe(pos, temp, &marks->im_tree.mtt_stub, it_link) { | ||
933 | list_del(&pos->it_link); | ||
934 | kfree(pos); | ||
935 | } | ||
936 | |||
937 | list_for_each_entry_safe(se, stemp, &marks->im_extents, bse_node) { | ||
938 | list_del(&se->bse_node); | ||
939 | kfree(se); | ||
940 | } | ||
941 | return; | ||
942 | } | 439 | } |
943 | 440 | ||
944 | static void bl_free_layout_hdr(struct pnfs_layout_hdr *lo) | 441 | static void bl_free_layout_hdr(struct pnfs_layout_hdr *lo) |
945 | { | 442 | { |
946 | struct pnfs_block_layout *bl = BLK_LO2EXT(lo); | 443 | struct pnfs_block_layout *bl = BLK_LO2EXT(lo); |
444 | int err; | ||
947 | 445 | ||
948 | dprintk("%s enter\n", __func__); | 446 | dprintk("%s enter\n", __func__); |
949 | release_extents(bl, NULL); | 447 | |
950 | release_inval_marks(&bl->bl_inval); | 448 | err = ext_tree_remove(bl, true, 0, LLONG_MAX); |
449 | WARN_ON(err); | ||
450 | |||
951 | kfree(bl); | 451 | kfree(bl); |
952 | } | 452 | } |
953 | 453 | ||
@@ -960,14 +460,11 @@ static struct pnfs_layout_hdr *bl_alloc_layout_hdr(struct inode *inode, | |||
960 | bl = kzalloc(sizeof(*bl), gfp_flags); | 460 | bl = kzalloc(sizeof(*bl), gfp_flags); |
961 | if (!bl) | 461 | if (!bl) |
962 | return NULL; | 462 | return NULL; |
463 | |||
464 | bl->bl_ext_rw = RB_ROOT; | ||
465 | bl->bl_ext_ro = RB_ROOT; | ||
963 | spin_lock_init(&bl->bl_ext_lock); | 466 | spin_lock_init(&bl->bl_ext_lock); |
964 | INIT_LIST_HEAD(&bl->bl_extents[0]); | 467 | |
965 | INIT_LIST_HEAD(&bl->bl_extents[1]); | ||
966 | INIT_LIST_HEAD(&bl->bl_commit); | ||
967 | INIT_LIST_HEAD(&bl->bl_committing); | ||
968 | bl->bl_count = 0; | ||
969 | bl->bl_blocksize = NFS_SERVER(inode)->pnfs_blksize >> SECTOR_SHIFT; | ||
970 | BL_INIT_INVAL_MARKS(&bl->bl_inval, bl->bl_blocksize); | ||
971 | return &bl->bl_layout; | 468 | return &bl->bl_layout; |
972 | } | 469 | } |
973 | 470 | ||
@@ -977,215 +474,318 @@ static void bl_free_lseg(struct pnfs_layout_segment *lseg) | |||
977 | kfree(lseg); | 474 | kfree(lseg); |
978 | } | 475 | } |
979 | 476 | ||
980 | /* We pretty much ignore lseg, and store all data layout wide, so we | 477 | /* Tracks info needed to ensure extents in layout obey constraints of spec */ |
981 | * can correctly merge. | 478 | struct layout_verification { |
982 | */ | 479 | u32 mode; /* R or RW */ |
983 | static struct pnfs_layout_segment *bl_alloc_lseg(struct pnfs_layout_hdr *lo, | 480 | u64 start; /* Expected start of next non-COW extent */ |
984 | struct nfs4_layoutget_res *lgr, | 481 | u64 inval; /* Start of INVAL coverage */ |
985 | gfp_t gfp_flags) | 482 | u64 cowread; /* End of COW read coverage */ |
986 | { | 483 | }; |
987 | struct pnfs_layout_segment *lseg; | ||
988 | int status; | ||
989 | 484 | ||
990 | dprintk("%s enter\n", __func__); | 485 | /* Verify the extent meets the layout requirements of the pnfs-block draft, |
991 | lseg = kzalloc(sizeof(*lseg), gfp_flags); | 486 | * section 2.3.1. |
992 | if (!lseg) | 487 | */ |
993 | return ERR_PTR(-ENOMEM); | 488 | static int verify_extent(struct pnfs_block_extent *be, |
994 | status = nfs4_blk_process_layoutget(lo, lgr, gfp_flags); | 489 | struct layout_verification *lv) |
995 | if (status) { | 490 | { |
996 | /* We don't want to call the full-blown bl_free_lseg, | 491 | if (lv->mode == IOMODE_READ) { |
997 | * since on error extents were not touched. | 492 | if (be->be_state == PNFS_BLOCK_READWRITE_DATA || |
998 | */ | 493 | be->be_state == PNFS_BLOCK_INVALID_DATA) |
999 | kfree(lseg); | 494 | return -EIO; |
1000 | return ERR_PTR(status); | 495 | if (be->be_f_offset != lv->start) |
496 | return -EIO; | ||
497 | lv->start += be->be_length; | ||
498 | return 0; | ||
1001 | } | 499 | } |
1002 | return lseg; | 500 | /* lv->mode == IOMODE_RW */ |
501 | if (be->be_state == PNFS_BLOCK_READWRITE_DATA) { | ||
502 | if (be->be_f_offset != lv->start) | ||
503 | return -EIO; | ||
504 | if (lv->cowread > lv->start) | ||
505 | return -EIO; | ||
506 | lv->start += be->be_length; | ||
507 | lv->inval = lv->start; | ||
508 | return 0; | ||
509 | } else if (be->be_state == PNFS_BLOCK_INVALID_DATA) { | ||
510 | if (be->be_f_offset != lv->start) | ||
511 | return -EIO; | ||
512 | lv->start += be->be_length; | ||
513 | return 0; | ||
514 | } else if (be->be_state == PNFS_BLOCK_READ_DATA) { | ||
515 | if (be->be_f_offset > lv->start) | ||
516 | return -EIO; | ||
517 | if (be->be_f_offset < lv->inval) | ||
518 | return -EIO; | ||
519 | if (be->be_f_offset < lv->cowread) | ||
520 | return -EIO; | ||
521 | /* It looks like you might want to min this with lv->start, | ||
522 | * but you really don't. | ||
523 | */ | ||
524 | lv->inval = lv->inval + be->be_length; | ||
525 | lv->cowread = be->be_f_offset + be->be_length; | ||
526 | return 0; | ||
527 | } else | ||
528 | return -EIO; | ||
1003 | } | 529 | } |
1004 | 530 | ||
1005 | static void | 531 | static int decode_sector_number(__be32 **rp, sector_t *sp) |
1006 | bl_encode_layoutcommit(struct pnfs_layout_hdr *lo, struct xdr_stream *xdr, | ||
1007 | const struct nfs4_layoutcommit_args *arg) | ||
1008 | { | 532 | { |
1009 | dprintk("%s enter\n", __func__); | 533 | uint64_t s; |
1010 | encode_pnfs_block_layoutupdate(BLK_LO2EXT(lo), xdr, arg); | 534 | |
535 | *rp = xdr_decode_hyper(*rp, &s); | ||
536 | if (s & 0x1ff) { | ||
537 | printk(KERN_WARNING "NFS: %s: sector not aligned\n", __func__); | ||
538 | return -1; | ||
539 | } | ||
540 | *sp = s >> SECTOR_SHIFT; | ||
541 | return 0; | ||
1011 | } | 542 | } |
1012 | 543 | ||
1013 | static void | 544 | static int |
1014 | bl_cleanup_layoutcommit(struct nfs4_layoutcommit_data *lcdata) | 545 | bl_alloc_extent(struct xdr_stream *xdr, struct pnfs_layout_hdr *lo, |
546 | struct layout_verification *lv, struct list_head *extents, | ||
547 | gfp_t gfp_mask) | ||
1015 | { | 548 | { |
1016 | struct pnfs_layout_hdr *lo = NFS_I(lcdata->args.inode)->layout; | 549 | struct pnfs_block_extent *be; |
550 | struct nfs4_deviceid id; | ||
551 | int error; | ||
552 | __be32 *p; | ||
1017 | 553 | ||
1018 | dprintk("%s enter\n", __func__); | 554 | p = xdr_inline_decode(xdr, 28 + NFS4_DEVICEID4_SIZE); |
1019 | clean_pnfs_block_layoutupdate(BLK_LO2EXT(lo), &lcdata->args, lcdata->res.status); | 555 | if (!p) |
1020 | } | 556 | return -EIO; |
1021 | 557 | ||
1022 | static void free_blk_mountid(struct block_mount_id *mid) | 558 | be = kzalloc(sizeof(*be), GFP_NOFS); |
1023 | { | 559 | if (!be) |
1024 | if (mid) { | 560 | return -ENOMEM; |
1025 | struct pnfs_block_dev *dev, *tmp; | ||
1026 | 561 | ||
1027 | /* No need to take bm_lock as we are last user freeing bm_devlist */ | 562 | memcpy(&id, p, NFS4_DEVICEID4_SIZE); |
1028 | list_for_each_entry_safe(dev, tmp, &mid->bm_devlist, bm_node) { | 563 | p += XDR_QUADLEN(NFS4_DEVICEID4_SIZE); |
1029 | list_del(&dev->bm_node); | 564 | |
1030 | bl_free_block_dev(dev); | 565 | error = -EIO; |
1031 | } | 566 | be->be_device = nfs4_find_get_deviceid(NFS_SERVER(lo->plh_inode), &id, |
1032 | kfree(mid); | 567 | lo->plh_lc_cred, gfp_mask); |
568 | if (!be->be_device) | ||
569 | goto out_free_be; | ||
570 | |||
571 | /* | ||
572 | * The next three values are read in as bytes, but stored in the | ||
573 | * extent structure in 512-byte granularity. | ||
574 | */ | ||
575 | if (decode_sector_number(&p, &be->be_f_offset) < 0) | ||
576 | goto out_put_deviceid; | ||
577 | if (decode_sector_number(&p, &be->be_length) < 0) | ||
578 | goto out_put_deviceid; | ||
579 | if (decode_sector_number(&p, &be->be_v_offset) < 0) | ||
580 | goto out_put_deviceid; | ||
581 | be->be_state = be32_to_cpup(p++); | ||
582 | |||
583 | error = verify_extent(be, lv); | ||
584 | if (error) { | ||
585 | dprintk("%s: extent verification failed\n", __func__); | ||
586 | goto out_put_deviceid; | ||
1033 | } | 587 | } |
588 | |||
589 | list_add_tail(&be->be_list, extents); | ||
590 | return 0; | ||
591 | |||
592 | out_put_deviceid: | ||
593 | nfs4_put_deviceid_node(be->be_device); | ||
594 | out_free_be: | ||
595 | kfree(be); | ||
596 | return error; | ||
1034 | } | 597 | } |
1035 | 598 | ||
1036 | /* This is mostly copied from the filelayout_get_device_info function. | 599 | static struct pnfs_layout_segment * |
1037 | * It seems much of this should be at the generic pnfs level. | 600 | bl_alloc_lseg(struct pnfs_layout_hdr *lo, struct nfs4_layoutget_res *lgr, |
1038 | */ | 601 | gfp_t gfp_mask) |
1039 | static struct pnfs_block_dev * | ||
1040 | nfs4_blk_get_deviceinfo(struct nfs_server *server, const struct nfs_fh *fh, | ||
1041 | struct nfs4_deviceid *d_id) | ||
1042 | { | 602 | { |
1043 | struct pnfs_device *dev; | 603 | struct layout_verification lv = { |
1044 | struct pnfs_block_dev *rv; | 604 | .mode = lgr->range.iomode, |
1045 | u32 max_resp_sz; | 605 | .start = lgr->range.offset >> SECTOR_SHIFT, |
1046 | int max_pages; | 606 | .inval = lgr->range.offset >> SECTOR_SHIFT, |
1047 | struct page **pages = NULL; | 607 | .cowread = lgr->range.offset >> SECTOR_SHIFT, |
1048 | int i, rc; | 608 | }; |
609 | struct pnfs_block_layout *bl = BLK_LO2EXT(lo); | ||
610 | struct pnfs_layout_segment *lseg; | ||
611 | struct xdr_buf buf; | ||
612 | struct xdr_stream xdr; | ||
613 | struct page *scratch; | ||
614 | int status, i; | ||
615 | uint32_t count; | ||
616 | __be32 *p; | ||
617 | LIST_HEAD(extents); | ||
618 | |||
619 | dprintk("---> %s\n", __func__); | ||
620 | |||
621 | lseg = kzalloc(sizeof(*lseg), gfp_mask); | ||
622 | if (!lseg) | ||
623 | return ERR_PTR(-ENOMEM); | ||
624 | |||
625 | status = -ENOMEM; | ||
626 | scratch = alloc_page(gfp_mask); | ||
627 | if (!scratch) | ||
628 | goto out; | ||
629 | |||
630 | xdr_init_decode_pages(&xdr, &buf, | ||
631 | lgr->layoutp->pages, lgr->layoutp->len); | ||
632 | xdr_set_scratch_buffer(&xdr, page_address(scratch), PAGE_SIZE); | ||
633 | |||
634 | status = -EIO; | ||
635 | p = xdr_inline_decode(&xdr, 4); | ||
636 | if (unlikely(!p)) | ||
637 | goto out_free_scratch; | ||
638 | |||
639 | count = be32_to_cpup(p++); | ||
640 | dprintk("%s: number of extents %d\n", __func__, count); | ||
1049 | 641 | ||
1050 | /* | 642 | /* |
1051 | * Use the session max response size as the basis for setting | 643 | * Decode individual extents, putting them in temporary staging area |
1052 | * GETDEVICEINFO's maxcount | 644 | * until whole layout is decoded to make error recovery easier. |
1053 | */ | 645 | */ |
1054 | max_resp_sz = server->nfs_client->cl_session->fc_attrs.max_resp_sz; | 646 | for (i = 0; i < count; i++) { |
1055 | max_pages = nfs_page_array_len(0, max_resp_sz); | 647 | status = bl_alloc_extent(&xdr, lo, &lv, &extents, gfp_mask); |
1056 | dprintk("%s max_resp_sz %u max_pages %d\n", | 648 | if (status) |
1057 | __func__, max_resp_sz, max_pages); | 649 | goto process_extents; |
1058 | |||
1059 | dev = kmalloc(sizeof(*dev), GFP_NOFS); | ||
1060 | if (!dev) { | ||
1061 | dprintk("%s kmalloc failed\n", __func__); | ||
1062 | return ERR_PTR(-ENOMEM); | ||
1063 | } | 650 | } |
1064 | 651 | ||
1065 | pages = kcalloc(max_pages, sizeof(struct page *), GFP_NOFS); | 652 | if (lgr->range.offset + lgr->range.length != |
1066 | if (pages == NULL) { | 653 | lv.start << SECTOR_SHIFT) { |
1067 | kfree(dev); | 654 | dprintk("%s Final length mismatch\n", __func__); |
1068 | return ERR_PTR(-ENOMEM); | 655 | status = -EIO; |
656 | goto process_extents; | ||
1069 | } | 657 | } |
1070 | for (i = 0; i < max_pages; i++) { | 658 | |
1071 | pages[i] = alloc_page(GFP_NOFS); | 659 | if (lv.start < lv.cowread) { |
1072 | if (!pages[i]) { | 660 | dprintk("%s Final uncovered COW extent\n", __func__); |
1073 | rv = ERR_PTR(-ENOMEM); | 661 | status = -EIO; |
1074 | goto out_free; | ||
1075 | } | ||
1076 | } | 662 | } |
1077 | 663 | ||
1078 | memcpy(&dev->dev_id, d_id, sizeof(*d_id)); | 664 | process_extents: |
1079 | dev->layout_type = LAYOUT_BLOCK_VOLUME; | 665 | while (!list_empty(&extents)) { |
1080 | dev->pages = pages; | 666 | struct pnfs_block_extent *be = |
1081 | dev->pgbase = 0; | 667 | list_first_entry(&extents, struct pnfs_block_extent, |
1082 | dev->pglen = PAGE_SIZE * max_pages; | 668 | be_list); |
1083 | dev->mincount = 0; | 669 | list_del(&be->be_list); |
1084 | dev->maxcount = max_resp_sz - nfs41_maxgetdevinfo_overhead; | 670 | |
1085 | 671 | if (!status) | |
1086 | dprintk("%s: dev_id: %s\n", __func__, dev->dev_id.data); | 672 | status = ext_tree_insert(bl, be); |
1087 | rc = nfs4_proc_getdeviceinfo(server, dev, NULL); | 673 | |
1088 | dprintk("%s getdevice info returns %d\n", __func__, rc); | 674 | if (status) { |
1089 | if (rc) { | 675 | nfs4_put_deviceid_node(be->be_device); |
1090 | rv = ERR_PTR(rc); | 676 | kfree(be); |
1091 | goto out_free; | 677 | } |
1092 | } | 678 | } |
1093 | 679 | ||
1094 | rv = nfs4_blk_decode_device(server, dev); | 680 | out_free_scratch: |
1095 | out_free: | 681 | __free_page(scratch); |
1096 | for (i = 0; i < max_pages; i++) | 682 | out: |
1097 | __free_page(pages[i]); | 683 | dprintk("%s returns %d\n", __func__, status); |
1098 | kfree(pages); | 684 | if (status) { |
1099 | kfree(dev); | 685 | kfree(lseg); |
1100 | return rv; | 686 | return ERR_PTR(status); |
687 | } | ||
688 | return lseg; | ||
1101 | } | 689 | } |
1102 | 690 | ||
1103 | static int | 691 | static void |
1104 | bl_set_layoutdriver(struct nfs_server *server, const struct nfs_fh *fh) | 692 | bl_return_range(struct pnfs_layout_hdr *lo, |
693 | struct pnfs_layout_range *range) | ||
1105 | { | 694 | { |
1106 | struct block_mount_id *b_mt_id = NULL; | 695 | struct pnfs_block_layout *bl = BLK_LO2EXT(lo); |
1107 | struct pnfs_devicelist *dlist = NULL; | 696 | sector_t offset = range->offset >> SECTOR_SHIFT, end; |
1108 | struct pnfs_block_dev *bdev; | ||
1109 | LIST_HEAD(block_disklist); | ||
1110 | int status, i; | ||
1111 | |||
1112 | dprintk("%s enter\n", __func__); | ||
1113 | 697 | ||
1114 | if (server->pnfs_blksize == 0) { | 698 | if (range->offset % 8) { |
1115 | dprintk("%s Server did not return blksize\n", __func__); | 699 | dprintk("%s: offset %lld not block size aligned\n", |
1116 | return -EINVAL; | 700 | __func__, range->offset); |
1117 | } | 701 | return; |
1118 | b_mt_id = kzalloc(sizeof(struct block_mount_id), GFP_NOFS); | ||
1119 | if (!b_mt_id) { | ||
1120 | status = -ENOMEM; | ||
1121 | goto out_error; | ||
1122 | } | ||
1123 | /* Initialize nfs4 block layout mount id */ | ||
1124 | spin_lock_init(&b_mt_id->bm_lock); | ||
1125 | INIT_LIST_HEAD(&b_mt_id->bm_devlist); | ||
1126 | |||
1127 | dlist = kmalloc(sizeof(struct pnfs_devicelist), GFP_NOFS); | ||
1128 | if (!dlist) { | ||
1129 | status = -ENOMEM; | ||
1130 | goto out_error; | ||
1131 | } | 702 | } |
1132 | dlist->eof = 0; | 703 | |
1133 | while (!dlist->eof) { | 704 | if (range->length != NFS4_MAX_UINT64) { |
1134 | status = nfs4_proc_getdevicelist(server, fh, dlist); | 705 | if (range->length % 8) { |
1135 | if (status) | 706 | dprintk("%s: length %lld not block size aligned\n", |
1136 | goto out_error; | 707 | __func__, range->length); |
1137 | dprintk("%s GETDEVICELIST numdevs=%i, eof=%i\n", | 708 | return; |
1138 | __func__, dlist->num_devs, dlist->eof); | ||
1139 | for (i = 0; i < dlist->num_devs; i++) { | ||
1140 | bdev = nfs4_blk_get_deviceinfo(server, fh, | ||
1141 | &dlist->dev_id[i]); | ||
1142 | if (IS_ERR(bdev)) { | ||
1143 | status = PTR_ERR(bdev); | ||
1144 | goto out_error; | ||
1145 | } | ||
1146 | spin_lock(&b_mt_id->bm_lock); | ||
1147 | list_add(&bdev->bm_node, &b_mt_id->bm_devlist); | ||
1148 | spin_unlock(&b_mt_id->bm_lock); | ||
1149 | } | 709 | } |
1150 | } | ||
1151 | dprintk("%s SUCCESS\n", __func__); | ||
1152 | server->pnfs_ld_data = b_mt_id; | ||
1153 | 710 | ||
1154 | out_return: | 711 | end = offset + (range->length >> SECTOR_SHIFT); |
1155 | kfree(dlist); | 712 | } else { |
1156 | return status; | 713 | end = round_down(NFS4_MAX_UINT64, PAGE_SIZE); |
714 | } | ||
1157 | 715 | ||
1158 | out_error: | 716 | ext_tree_remove(bl, range->iomode & IOMODE_RW, offset, end); |
1159 | free_blk_mountid(b_mt_id); | ||
1160 | goto out_return; | ||
1161 | } | 717 | } |
1162 | 718 | ||
1163 | static int | 719 | static int |
1164 | bl_clear_layoutdriver(struct nfs_server *server) | 720 | bl_prepare_layoutcommit(struct nfs4_layoutcommit_args *arg) |
721 | { | ||
722 | return ext_tree_prepare_commit(arg); | ||
723 | } | ||
724 | |||
725 | static void | ||
726 | bl_cleanup_layoutcommit(struct nfs4_layoutcommit_data *lcdata) | ||
1165 | { | 727 | { |
1166 | struct block_mount_id *b_mt_id = server->pnfs_ld_data; | 728 | ext_tree_mark_committed(&lcdata->args, lcdata->res.status); |
729 | } | ||
1167 | 730 | ||
731 | static int | ||
732 | bl_set_layoutdriver(struct nfs_server *server, const struct nfs_fh *fh) | ||
733 | { | ||
1168 | dprintk("%s enter\n", __func__); | 734 | dprintk("%s enter\n", __func__); |
1169 | free_blk_mountid(b_mt_id); | 735 | |
1170 | dprintk("%s RETURNS\n", __func__); | 736 | if (server->pnfs_blksize == 0) { |
737 | dprintk("%s Server did not return blksize\n", __func__); | ||
738 | return -EINVAL; | ||
739 | } | ||
740 | if (server->pnfs_blksize > PAGE_SIZE) { | ||
741 | printk(KERN_ERR "%s: pNFS blksize %d not supported.\n", | ||
742 | __func__, server->pnfs_blksize); | ||
743 | return -EINVAL; | ||
744 | } | ||
745 | |||
1171 | return 0; | 746 | return 0; |
1172 | } | 747 | } |
1173 | 748 | ||
1174 | static bool | 749 | static bool |
1175 | is_aligned_req(struct nfs_page *req, unsigned int alignment) | 750 | is_aligned_req(struct nfs_pageio_descriptor *pgio, |
751 | struct nfs_page *req, unsigned int alignment) | ||
1176 | { | 752 | { |
1177 | return IS_ALIGNED(req->wb_offset, alignment) && | 753 | /* |
1178 | IS_ALIGNED(req->wb_bytes, alignment); | 754 | * Always accept buffered writes, higher layers take care of the |
755 | * right alignment. | ||
756 | */ | ||
757 | if (pgio->pg_dreq == NULL) | ||
758 | return true; | ||
759 | |||
760 | if (!IS_ALIGNED(req->wb_offset, alignment)) | ||
761 | return false; | ||
762 | |||
763 | if (IS_ALIGNED(req->wb_bytes, alignment)) | ||
764 | return true; | ||
765 | |||
766 | if (req_offset(req) + req->wb_bytes == i_size_read(pgio->pg_inode)) { | ||
767 | /* | ||
768 | * If the write goes up to the inode size, just write | ||
769 | * the full page. Data past the inode size is | ||
770 | * guaranteed to be zeroed by the higher level client | ||
771 | * code, and this behaviour is mandated by RFC 5663 | ||
772 | * section 2.3.2. | ||
773 | */ | ||
774 | return true; | ||
775 | } | ||
776 | |||
777 | return false; | ||
1179 | } | 778 | } |
1180 | 779 | ||
1181 | static void | 780 | static void |
1182 | bl_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) | 781 | bl_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) |
1183 | { | 782 | { |
1184 | if (pgio->pg_dreq != NULL && | 783 | if (!is_aligned_req(pgio, req, SECTOR_SIZE)) { |
1185 | !is_aligned_req(req, SECTOR_SIZE)) | ||
1186 | nfs_pageio_reset_read_mds(pgio); | 784 | nfs_pageio_reset_read_mds(pgio); |
1187 | else | 785 | return; |
1188 | pnfs_generic_pg_init_read(pgio, req); | 786 | } |
787 | |||
788 | pnfs_generic_pg_init_read(pgio, req); | ||
1189 | } | 789 | } |
1190 | 790 | ||
1191 | /* | 791 | /* |
@@ -1196,10 +796,8 @@ static size_t | |||
1196 | bl_pg_test_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, | 796 | bl_pg_test_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, |
1197 | struct nfs_page *req) | 797 | struct nfs_page *req) |
1198 | { | 798 | { |
1199 | if (pgio->pg_dreq != NULL && | 799 | if (!is_aligned_req(pgio, req, SECTOR_SIZE)) |
1200 | !is_aligned_req(req, SECTOR_SIZE)) | ||
1201 | return 0; | 800 | return 0; |
1202 | |||
1203 | return pnfs_generic_pg_test(pgio, prev, req); | 801 | return pnfs_generic_pg_test(pgio, prev, req); |
1204 | } | 802 | } |
1205 | 803 | ||
@@ -1229,19 +827,20 @@ static u64 pnfs_num_cont_bytes(struct inode *inode, pgoff_t idx) | |||
1229 | static void | 827 | static void |
1230 | bl_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) | 828 | bl_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) |
1231 | { | 829 | { |
1232 | if (pgio->pg_dreq != NULL && | 830 | u64 wb_size; |
1233 | !is_aligned_req(req, PAGE_CACHE_SIZE)) { | 831 | |
832 | if (!is_aligned_req(pgio, req, PAGE_SIZE)) { | ||
1234 | nfs_pageio_reset_write_mds(pgio); | 833 | nfs_pageio_reset_write_mds(pgio); |
1235 | } else { | 834 | return; |
1236 | u64 wb_size; | ||
1237 | if (pgio->pg_dreq == NULL) | ||
1238 | wb_size = pnfs_num_cont_bytes(pgio->pg_inode, | ||
1239 | req->wb_index); | ||
1240 | else | ||
1241 | wb_size = nfs_dreq_bytes_left(pgio->pg_dreq); | ||
1242 | |||
1243 | pnfs_generic_pg_init_write(pgio, req, wb_size); | ||
1244 | } | 835 | } |
836 | |||
837 | if (pgio->pg_dreq == NULL) | ||
838 | wb_size = pnfs_num_cont_bytes(pgio->pg_inode, | ||
839 | req->wb_index); | ||
840 | else | ||
841 | wb_size = nfs_dreq_bytes_left(pgio->pg_dreq); | ||
842 | |||
843 | pnfs_generic_pg_init_write(pgio, req, wb_size); | ||
1245 | } | 844 | } |
1246 | 845 | ||
1247 | /* | 846 | /* |
@@ -1252,10 +851,8 @@ static size_t | |||
1252 | bl_pg_test_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, | 851 | bl_pg_test_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, |
1253 | struct nfs_page *req) | 852 | struct nfs_page *req) |
1254 | { | 853 | { |
1255 | if (pgio->pg_dreq != NULL && | 854 | if (!is_aligned_req(pgio, req, PAGE_SIZE)) |
1256 | !is_aligned_req(req, PAGE_CACHE_SIZE)) | ||
1257 | return 0; | 855 | return 0; |
1258 | |||
1259 | return pnfs_generic_pg_test(pgio, prev, req); | 856 | return pnfs_generic_pg_test(pgio, prev, req); |
1260 | } | 857 | } |
1261 | 858 | ||
@@ -1275,146 +872,24 @@ static struct pnfs_layoutdriver_type blocklayout_type = { | |||
1275 | .id = LAYOUT_BLOCK_VOLUME, | 872 | .id = LAYOUT_BLOCK_VOLUME, |
1276 | .name = "LAYOUT_BLOCK_VOLUME", | 873 | .name = "LAYOUT_BLOCK_VOLUME", |
1277 | .owner = THIS_MODULE, | 874 | .owner = THIS_MODULE, |
875 | .flags = PNFS_LAYOUTRET_ON_SETATTR | | ||
876 | PNFS_READ_WHOLE_PAGE, | ||
1278 | .read_pagelist = bl_read_pagelist, | 877 | .read_pagelist = bl_read_pagelist, |
1279 | .write_pagelist = bl_write_pagelist, | 878 | .write_pagelist = bl_write_pagelist, |
1280 | .alloc_layout_hdr = bl_alloc_layout_hdr, | 879 | .alloc_layout_hdr = bl_alloc_layout_hdr, |
1281 | .free_layout_hdr = bl_free_layout_hdr, | 880 | .free_layout_hdr = bl_free_layout_hdr, |
1282 | .alloc_lseg = bl_alloc_lseg, | 881 | .alloc_lseg = bl_alloc_lseg, |
1283 | .free_lseg = bl_free_lseg, | 882 | .free_lseg = bl_free_lseg, |
1284 | .encode_layoutcommit = bl_encode_layoutcommit, | 883 | .return_range = bl_return_range, |
884 | .prepare_layoutcommit = bl_prepare_layoutcommit, | ||
1285 | .cleanup_layoutcommit = bl_cleanup_layoutcommit, | 885 | .cleanup_layoutcommit = bl_cleanup_layoutcommit, |
1286 | .set_layoutdriver = bl_set_layoutdriver, | 886 | .set_layoutdriver = bl_set_layoutdriver, |
1287 | .clear_layoutdriver = bl_clear_layoutdriver, | 887 | .alloc_deviceid_node = bl_alloc_deviceid_node, |
888 | .free_deviceid_node = bl_free_deviceid_node, | ||
1288 | .pg_read_ops = &bl_pg_read_ops, | 889 | .pg_read_ops = &bl_pg_read_ops, |
1289 | .pg_write_ops = &bl_pg_write_ops, | 890 | .pg_write_ops = &bl_pg_write_ops, |
1290 | }; | 891 | }; |
1291 | 892 | ||
1292 | static const struct rpc_pipe_ops bl_upcall_ops = { | ||
1293 | .upcall = rpc_pipe_generic_upcall, | ||
1294 | .downcall = bl_pipe_downcall, | ||
1295 | .destroy_msg = bl_pipe_destroy_msg, | ||
1296 | }; | ||
1297 | |||
1298 | static struct dentry *nfs4blocklayout_register_sb(struct super_block *sb, | ||
1299 | struct rpc_pipe *pipe) | ||
1300 | { | ||
1301 | struct dentry *dir, *dentry; | ||
1302 | |||
1303 | dir = rpc_d_lookup_sb(sb, NFS_PIPE_DIRNAME); | ||
1304 | if (dir == NULL) | ||
1305 | return ERR_PTR(-ENOENT); | ||
1306 | dentry = rpc_mkpipe_dentry(dir, "blocklayout", NULL, pipe); | ||
1307 | dput(dir); | ||
1308 | return dentry; | ||
1309 | } | ||
1310 | |||
1311 | static void nfs4blocklayout_unregister_sb(struct super_block *sb, | ||
1312 | struct rpc_pipe *pipe) | ||
1313 | { | ||
1314 | if (pipe->dentry) | ||
1315 | rpc_unlink(pipe->dentry); | ||
1316 | } | ||
1317 | |||
1318 | static int rpc_pipefs_event(struct notifier_block *nb, unsigned long event, | ||
1319 | void *ptr) | ||
1320 | { | ||
1321 | struct super_block *sb = ptr; | ||
1322 | struct net *net = sb->s_fs_info; | ||
1323 | struct nfs_net *nn = net_generic(net, nfs_net_id); | ||
1324 | struct dentry *dentry; | ||
1325 | int ret = 0; | ||
1326 | |||
1327 | if (!try_module_get(THIS_MODULE)) | ||
1328 | return 0; | ||
1329 | |||
1330 | if (nn->bl_device_pipe == NULL) { | ||
1331 | module_put(THIS_MODULE); | ||
1332 | return 0; | ||
1333 | } | ||
1334 | |||
1335 | switch (event) { | ||
1336 | case RPC_PIPEFS_MOUNT: | ||
1337 | dentry = nfs4blocklayout_register_sb(sb, nn->bl_device_pipe); | ||
1338 | if (IS_ERR(dentry)) { | ||
1339 | ret = PTR_ERR(dentry); | ||
1340 | break; | ||
1341 | } | ||
1342 | nn->bl_device_pipe->dentry = dentry; | ||
1343 | break; | ||
1344 | case RPC_PIPEFS_UMOUNT: | ||
1345 | if (nn->bl_device_pipe->dentry) | ||
1346 | nfs4blocklayout_unregister_sb(sb, nn->bl_device_pipe); | ||
1347 | break; | ||
1348 | default: | ||
1349 | ret = -ENOTSUPP; | ||
1350 | break; | ||
1351 | } | ||
1352 | module_put(THIS_MODULE); | ||
1353 | return ret; | ||
1354 | } | ||
1355 | |||
1356 | static struct notifier_block nfs4blocklayout_block = { | ||
1357 | .notifier_call = rpc_pipefs_event, | ||
1358 | }; | ||
1359 | |||
1360 | static struct dentry *nfs4blocklayout_register_net(struct net *net, | ||
1361 | struct rpc_pipe *pipe) | ||
1362 | { | ||
1363 | struct super_block *pipefs_sb; | ||
1364 | struct dentry *dentry; | ||
1365 | |||
1366 | pipefs_sb = rpc_get_sb_net(net); | ||
1367 | if (!pipefs_sb) | ||
1368 | return NULL; | ||
1369 | dentry = nfs4blocklayout_register_sb(pipefs_sb, pipe); | ||
1370 | rpc_put_sb_net(net); | ||
1371 | return dentry; | ||
1372 | } | ||
1373 | |||
1374 | static void nfs4blocklayout_unregister_net(struct net *net, | ||
1375 | struct rpc_pipe *pipe) | ||
1376 | { | ||
1377 | struct super_block *pipefs_sb; | ||
1378 | |||
1379 | pipefs_sb = rpc_get_sb_net(net); | ||
1380 | if (pipefs_sb) { | ||
1381 | nfs4blocklayout_unregister_sb(pipefs_sb, pipe); | ||
1382 | rpc_put_sb_net(net); | ||
1383 | } | ||
1384 | } | ||
1385 | |||
1386 | static int nfs4blocklayout_net_init(struct net *net) | ||
1387 | { | ||
1388 | struct nfs_net *nn = net_generic(net, nfs_net_id); | ||
1389 | struct dentry *dentry; | ||
1390 | |||
1391 | init_waitqueue_head(&nn->bl_wq); | ||
1392 | nn->bl_device_pipe = rpc_mkpipe_data(&bl_upcall_ops, 0); | ||
1393 | if (IS_ERR(nn->bl_device_pipe)) | ||
1394 | return PTR_ERR(nn->bl_device_pipe); | ||
1395 | dentry = nfs4blocklayout_register_net(net, nn->bl_device_pipe); | ||
1396 | if (IS_ERR(dentry)) { | ||
1397 | rpc_destroy_pipe_data(nn->bl_device_pipe); | ||
1398 | return PTR_ERR(dentry); | ||
1399 | } | ||
1400 | nn->bl_device_pipe->dentry = dentry; | ||
1401 | return 0; | ||
1402 | } | ||
1403 | |||
1404 | static void nfs4blocklayout_net_exit(struct net *net) | ||
1405 | { | ||
1406 | struct nfs_net *nn = net_generic(net, nfs_net_id); | ||
1407 | |||
1408 | nfs4blocklayout_unregister_net(net, nn->bl_device_pipe); | ||
1409 | rpc_destroy_pipe_data(nn->bl_device_pipe); | ||
1410 | nn->bl_device_pipe = NULL; | ||
1411 | } | ||
1412 | |||
1413 | static struct pernet_operations nfs4blocklayout_net_ops = { | ||
1414 | .init = nfs4blocklayout_net_init, | ||
1415 | .exit = nfs4blocklayout_net_exit, | ||
1416 | }; | ||
1417 | |||
1418 | static int __init nfs4blocklayout_init(void) | 893 | static int __init nfs4blocklayout_init(void) |
1419 | { | 894 | { |
1420 | int ret; | 895 | int ret; |
@@ -1424,20 +899,14 @@ static int __init nfs4blocklayout_init(void) | |||
1424 | ret = pnfs_register_layoutdriver(&blocklayout_type); | 899 | ret = pnfs_register_layoutdriver(&blocklayout_type); |
1425 | if (ret) | 900 | if (ret) |
1426 | goto out; | 901 | goto out; |
1427 | 902 | ret = bl_init_pipefs(); | |
1428 | ret = rpc_pipefs_notifier_register(&nfs4blocklayout_block); | ||
1429 | if (ret) | 903 | if (ret) |
1430 | goto out_remove; | 904 | goto out_unregister; |
1431 | ret = register_pernet_subsys(&nfs4blocklayout_net_ops); | 905 | return 0; |
1432 | if (ret) | ||
1433 | goto out_notifier; | ||
1434 | out: | ||
1435 | return ret; | ||
1436 | 906 | ||
1437 | out_notifier: | 907 | out_unregister: |
1438 | rpc_pipefs_notifier_unregister(&nfs4blocklayout_block); | ||
1439 | out_remove: | ||
1440 | pnfs_unregister_layoutdriver(&blocklayout_type); | 908 | pnfs_unregister_layoutdriver(&blocklayout_type); |
909 | out: | ||
1441 | return ret; | 910 | return ret; |
1442 | } | 911 | } |
1443 | 912 | ||
@@ -1446,8 +915,7 @@ static void __exit nfs4blocklayout_exit(void) | |||
1446 | dprintk("%s: NFSv4 Block Layout Driver Unregistering...\n", | 915 | dprintk("%s: NFSv4 Block Layout Driver Unregistering...\n", |
1447 | __func__); | 916 | __func__); |
1448 | 917 | ||
1449 | rpc_pipefs_notifier_unregister(&nfs4blocklayout_block); | 918 | bl_cleanup_pipefs(); |
1450 | unregister_pernet_subsys(&nfs4blocklayout_net_ops); | ||
1451 | pnfs_unregister_layoutdriver(&blocklayout_type); | 919 | pnfs_unregister_layoutdriver(&blocklayout_type); |
1452 | } | 920 | } |
1453 | 921 | ||
diff --git a/fs/nfs/blocklayout/blocklayout.h b/fs/nfs/blocklayout/blocklayout.h index 9838fb020473..92dca9e90d8d 100644 --- a/fs/nfs/blocklayout/blocklayout.h +++ b/fs/nfs/blocklayout/blocklayout.h | |||
@@ -44,105 +44,112 @@ | |||
44 | #define PAGE_CACHE_SECTOR_SHIFT (PAGE_CACHE_SHIFT - SECTOR_SHIFT) | 44 | #define PAGE_CACHE_SECTOR_SHIFT (PAGE_CACHE_SHIFT - SECTOR_SHIFT) |
45 | #define SECTOR_SIZE (1 << SECTOR_SHIFT) | 45 | #define SECTOR_SIZE (1 << SECTOR_SHIFT) |
46 | 46 | ||
47 | struct block_mount_id { | 47 | struct pnfs_block_dev; |
48 | spinlock_t bm_lock; /* protects list */ | ||
49 | struct list_head bm_devlist; /* holds pnfs_block_dev */ | ||
50 | }; | ||
51 | 48 | ||
52 | struct pnfs_block_dev { | 49 | enum pnfs_block_volume_type { |
53 | struct list_head bm_node; | 50 | PNFS_BLOCK_VOLUME_SIMPLE = 0, |
54 | struct nfs4_deviceid bm_mdevid; /* associated devid */ | 51 | PNFS_BLOCK_VOLUME_SLICE = 1, |
55 | struct block_device *bm_mdev; /* meta device itself */ | 52 | PNFS_BLOCK_VOLUME_CONCAT = 2, |
56 | struct net *net; | 53 | PNFS_BLOCK_VOLUME_STRIPE = 3, |
57 | }; | 54 | }; |
58 | 55 | ||
59 | enum exstate4 { | 56 | #define PNFS_BLOCK_MAX_UUIDS 4 |
60 | PNFS_BLOCK_READWRITE_DATA = 0, | 57 | #define PNFS_BLOCK_MAX_DEVICES 64 |
61 | PNFS_BLOCK_READ_DATA = 1, | 58 | |
62 | PNFS_BLOCK_INVALID_DATA = 2, /* mapped, but data is invalid */ | 59 | /* |
63 | PNFS_BLOCK_NONE_DATA = 3 /* unmapped, it's a hole */ | 60 | * Random upper cap for the uuid length to avoid unbounded allocation. |
61 | * Not actually limited by the protocol. | ||
62 | */ | ||
63 | #define PNFS_BLOCK_UUID_LEN 128 | ||
64 | |||
65 | |||
66 | struct pnfs_block_volume { | ||
67 | enum pnfs_block_volume_type type; | ||
68 | union { | ||
69 | struct { | ||
70 | int len; | ||
71 | int nr_sigs; | ||
72 | struct { | ||
73 | u64 offset; | ||
74 | u32 sig_len; | ||
75 | u8 sig[PNFS_BLOCK_UUID_LEN]; | ||
76 | } sigs[PNFS_BLOCK_MAX_UUIDS]; | ||
77 | } simple; | ||
78 | struct { | ||
79 | u64 start; | ||
80 | u64 len; | ||
81 | u32 volume; | ||
82 | } slice; | ||
83 | struct { | ||
84 | u32 volumes_count; | ||
85 | u32 volumes[PNFS_BLOCK_MAX_DEVICES]; | ||
86 | } concat; | ||
87 | struct { | ||
88 | u64 chunk_size; | ||
89 | u32 volumes_count; | ||
90 | u32 volumes[PNFS_BLOCK_MAX_DEVICES]; | ||
91 | } stripe; | ||
92 | }; | ||
64 | }; | 93 | }; |
65 | 94 | ||
66 | #define MY_MAX_TAGS (15) /* tag bitnums used must be less than this */ | 95 | struct pnfs_block_dev_map { |
96 | sector_t start; | ||
97 | sector_t len; | ||
67 | 98 | ||
68 | struct my_tree { | 99 | sector_t disk_offset; |
69 | sector_t mtt_step_size; /* Internal sector alignment */ | 100 | struct block_device *bdev; |
70 | struct list_head mtt_stub; /* Should be a radix tree */ | ||
71 | }; | 101 | }; |
72 | 102 | ||
73 | struct pnfs_inval_markings { | 103 | struct pnfs_block_dev { |
74 | spinlock_t im_lock; | 104 | struct nfs4_deviceid_node node; |
75 | struct my_tree im_tree; /* Sectors that need LAYOUTCOMMIT */ | 105 | |
76 | sector_t im_block_size; /* Server blocksize in sectors */ | 106 | u64 start; |
77 | struct list_head im_extents; /* Short extents for INVAL->RW conversion */ | 107 | u64 len; |
108 | |||
109 | u32 nr_children; | ||
110 | struct pnfs_block_dev *children; | ||
111 | u64 chunk_size; | ||
112 | |||
113 | struct block_device *bdev; | ||
114 | u64 disk_offset; | ||
115 | |||
116 | bool (*map)(struct pnfs_block_dev *dev, u64 offset, | ||
117 | struct pnfs_block_dev_map *map); | ||
78 | }; | 118 | }; |
79 | 119 | ||
80 | struct pnfs_inval_tracking { | 120 | enum exstate4 { |
81 | struct list_head it_link; | 121 | PNFS_BLOCK_READWRITE_DATA = 0, |
82 | int it_sector; | 122 | PNFS_BLOCK_READ_DATA = 1, |
83 | int it_tags; | 123 | PNFS_BLOCK_INVALID_DATA = 2, /* mapped, but data is invalid */ |
124 | PNFS_BLOCK_NONE_DATA = 3 /* unmapped, it's a hole */ | ||
84 | }; | 125 | }; |
85 | 126 | ||
86 | /* sector_t fields are all in 512-byte sectors */ | 127 | /* sector_t fields are all in 512-byte sectors */ |
87 | struct pnfs_block_extent { | 128 | struct pnfs_block_extent { |
88 | struct kref be_refcnt; | 129 | union { |
89 | struct list_head be_node; /* link into lseg list */ | 130 | struct rb_node be_node; |
90 | struct nfs4_deviceid be_devid; /* FIXME: could use device cache instead */ | 131 | struct list_head be_list; |
91 | struct block_device *be_mdev; | 132 | }; |
133 | struct nfs4_deviceid_node *be_device; | ||
92 | sector_t be_f_offset; /* the starting offset in the file */ | 134 | sector_t be_f_offset; /* the starting offset in the file */ |
93 | sector_t be_length; /* the size of the extent */ | 135 | sector_t be_length; /* the size of the extent */ |
94 | sector_t be_v_offset; /* the starting offset in the volume */ | 136 | sector_t be_v_offset; /* the starting offset in the volume */ |
95 | enum exstate4 be_state; /* the state of this extent */ | 137 | enum exstate4 be_state; /* the state of this extent */ |
96 | struct pnfs_inval_markings *be_inval; /* tracks INVAL->RW transition */ | 138 | #define EXTENT_WRITTEN 1 |
139 | #define EXTENT_COMMITTING 2 | ||
140 | unsigned int be_tag; | ||
97 | }; | 141 | }; |
98 | 142 | ||
99 | /* Shortened extent used by LAYOUTCOMMIT */ | 143 | /* on the wire size of the extent */ |
100 | struct pnfs_block_short_extent { | 144 | #define BL_EXTENT_SIZE (7 * sizeof(__be32) + NFS4_DEVICEID4_SIZE) |
101 | struct list_head bse_node; | ||
102 | struct nfs4_deviceid bse_devid; | ||
103 | struct block_device *bse_mdev; | ||
104 | sector_t bse_f_offset; /* the starting offset in the file */ | ||
105 | sector_t bse_length; /* the size of the extent */ | ||
106 | }; | ||
107 | |||
108 | static inline void | ||
109 | BL_INIT_INVAL_MARKS(struct pnfs_inval_markings *marks, sector_t blocksize) | ||
110 | { | ||
111 | spin_lock_init(&marks->im_lock); | ||
112 | INIT_LIST_HEAD(&marks->im_tree.mtt_stub); | ||
113 | INIT_LIST_HEAD(&marks->im_extents); | ||
114 | marks->im_block_size = blocksize; | ||
115 | marks->im_tree.mtt_step_size = min((sector_t)PAGE_CACHE_SECTORS, | ||
116 | blocksize); | ||
117 | } | ||
118 | |||
119 | enum extentclass4 { | ||
120 | RW_EXTENT = 0, /* READWRTE and INVAL */ | ||
121 | RO_EXTENT = 1, /* READ and NONE */ | ||
122 | EXTENT_LISTS = 2, | ||
123 | }; | ||
124 | |||
125 | static inline int bl_choose_list(enum exstate4 state) | ||
126 | { | ||
127 | if (state == PNFS_BLOCK_READ_DATA || state == PNFS_BLOCK_NONE_DATA) | ||
128 | return RO_EXTENT; | ||
129 | else | ||
130 | return RW_EXTENT; | ||
131 | } | ||
132 | 145 | ||
133 | struct pnfs_block_layout { | 146 | struct pnfs_block_layout { |
134 | struct pnfs_layout_hdr bl_layout; | 147 | struct pnfs_layout_hdr bl_layout; |
135 | struct pnfs_inval_markings bl_inval; /* tracks INVAL->RW transition */ | 148 | struct rb_root bl_ext_rw; |
149 | struct rb_root bl_ext_ro; | ||
136 | spinlock_t bl_ext_lock; /* Protects list manipulation */ | 150 | spinlock_t bl_ext_lock; /* Protects list manipulation */ |
137 | struct list_head bl_extents[EXTENT_LISTS]; /* R and RW extents */ | ||
138 | struct list_head bl_commit; /* Needs layout commit */ | ||
139 | struct list_head bl_committing; /* Layout committing */ | ||
140 | unsigned int bl_count; /* entries in bl_commit */ | ||
141 | sector_t bl_blocksize; /* Server blocksize in sectors */ | ||
142 | }; | 151 | }; |
143 | 152 | ||
144 | #define BLK_ID(lo) ((struct block_mount_id *)(NFS_SERVER(lo->plh_inode)->pnfs_ld_data)) | ||
145 | |||
146 | static inline struct pnfs_block_layout * | 153 | static inline struct pnfs_block_layout * |
147 | BLK_LO2EXT(struct pnfs_layout_hdr *lo) | 154 | BLK_LO2EXT(struct pnfs_layout_hdr *lo) |
148 | { | 155 | { |
@@ -171,41 +178,27 @@ struct bl_msg_hdr { | |||
171 | #define BL_DEVICE_REQUEST_PROC 0x1 /* User level process succeeds */ | 178 | #define BL_DEVICE_REQUEST_PROC 0x1 /* User level process succeeds */ |
172 | #define BL_DEVICE_REQUEST_ERR 0x2 /* User level process fails */ | 179 | #define BL_DEVICE_REQUEST_ERR 0x2 /* User level process fails */ |
173 | 180 | ||
174 | /* blocklayoutdev.c */ | 181 | /* dev.c */ |
175 | ssize_t bl_pipe_downcall(struct file *, const char __user *, size_t); | 182 | struct nfs4_deviceid_node *bl_alloc_deviceid_node(struct nfs_server *server, |
176 | void bl_pipe_destroy_msg(struct rpc_pipe_msg *); | 183 | struct pnfs_device *pdev, gfp_t gfp_mask); |
177 | void nfs4_blkdev_put(struct block_device *bdev); | 184 | void bl_free_deviceid_node(struct nfs4_deviceid_node *d); |
178 | struct pnfs_block_dev *nfs4_blk_decode_device(struct nfs_server *server, | 185 | |
179 | struct pnfs_device *dev); | 186 | /* extent_tree.c */ |
180 | int nfs4_blk_process_layoutget(struct pnfs_layout_hdr *lo, | 187 | int ext_tree_insert(struct pnfs_block_layout *bl, |
181 | struct nfs4_layoutget_res *lgr, gfp_t gfp_flags); | 188 | struct pnfs_block_extent *new); |
182 | 189 | int ext_tree_remove(struct pnfs_block_layout *bl, bool rw, sector_t start, | |
183 | /* blocklayoutdm.c */ | 190 | sector_t end); |
184 | void bl_free_block_dev(struct pnfs_block_dev *bdev); | 191 | int ext_tree_mark_written(struct pnfs_block_layout *bl, sector_t start, |
185 | 192 | sector_t len); | |
186 | /* extents.c */ | 193 | bool ext_tree_lookup(struct pnfs_block_layout *bl, sector_t isect, |
187 | struct pnfs_block_extent * | 194 | struct pnfs_block_extent *ret, bool rw); |
188 | bl_find_get_extent(struct pnfs_block_layout *bl, sector_t isect, | 195 | int ext_tree_prepare_commit(struct nfs4_layoutcommit_args *arg); |
189 | struct pnfs_block_extent **cow_read); | 196 | void ext_tree_mark_committed(struct nfs4_layoutcommit_args *arg, int status); |
190 | int bl_mark_sectors_init(struct pnfs_inval_markings *marks, | 197 | |
191 | sector_t offset, sector_t length); | 198 | /* rpc_pipefs.c */ |
192 | void bl_put_extent(struct pnfs_block_extent *be); | 199 | dev_t bl_resolve_deviceid(struct nfs_server *server, |
193 | struct pnfs_block_extent *bl_alloc_extent(void); | 200 | struct pnfs_block_volume *b, gfp_t gfp_mask); |
194 | int bl_is_sector_init(struct pnfs_inval_markings *marks, sector_t isect); | 201 | int __init bl_init_pipefs(void); |
195 | int encode_pnfs_block_layoutupdate(struct pnfs_block_layout *bl, | 202 | void __exit bl_cleanup_pipefs(void); |
196 | struct xdr_stream *xdr, | ||
197 | const struct nfs4_layoutcommit_args *arg); | ||
198 | void clean_pnfs_block_layoutupdate(struct pnfs_block_layout *bl, | ||
199 | const struct nfs4_layoutcommit_args *arg, | ||
200 | int status); | ||
201 | int bl_add_merge_extent(struct pnfs_block_layout *bl, | ||
202 | struct pnfs_block_extent *new); | ||
203 | int bl_mark_for_commit(struct pnfs_block_extent *be, | ||
204 | sector_t offset, sector_t length, | ||
205 | struct pnfs_block_short_extent *new); | ||
206 | int bl_push_one_short_extent(struct pnfs_inval_markings *marks); | ||
207 | struct pnfs_block_short_extent * | ||
208 | bl_pop_one_short_extent(struct pnfs_inval_markings *marks); | ||
209 | void bl_free_short_extents(struct pnfs_inval_markings *marks, int num_to_free); | ||
210 | 203 | ||
211 | #endif /* FS_NFS_NFS4BLOCKLAYOUT_H */ | 204 | #endif /* FS_NFS_NFS4BLOCKLAYOUT_H */ |
diff --git a/fs/nfs/blocklayout/blocklayoutdev.c b/fs/nfs/blocklayout/blocklayoutdev.c deleted file mode 100644 index 04303b5c9361..000000000000 --- a/fs/nfs/blocklayout/blocklayoutdev.c +++ /dev/null | |||
@@ -1,384 +0,0 @@ | |||
1 | /* | ||
2 | * linux/fs/nfs/blocklayout/blocklayoutdev.c | ||
3 | * | ||
4 | * Device operations for the pnfs nfs4 file layout driver. | ||
5 | * | ||
6 | * Copyright (c) 2006 The Regents of the University of Michigan. | ||
7 | * All rights reserved. | ||
8 | * | ||
9 | * Andy Adamson <andros@citi.umich.edu> | ||
10 | * Fred Isaman <iisaman@umich.edu> | ||
11 | * | ||
12 | * permission is granted to use, copy, create derivative works and | ||
13 | * redistribute this software and such derivative works for any purpose, | ||
14 | * so long as the name of the university of michigan is not used in | ||
15 | * any advertising or publicity pertaining to the use or distribution | ||
16 | * of this software without specific, written prior authorization. if | ||
17 | * the above copyright notice or any other identification of the | ||
18 | * university of michigan is included in any copy of any portion of | ||
19 | * this software, then the disclaimer below must also be included. | ||
20 | * | ||
21 | * this software is provided as is, without representation from the | ||
22 | * university of michigan as to its fitness for any purpose, and without | ||
23 | * warranty by the university of michigan of any kind, either express | ||
24 | * or implied, including without limitation the implied warranties of | ||
25 | * merchantability and fitness for a particular purpose. the regents | ||
26 | * of the university of michigan shall not be liable for any damages, | ||
27 | * including special, indirect, incidental, or consequential damages, | ||
28 | * with respect to any claim arising out or in connection with the use | ||
29 | * of the software, even if it has been or is hereafter advised of the | ||
30 | * possibility of such damages. | ||
31 | */ | ||
32 | #include <linux/module.h> | ||
33 | #include <linux/buffer_head.h> /* __bread */ | ||
34 | |||
35 | #include <linux/genhd.h> | ||
36 | #include <linux/blkdev.h> | ||
37 | #include <linux/hash.h> | ||
38 | |||
39 | #include "blocklayout.h" | ||
40 | |||
41 | #define NFSDBG_FACILITY NFSDBG_PNFS_LD | ||
42 | |||
43 | static int decode_sector_number(__be32 **rp, sector_t *sp) | ||
44 | { | ||
45 | uint64_t s; | ||
46 | |||
47 | *rp = xdr_decode_hyper(*rp, &s); | ||
48 | if (s & 0x1ff) { | ||
49 | printk(KERN_WARNING "NFS: %s: sector not aligned\n", __func__); | ||
50 | return -1; | ||
51 | } | ||
52 | *sp = s >> SECTOR_SHIFT; | ||
53 | return 0; | ||
54 | } | ||
55 | |||
56 | /* | ||
57 | * Release the block device | ||
58 | */ | ||
59 | void nfs4_blkdev_put(struct block_device *bdev) | ||
60 | { | ||
61 | dprintk("%s for device %d:%d\n", __func__, MAJOR(bdev->bd_dev), | ||
62 | MINOR(bdev->bd_dev)); | ||
63 | blkdev_put(bdev, FMODE_READ); | ||
64 | } | ||
65 | |||
66 | ssize_t bl_pipe_downcall(struct file *filp, const char __user *src, | ||
67 | size_t mlen) | ||
68 | { | ||
69 | struct nfs_net *nn = net_generic(filp->f_dentry->d_sb->s_fs_info, | ||
70 | nfs_net_id); | ||
71 | |||
72 | if (mlen != sizeof (struct bl_dev_msg)) | ||
73 | return -EINVAL; | ||
74 | |||
75 | if (copy_from_user(&nn->bl_mount_reply, src, mlen) != 0) | ||
76 | return -EFAULT; | ||
77 | |||
78 | wake_up(&nn->bl_wq); | ||
79 | |||
80 | return mlen; | ||
81 | } | ||
82 | |||
83 | void bl_pipe_destroy_msg(struct rpc_pipe_msg *msg) | ||
84 | { | ||
85 | struct bl_pipe_msg *bl_pipe_msg = container_of(msg, struct bl_pipe_msg, msg); | ||
86 | |||
87 | if (msg->errno >= 0) | ||
88 | return; | ||
89 | wake_up(bl_pipe_msg->bl_wq); | ||
90 | } | ||
91 | |||
92 | /* | ||
93 | * Decodes pnfs_block_deviceaddr4 which is XDR encoded in dev->dev_addr_buf. | ||
94 | */ | ||
95 | struct pnfs_block_dev * | ||
96 | nfs4_blk_decode_device(struct nfs_server *server, | ||
97 | struct pnfs_device *dev) | ||
98 | { | ||
99 | struct pnfs_block_dev *rv; | ||
100 | struct block_device *bd = NULL; | ||
101 | struct bl_pipe_msg bl_pipe_msg; | ||
102 | struct rpc_pipe_msg *msg = &bl_pipe_msg.msg; | ||
103 | struct bl_msg_hdr bl_msg = { | ||
104 | .type = BL_DEVICE_MOUNT, | ||
105 | .totallen = dev->mincount, | ||
106 | }; | ||
107 | uint8_t *dataptr; | ||
108 | DECLARE_WAITQUEUE(wq, current); | ||
109 | int offset, len, i, rc; | ||
110 | struct net *net = server->nfs_client->cl_net; | ||
111 | struct nfs_net *nn = net_generic(net, nfs_net_id); | ||
112 | struct bl_dev_msg *reply = &nn->bl_mount_reply; | ||
113 | |||
114 | dprintk("%s CREATING PIPEFS MESSAGE\n", __func__); | ||
115 | dprintk("%s: deviceid: %s, mincount: %d\n", __func__, dev->dev_id.data, | ||
116 | dev->mincount); | ||
117 | |||
118 | bl_pipe_msg.bl_wq = &nn->bl_wq; | ||
119 | memset(msg, 0, sizeof(*msg)); | ||
120 | msg->data = kzalloc(sizeof(bl_msg) + dev->mincount, GFP_NOFS); | ||
121 | if (!msg->data) { | ||
122 | rv = ERR_PTR(-ENOMEM); | ||
123 | goto out; | ||
124 | } | ||
125 | |||
126 | memcpy(msg->data, &bl_msg, sizeof(bl_msg)); | ||
127 | dataptr = (uint8_t *) msg->data; | ||
128 | len = dev->mincount; | ||
129 | offset = sizeof(bl_msg); | ||
130 | for (i = 0; len > 0; i++) { | ||
131 | memcpy(&dataptr[offset], page_address(dev->pages[i]), | ||
132 | len < PAGE_CACHE_SIZE ? len : PAGE_CACHE_SIZE); | ||
133 | len -= PAGE_CACHE_SIZE; | ||
134 | offset += PAGE_CACHE_SIZE; | ||
135 | } | ||
136 | msg->len = sizeof(bl_msg) + dev->mincount; | ||
137 | |||
138 | dprintk("%s CALLING USERSPACE DAEMON\n", __func__); | ||
139 | add_wait_queue(&nn->bl_wq, &wq); | ||
140 | rc = rpc_queue_upcall(nn->bl_device_pipe, msg); | ||
141 | if (rc < 0) { | ||
142 | remove_wait_queue(&nn->bl_wq, &wq); | ||
143 | rv = ERR_PTR(rc); | ||
144 | goto out; | ||
145 | } | ||
146 | |||
147 | set_current_state(TASK_UNINTERRUPTIBLE); | ||
148 | schedule(); | ||
149 | __set_current_state(TASK_RUNNING); | ||
150 | remove_wait_queue(&nn->bl_wq, &wq); | ||
151 | |||
152 | if (reply->status != BL_DEVICE_REQUEST_PROC) { | ||
153 | dprintk("%s failed to open device: %d\n", | ||
154 | __func__, reply->status); | ||
155 | rv = ERR_PTR(-EINVAL); | ||
156 | goto out; | ||
157 | } | ||
158 | |||
159 | bd = blkdev_get_by_dev(MKDEV(reply->major, reply->minor), | ||
160 | FMODE_READ, NULL); | ||
161 | if (IS_ERR(bd)) { | ||
162 | dprintk("%s failed to open device : %ld\n", __func__, | ||
163 | PTR_ERR(bd)); | ||
164 | rv = ERR_CAST(bd); | ||
165 | goto out; | ||
166 | } | ||
167 | |||
168 | rv = kzalloc(sizeof(*rv), GFP_NOFS); | ||
169 | if (!rv) { | ||
170 | rv = ERR_PTR(-ENOMEM); | ||
171 | goto out; | ||
172 | } | ||
173 | |||
174 | rv->bm_mdev = bd; | ||
175 | memcpy(&rv->bm_mdevid, &dev->dev_id, sizeof(struct nfs4_deviceid)); | ||
176 | rv->net = net; | ||
177 | dprintk("%s Created device %s with bd_block_size %u\n", | ||
178 | __func__, | ||
179 | bd->bd_disk->disk_name, | ||
180 | bd->bd_block_size); | ||
181 | |||
182 | out: | ||
183 | kfree(msg->data); | ||
184 | return rv; | ||
185 | } | ||
186 | |||
187 | /* Map deviceid returned by the server to constructed block_device */ | ||
188 | static struct block_device *translate_devid(struct pnfs_layout_hdr *lo, | ||
189 | struct nfs4_deviceid *id) | ||
190 | { | ||
191 | struct block_device *rv = NULL; | ||
192 | struct block_mount_id *mid; | ||
193 | struct pnfs_block_dev *dev; | ||
194 | |||
195 | dprintk("%s enter, lo=%p, id=%p\n", __func__, lo, id); | ||
196 | mid = BLK_ID(lo); | ||
197 | spin_lock(&mid->bm_lock); | ||
198 | list_for_each_entry(dev, &mid->bm_devlist, bm_node) { | ||
199 | if (memcmp(id->data, dev->bm_mdevid.data, | ||
200 | NFS4_DEVICEID4_SIZE) == 0) { | ||
201 | rv = dev->bm_mdev; | ||
202 | goto out; | ||
203 | } | ||
204 | } | ||
205 | out: | ||
206 | spin_unlock(&mid->bm_lock); | ||
207 | dprintk("%s returning %p\n", __func__, rv); | ||
208 | return rv; | ||
209 | } | ||
210 | |||
211 | /* Tracks info needed to ensure extents in layout obey constraints of spec */ | ||
212 | struct layout_verification { | ||
213 | u32 mode; /* R or RW */ | ||
214 | u64 start; /* Expected start of next non-COW extent */ | ||
215 | u64 inval; /* Start of INVAL coverage */ | ||
216 | u64 cowread; /* End of COW read coverage */ | ||
217 | }; | ||
218 | |||
219 | /* Verify the extent meets the layout requirements of the pnfs-block draft, | ||
220 | * section 2.3.1. | ||
221 | */ | ||
222 | static int verify_extent(struct pnfs_block_extent *be, | ||
223 | struct layout_verification *lv) | ||
224 | { | ||
225 | if (lv->mode == IOMODE_READ) { | ||
226 | if (be->be_state == PNFS_BLOCK_READWRITE_DATA || | ||
227 | be->be_state == PNFS_BLOCK_INVALID_DATA) | ||
228 | return -EIO; | ||
229 | if (be->be_f_offset != lv->start) | ||
230 | return -EIO; | ||
231 | lv->start += be->be_length; | ||
232 | return 0; | ||
233 | } | ||
234 | /* lv->mode == IOMODE_RW */ | ||
235 | if (be->be_state == PNFS_BLOCK_READWRITE_DATA) { | ||
236 | if (be->be_f_offset != lv->start) | ||
237 | return -EIO; | ||
238 | if (lv->cowread > lv->start) | ||
239 | return -EIO; | ||
240 | lv->start += be->be_length; | ||
241 | lv->inval = lv->start; | ||
242 | return 0; | ||
243 | } else if (be->be_state == PNFS_BLOCK_INVALID_DATA) { | ||
244 | if (be->be_f_offset != lv->start) | ||
245 | return -EIO; | ||
246 | lv->start += be->be_length; | ||
247 | return 0; | ||
248 | } else if (be->be_state == PNFS_BLOCK_READ_DATA) { | ||
249 | if (be->be_f_offset > lv->start) | ||
250 | return -EIO; | ||
251 | if (be->be_f_offset < lv->inval) | ||
252 | return -EIO; | ||
253 | if (be->be_f_offset < lv->cowread) | ||
254 | return -EIO; | ||
255 | /* It looks like you might want to min this with lv->start, | ||
256 | * but you really don't. | ||
257 | */ | ||
258 | lv->inval = lv->inval + be->be_length; | ||
259 | lv->cowread = be->be_f_offset + be->be_length; | ||
260 | return 0; | ||
261 | } else | ||
262 | return -EIO; | ||
263 | } | ||
264 | |||
265 | /* XDR decode pnfs_block_layout4 structure */ | ||
266 | int | ||
267 | nfs4_blk_process_layoutget(struct pnfs_layout_hdr *lo, | ||
268 | struct nfs4_layoutget_res *lgr, gfp_t gfp_flags) | ||
269 | { | ||
270 | struct pnfs_block_layout *bl = BLK_LO2EXT(lo); | ||
271 | int i, status = -EIO; | ||
272 | uint32_t count; | ||
273 | struct pnfs_block_extent *be = NULL, *save; | ||
274 | struct xdr_stream stream; | ||
275 | struct xdr_buf buf; | ||
276 | struct page *scratch; | ||
277 | __be32 *p; | ||
278 | struct layout_verification lv = { | ||
279 | .mode = lgr->range.iomode, | ||
280 | .start = lgr->range.offset >> SECTOR_SHIFT, | ||
281 | .inval = lgr->range.offset >> SECTOR_SHIFT, | ||
282 | .cowread = lgr->range.offset >> SECTOR_SHIFT, | ||
283 | }; | ||
284 | LIST_HEAD(extents); | ||
285 | |||
286 | dprintk("---> %s\n", __func__); | ||
287 | |||
288 | scratch = alloc_page(gfp_flags); | ||
289 | if (!scratch) | ||
290 | return -ENOMEM; | ||
291 | |||
292 | xdr_init_decode_pages(&stream, &buf, lgr->layoutp->pages, lgr->layoutp->len); | ||
293 | xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE); | ||
294 | |||
295 | p = xdr_inline_decode(&stream, 4); | ||
296 | if (unlikely(!p)) | ||
297 | goto out_err; | ||
298 | |||
299 | count = be32_to_cpup(p++); | ||
300 | |||
301 | dprintk("%s enter, number of extents %i\n", __func__, count); | ||
302 | p = xdr_inline_decode(&stream, (28 + NFS4_DEVICEID4_SIZE) * count); | ||
303 | if (unlikely(!p)) | ||
304 | goto out_err; | ||
305 | |||
306 | /* Decode individual extents, putting them in temporary | ||
307 | * staging area until whole layout is decoded to make error | ||
308 | * recovery easier. | ||
309 | */ | ||
310 | for (i = 0; i < count; i++) { | ||
311 | be = bl_alloc_extent(); | ||
312 | if (!be) { | ||
313 | status = -ENOMEM; | ||
314 | goto out_err; | ||
315 | } | ||
316 | memcpy(&be->be_devid, p, NFS4_DEVICEID4_SIZE); | ||
317 | p += XDR_QUADLEN(NFS4_DEVICEID4_SIZE); | ||
318 | be->be_mdev = translate_devid(lo, &be->be_devid); | ||
319 | if (!be->be_mdev) | ||
320 | goto out_err; | ||
321 | |||
322 | /* The next three values are read in as bytes, | ||
323 | * but stored as 512-byte sector lengths | ||
324 | */ | ||
325 | if (decode_sector_number(&p, &be->be_f_offset) < 0) | ||
326 | goto out_err; | ||
327 | if (decode_sector_number(&p, &be->be_length) < 0) | ||
328 | goto out_err; | ||
329 | if (decode_sector_number(&p, &be->be_v_offset) < 0) | ||
330 | goto out_err; | ||
331 | be->be_state = be32_to_cpup(p++); | ||
332 | if (be->be_state == PNFS_BLOCK_INVALID_DATA) | ||
333 | be->be_inval = &bl->bl_inval; | ||
334 | if (verify_extent(be, &lv)) { | ||
335 | dprintk("%s verify failed\n", __func__); | ||
336 | goto out_err; | ||
337 | } | ||
338 | list_add_tail(&be->be_node, &extents); | ||
339 | } | ||
340 | if (lgr->range.offset + lgr->range.length != | ||
341 | lv.start << SECTOR_SHIFT) { | ||
342 | dprintk("%s Final length mismatch\n", __func__); | ||
343 | be = NULL; | ||
344 | goto out_err; | ||
345 | } | ||
346 | if (lv.start < lv.cowread) { | ||
347 | dprintk("%s Final uncovered COW extent\n", __func__); | ||
348 | be = NULL; | ||
349 | goto out_err; | ||
350 | } | ||
351 | /* Extents decoded properly, now try to merge them in to | ||
352 | * existing layout extents. | ||
353 | */ | ||
354 | spin_lock(&bl->bl_ext_lock); | ||
355 | list_for_each_entry_safe(be, save, &extents, be_node) { | ||
356 | list_del(&be->be_node); | ||
357 | status = bl_add_merge_extent(bl, be); | ||
358 | if (status) { | ||
359 | spin_unlock(&bl->bl_ext_lock); | ||
360 | /* This is a fairly catastrophic error, as the | ||
361 | * entire layout extent lists are now corrupted. | ||
362 | * We should have some way to distinguish this. | ||
363 | */ | ||
364 | be = NULL; | ||
365 | goto out_err; | ||
366 | } | ||
367 | } | ||
368 | spin_unlock(&bl->bl_ext_lock); | ||
369 | status = 0; | ||
370 | out: | ||
371 | __free_page(scratch); | ||
372 | dprintk("%s returns %i\n", __func__, status); | ||
373 | return status; | ||
374 | |||
375 | out_err: | ||
376 | bl_put_extent(be); | ||
377 | while (!list_empty(&extents)) { | ||
378 | be = list_first_entry(&extents, struct pnfs_block_extent, | ||
379 | be_node); | ||
380 | list_del(&be->be_node); | ||
381 | bl_put_extent(be); | ||
382 | } | ||
383 | goto out; | ||
384 | } | ||
diff --git a/fs/nfs/blocklayout/blocklayoutdm.c b/fs/nfs/blocklayout/blocklayoutdm.c deleted file mode 100644 index 8999cfddd866..000000000000 --- a/fs/nfs/blocklayout/blocklayoutdm.c +++ /dev/null | |||
@@ -1,108 +0,0 @@ | |||
1 | /* | ||
2 | * linux/fs/nfs/blocklayout/blocklayoutdm.c | ||
3 | * | ||
4 | * Module for the NFSv4.1 pNFS block layout driver. | ||
5 | * | ||
6 | * Copyright (c) 2007 The Regents of the University of Michigan. | ||
7 | * All rights reserved. | ||
8 | * | ||
9 | * Fred Isaman <iisaman@umich.edu> | ||
10 | * Andy Adamson <andros@citi.umich.edu> | ||
11 | * | ||
12 | * permission is granted to use, copy, create derivative works and | ||
13 | * redistribute this software and such derivative works for any purpose, | ||
14 | * so long as the name of the university of michigan is not used in | ||
15 | * any advertising or publicity pertaining to the use or distribution | ||
16 | * of this software without specific, written prior authorization. if | ||
17 | * the above copyright notice or any other identification of the | ||
18 | * university of michigan is included in any copy of any portion of | ||
19 | * this software, then the disclaimer below must also be included. | ||
20 | * | ||
21 | * this software is provided as is, without representation from the | ||
22 | * university of michigan as to its fitness for any purpose, and without | ||
23 | * warranty by the university of michigan of any kind, either express | ||
24 | * or implied, including without limitation the implied warranties of | ||
25 | * merchantability and fitness for a particular purpose. the regents | ||
26 | * of the university of michigan shall not be liable for any damages, | ||
27 | * including special, indirect, incidental, or consequential damages, | ||
28 | * with respect to any claim arising out or in connection with the use | ||
29 | * of the software, even if it has been or is hereafter advised of the | ||
30 | * possibility of such damages. | ||
31 | */ | ||
32 | |||
33 | #include <linux/genhd.h> /* gendisk - used in a dprintk*/ | ||
34 | #include <linux/sched.h> | ||
35 | #include <linux/hash.h> | ||
36 | |||
37 | #include "blocklayout.h" | ||
38 | |||
39 | #define NFSDBG_FACILITY NFSDBG_PNFS_LD | ||
40 | |||
/*
 * Tell the userspace blkmapd daemon (via the per-net rpc_pipefs
 * "blocklayout" pipe) that device @dev is no longer in use, and wait
 * uninterruptibly for the daemon's downcall to wake us on nn->bl_wq.
 * Best effort: allocation or upcall failure is silently ignored.
 */
static void dev_remove(struct net *net, dev_t dev)
{
	struct bl_pipe_msg bl_pipe_msg;
	struct rpc_pipe_msg *msg = &bl_pipe_msg.msg;
	struct bl_dev_msg bl_umount_request;
	struct bl_msg_hdr bl_msg = {
		.type = BL_DEVICE_UMOUNT,
		.totallen = sizeof(bl_umount_request),
	};
	uint8_t *dataptr;
	DECLARE_WAITQUEUE(wq, current);
	struct nfs_net *nn = net_generic(net, nfs_net_id);

	dprintk("Entering %s\n", __func__);

	bl_pipe_msg.bl_wq = &nn->bl_wq;

	/* Upcall payload: fixed header immediately followed by the request. */
	memset(msg, 0, sizeof(*msg));
	msg->len = sizeof(bl_msg) + bl_msg.totallen;
	msg->data = kzalloc(msg->len, GFP_NOFS);
	if (!msg->data)
		goto out;

	memset(&bl_umount_request, 0, sizeof(bl_umount_request));
	bl_umount_request.major = MAJOR(dev);
	bl_umount_request.minor = MINOR(dev);

	memcpy(msg->data, &bl_msg, sizeof(bl_msg));
	dataptr = (uint8_t *) msg->data;
	memcpy(&dataptr[sizeof(bl_msg)], &bl_umount_request, sizeof(bl_umount_request));

	/* Register on the waitqueue before queueing so the reply wakes us. */
	add_wait_queue(&nn->bl_wq, &wq);
	if (rpc_queue_upcall(nn->bl_device_pipe, msg) < 0) {
		remove_wait_queue(&nn->bl_wq, &wq);
		goto out;
	}

	/* NOTE(review): task state is set after the upcall is queued;
	 * presumably the downcall's wake_up runs under the waitqueue lock
	 * so the wakeup cannot be lost -- confirm against the pipe's
	 * downcall handler. */
	set_current_state(TASK_UNINTERRUPTIBLE);
	schedule();
	__set_current_state(TASK_RUNNING);
	remove_wait_queue(&nn->bl_wq, &wq);

out:
	kfree(msg->data);
}
85 | |||
86 | /* | ||
87 | * Release meta device | ||
88 | */ | ||
89 | static void nfs4_blk_metadev_release(struct pnfs_block_dev *bdev) | ||
90 | { | ||
91 | dprintk("%s Releasing\n", __func__); | ||
92 | nfs4_blkdev_put(bdev->bm_mdev); | ||
93 | dev_remove(bdev->net, bdev->bm_mdev->bd_dev); | ||
94 | } | ||
95 | |||
96 | void bl_free_block_dev(struct pnfs_block_dev *bdev) | ||
97 | { | ||
98 | if (bdev) { | ||
99 | if (bdev->bm_mdev) { | ||
100 | dprintk("%s Removing DM device: %d:%d\n", | ||
101 | __func__, | ||
102 | MAJOR(bdev->bm_mdev->bd_dev), | ||
103 | MINOR(bdev->bm_mdev->bd_dev)); | ||
104 | nfs4_blk_metadev_release(bdev); | ||
105 | } | ||
106 | kfree(bdev); | ||
107 | } | ||
108 | } | ||
diff --git a/fs/nfs/blocklayout/dev.c b/fs/nfs/blocklayout/dev.c new file mode 100644 index 000000000000..5aed4f98df41 --- /dev/null +++ b/fs/nfs/blocklayout/dev.c | |||
@@ -0,0 +1,363 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2014 Christoph Hellwig. | ||
3 | */ | ||
4 | #include <linux/sunrpc/svc.h> | ||
5 | #include <linux/blkdev.h> | ||
6 | #include <linux/nfs4.h> | ||
7 | #include <linux/nfs_fs.h> | ||
8 | #include <linux/nfs_xdr.h> | ||
9 | |||
10 | #include "blocklayout.h" | ||
11 | |||
12 | #define NFSDBG_FACILITY NFSDBG_PNFS_LD | ||
13 | |||
14 | static void | ||
15 | bl_free_device(struct pnfs_block_dev *dev) | ||
16 | { | ||
17 | if (dev->nr_children) { | ||
18 | int i; | ||
19 | |||
20 | for (i = 0; i < dev->nr_children; i++) | ||
21 | bl_free_device(&dev->children[i]); | ||
22 | kfree(dev->children); | ||
23 | } else { | ||
24 | if (dev->bdev) | ||
25 | blkdev_put(dev->bdev, FMODE_READ); | ||
26 | } | ||
27 | } | ||
28 | |||
29 | void | ||
30 | bl_free_deviceid_node(struct nfs4_deviceid_node *d) | ||
31 | { | ||
32 | struct pnfs_block_dev *dev = | ||
33 | container_of(d, struct pnfs_block_dev, node); | ||
34 | |||
35 | bl_free_device(dev); | ||
36 | kfree(dev); | ||
37 | } | ||
38 | |||
39 | static int | ||
40 | nfs4_block_decode_volume(struct xdr_stream *xdr, struct pnfs_block_volume *b) | ||
41 | { | ||
42 | __be32 *p; | ||
43 | int i; | ||
44 | |||
45 | p = xdr_inline_decode(xdr, 4); | ||
46 | if (!p) | ||
47 | return -EIO; | ||
48 | b->type = be32_to_cpup(p++); | ||
49 | |||
50 | switch (b->type) { | ||
51 | case PNFS_BLOCK_VOLUME_SIMPLE: | ||
52 | p = xdr_inline_decode(xdr, 4); | ||
53 | if (!p) | ||
54 | return -EIO; | ||
55 | b->simple.nr_sigs = be32_to_cpup(p++); | ||
56 | if (!b->simple.nr_sigs) { | ||
57 | dprintk("no signature\n"); | ||
58 | return -EIO; | ||
59 | } | ||
60 | |||
61 | b->simple.len = 4 + 4; | ||
62 | for (i = 0; i < b->simple.nr_sigs; i++) { | ||
63 | p = xdr_inline_decode(xdr, 8 + 4); | ||
64 | if (!p) | ||
65 | return -EIO; | ||
66 | p = xdr_decode_hyper(p, &b->simple.sigs[i].offset); | ||
67 | b->simple.sigs[i].sig_len = be32_to_cpup(p++); | ||
68 | |||
69 | p = xdr_inline_decode(xdr, b->simple.sigs[i].sig_len); | ||
70 | if (!p) | ||
71 | return -EIO; | ||
72 | memcpy(&b->simple.sigs[i].sig, p, | ||
73 | b->simple.sigs[i].sig_len); | ||
74 | |||
75 | b->simple.len += 8 + 4 + b->simple.sigs[i].sig_len; | ||
76 | } | ||
77 | break; | ||
78 | case PNFS_BLOCK_VOLUME_SLICE: | ||
79 | p = xdr_inline_decode(xdr, 8 + 8 + 4); | ||
80 | if (!p) | ||
81 | return -EIO; | ||
82 | p = xdr_decode_hyper(p, &b->slice.start); | ||
83 | p = xdr_decode_hyper(p, &b->slice.len); | ||
84 | b->slice.volume = be32_to_cpup(p++); | ||
85 | break; | ||
86 | case PNFS_BLOCK_VOLUME_CONCAT: | ||
87 | p = xdr_inline_decode(xdr, 4); | ||
88 | if (!p) | ||
89 | return -EIO; | ||
90 | b->concat.volumes_count = be32_to_cpup(p++); | ||
91 | |||
92 | p = xdr_inline_decode(xdr, b->concat.volumes_count * 4); | ||
93 | if (!p) | ||
94 | return -EIO; | ||
95 | for (i = 0; i < b->concat.volumes_count; i++) | ||
96 | b->concat.volumes[i] = be32_to_cpup(p++); | ||
97 | break; | ||
98 | case PNFS_BLOCK_VOLUME_STRIPE: | ||
99 | p = xdr_inline_decode(xdr, 8 + 4); | ||
100 | if (!p) | ||
101 | return -EIO; | ||
102 | p = xdr_decode_hyper(p, &b->stripe.chunk_size); | ||
103 | b->stripe.volumes_count = be32_to_cpup(p++); | ||
104 | |||
105 | p = xdr_inline_decode(xdr, b->stripe.volumes_count * 4); | ||
106 | if (!p) | ||
107 | return -EIO; | ||
108 | for (i = 0; i < b->stripe.volumes_count; i++) | ||
109 | b->stripe.volumes[i] = be32_to_cpup(p++); | ||
110 | break; | ||
111 | default: | ||
112 | dprintk("unknown volume type!\n"); | ||
113 | return -EIO; | ||
114 | } | ||
115 | |||
116 | return 0; | ||
117 | } | ||
118 | |||
119 | static bool bl_map_simple(struct pnfs_block_dev *dev, u64 offset, | ||
120 | struct pnfs_block_dev_map *map) | ||
121 | { | ||
122 | map->start = dev->start; | ||
123 | map->len = dev->len; | ||
124 | map->disk_offset = dev->disk_offset; | ||
125 | map->bdev = dev->bdev; | ||
126 | return true; | ||
127 | } | ||
128 | |||
129 | static bool bl_map_concat(struct pnfs_block_dev *dev, u64 offset, | ||
130 | struct pnfs_block_dev_map *map) | ||
131 | { | ||
132 | int i; | ||
133 | |||
134 | for (i = 0; i < dev->nr_children; i++) { | ||
135 | struct pnfs_block_dev *child = &dev->children[i]; | ||
136 | |||
137 | if (child->start > offset || | ||
138 | child->start + child->len <= offset) | ||
139 | continue; | ||
140 | |||
141 | child->map(child, offset - child->start, map); | ||
142 | return true; | ||
143 | } | ||
144 | |||
145 | dprintk("%s: ran off loop!\n", __func__); | ||
146 | return false; | ||
147 | } | ||
148 | |||
149 | static bool bl_map_stripe(struct pnfs_block_dev *dev, u64 offset, | ||
150 | struct pnfs_block_dev_map *map) | ||
151 | { | ||
152 | struct pnfs_block_dev *child; | ||
153 | u64 chunk; | ||
154 | u32 chunk_idx; | ||
155 | u64 disk_offset; | ||
156 | |||
157 | chunk = div_u64(offset, dev->chunk_size); | ||
158 | div_u64_rem(chunk, dev->nr_children, &chunk_idx); | ||
159 | |||
160 | if (chunk_idx > dev->nr_children) { | ||
161 | dprintk("%s: invalid chunk idx %d (%lld/%lld)\n", | ||
162 | __func__, chunk_idx, offset, dev->chunk_size); | ||
163 | /* error, should not happen */ | ||
164 | return false; | ||
165 | } | ||
166 | |||
167 | /* truncate offset to the beginning of the stripe */ | ||
168 | offset = chunk * dev->chunk_size; | ||
169 | |||
170 | /* disk offset of the stripe */ | ||
171 | disk_offset = div_u64(offset, dev->nr_children); | ||
172 | |||
173 | child = &dev->children[chunk_idx]; | ||
174 | child->map(child, disk_offset, map); | ||
175 | |||
176 | map->start += offset; | ||
177 | map->disk_offset += disk_offset; | ||
178 | map->len = dev->chunk_size; | ||
179 | return true; | ||
180 | } | ||
181 | |||
182 | static int | ||
183 | bl_parse_deviceid(struct nfs_server *server, struct pnfs_block_dev *d, | ||
184 | struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask); | ||
185 | |||
186 | |||
187 | static int | ||
188 | bl_parse_simple(struct nfs_server *server, struct pnfs_block_dev *d, | ||
189 | struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask) | ||
190 | { | ||
191 | struct pnfs_block_volume *v = &volumes[idx]; | ||
192 | dev_t dev; | ||
193 | |||
194 | dev = bl_resolve_deviceid(server, v, gfp_mask); | ||
195 | if (!dev) | ||
196 | return -EIO; | ||
197 | |||
198 | d->bdev = blkdev_get_by_dev(dev, FMODE_READ, NULL); | ||
199 | if (IS_ERR(d->bdev)) { | ||
200 | printk(KERN_WARNING "pNFS: failed to open device %d:%d (%ld)\n", | ||
201 | MAJOR(dev), MINOR(dev), PTR_ERR(d->bdev)); | ||
202 | return PTR_ERR(d->bdev); | ||
203 | } | ||
204 | |||
205 | |||
206 | d->len = i_size_read(d->bdev->bd_inode); | ||
207 | d->map = bl_map_simple; | ||
208 | |||
209 | printk(KERN_INFO "pNFS: using block device %s\n", | ||
210 | d->bdev->bd_disk->disk_name); | ||
211 | return 0; | ||
212 | } | ||
213 | |||
214 | static int | ||
215 | bl_parse_slice(struct nfs_server *server, struct pnfs_block_dev *d, | ||
216 | struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask) | ||
217 | { | ||
218 | struct pnfs_block_volume *v = &volumes[idx]; | ||
219 | int ret; | ||
220 | |||
221 | ret = bl_parse_deviceid(server, d, volumes, v->slice.volume, gfp_mask); | ||
222 | if (ret) | ||
223 | return ret; | ||
224 | |||
225 | d->disk_offset = v->slice.start; | ||
226 | d->len = v->slice.len; | ||
227 | return 0; | ||
228 | } | ||
229 | |||
230 | static int | ||
231 | bl_parse_concat(struct nfs_server *server, struct pnfs_block_dev *d, | ||
232 | struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask) | ||
233 | { | ||
234 | struct pnfs_block_volume *v = &volumes[idx]; | ||
235 | u64 len = 0; | ||
236 | int ret, i; | ||
237 | |||
238 | d->children = kcalloc(v->concat.volumes_count, | ||
239 | sizeof(struct pnfs_block_dev), GFP_KERNEL); | ||
240 | if (!d->children) | ||
241 | return -ENOMEM; | ||
242 | |||
243 | for (i = 0; i < v->concat.volumes_count; i++) { | ||
244 | ret = bl_parse_deviceid(server, &d->children[i], | ||
245 | volumes, v->concat.volumes[i], gfp_mask); | ||
246 | if (ret) | ||
247 | return ret; | ||
248 | |||
249 | d->nr_children++; | ||
250 | d->children[i].start += len; | ||
251 | len += d->children[i].len; | ||
252 | } | ||
253 | |||
254 | d->len = len; | ||
255 | d->map = bl_map_concat; | ||
256 | return 0; | ||
257 | } | ||
258 | |||
259 | static int | ||
260 | bl_parse_stripe(struct nfs_server *server, struct pnfs_block_dev *d, | ||
261 | struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask) | ||
262 | { | ||
263 | struct pnfs_block_volume *v = &volumes[idx]; | ||
264 | u64 len = 0; | ||
265 | int ret, i; | ||
266 | |||
267 | d->children = kcalloc(v->stripe.volumes_count, | ||
268 | sizeof(struct pnfs_block_dev), GFP_KERNEL); | ||
269 | if (!d->children) | ||
270 | return -ENOMEM; | ||
271 | |||
272 | for (i = 0; i < v->stripe.volumes_count; i++) { | ||
273 | ret = bl_parse_deviceid(server, &d->children[i], | ||
274 | volumes, v->stripe.volumes[i], gfp_mask); | ||
275 | if (ret) | ||
276 | return ret; | ||
277 | |||
278 | d->nr_children++; | ||
279 | len += d->children[i].len; | ||
280 | } | ||
281 | |||
282 | d->len = len; | ||
283 | d->chunk_size = v->stripe.chunk_size; | ||
284 | d->map = bl_map_stripe; | ||
285 | return 0; | ||
286 | } | ||
287 | |||
288 | static int | ||
289 | bl_parse_deviceid(struct nfs_server *server, struct pnfs_block_dev *d, | ||
290 | struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask) | ||
291 | { | ||
292 | switch (volumes[idx].type) { | ||
293 | case PNFS_BLOCK_VOLUME_SIMPLE: | ||
294 | return bl_parse_simple(server, d, volumes, idx, gfp_mask); | ||
295 | case PNFS_BLOCK_VOLUME_SLICE: | ||
296 | return bl_parse_slice(server, d, volumes, idx, gfp_mask); | ||
297 | case PNFS_BLOCK_VOLUME_CONCAT: | ||
298 | return bl_parse_concat(server, d, volumes, idx, gfp_mask); | ||
299 | case PNFS_BLOCK_VOLUME_STRIPE: | ||
300 | return bl_parse_stripe(server, d, volumes, idx, gfp_mask); | ||
301 | default: | ||
302 | dprintk("unsupported volume type: %d\n", volumes[idx].type); | ||
303 | return -EIO; | ||
304 | } | ||
305 | } | ||
306 | |||
/*
 * Decode a GETDEVICEINFO payload (@pdev->pages) into a tree of
 * pnfs_block_dev structures and return the embedded deviceid node.
 * Returns NULL on any failure (allocation, XDR decode, volume parse);
 * no specific error code is propagated to the caller.
 */
struct nfs4_deviceid_node *
bl_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev,
		gfp_t gfp_mask)
{
	struct nfs4_deviceid_node *node = NULL;
	struct pnfs_block_volume *volumes;
	struct pnfs_block_dev *top;
	struct xdr_stream xdr;
	struct xdr_buf buf;
	struct page *scratch;
	int nr_volumes, ret, i;
	__be32 *p;

	scratch = alloc_page(gfp_mask);
	if (!scratch)
		goto out;

	/* Decode directly out of the page list backing the device info. */
	xdr_init_decode_pages(&xdr, &buf, pdev->pages, pdev->pglen);
	xdr_set_scratch_buffer(&xdr, page_address(scratch), PAGE_SIZE);

	p = xdr_inline_decode(&xdr, sizeof(__be32));
	if (!p)
		goto out_free_scratch;
	/* NOTE(review): nr_volumes comes from the wire unvalidated; an
	 * absurd value only makes the kcalloc below fail -- confirm that
	 * is the intended behavior. */
	nr_volumes = be32_to_cpup(p++);

	volumes = kcalloc(nr_volumes, sizeof(struct pnfs_block_volume),
			gfp_mask);
	if (!volumes)
		goto out_free_scratch;

	for (i = 0; i < nr_volumes; i++) {
		ret = nfs4_block_decode_volume(&xdr, &volumes[i]);
		if (ret < 0)
			goto out_free_volumes;
	}

	top = kzalloc(sizeof(*top), gfp_mask);
	if (!top)
		goto out_free_volumes;

	/* The last volume in the array is the root of the device tree. */
	ret = bl_parse_deviceid(server, top, volumes, nr_volumes - 1, gfp_mask);
	if (ret) {
		bl_free_device(top);
		kfree(top);
		goto out_free_volumes;
	}

	node = &top->node;
	nfs4_init_deviceid_node(node, server, &pdev->dev_id);

out_free_volumes:
	kfree(volumes);
out_free_scratch:
	__free_page(scratch);
out:
	return node;
}
diff --git a/fs/nfs/blocklayout/extent_tree.c b/fs/nfs/blocklayout/extent_tree.c new file mode 100644 index 000000000000..31d0b5e53dfd --- /dev/null +++ b/fs/nfs/blocklayout/extent_tree.c | |||
@@ -0,0 +1,602 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2014 Christoph Hellwig. | ||
3 | */ | ||
4 | |||
5 | #include <linux/vmalloc.h> | ||
6 | |||
7 | #include "blocklayout.h" | ||
8 | |||
9 | #define NFSDBG_FACILITY NFSDBG_PNFS_LD | ||
10 | |||
11 | static inline struct pnfs_block_extent * | ||
12 | ext_node(struct rb_node *node) | ||
13 | { | ||
14 | return rb_entry(node, struct pnfs_block_extent, be_node); | ||
15 | } | ||
16 | |||
17 | static struct pnfs_block_extent * | ||
18 | ext_tree_first(struct rb_root *root) | ||
19 | { | ||
20 | struct rb_node *node = rb_first(root); | ||
21 | return node ? ext_node(node) : NULL; | ||
22 | } | ||
23 | |||
24 | static struct pnfs_block_extent * | ||
25 | ext_tree_prev(struct pnfs_block_extent *be) | ||
26 | { | ||
27 | struct rb_node *node = rb_prev(&be->be_node); | ||
28 | return node ? ext_node(node) : NULL; | ||
29 | } | ||
30 | |||
31 | static struct pnfs_block_extent * | ||
32 | ext_tree_next(struct pnfs_block_extent *be) | ||
33 | { | ||
34 | struct rb_node *node = rb_next(&be->be_node); | ||
35 | return node ? ext_node(node) : NULL; | ||
36 | } | ||
37 | |||
38 | static inline sector_t | ||
39 | ext_f_end(struct pnfs_block_extent *be) | ||
40 | { | ||
41 | return be->be_f_offset + be->be_length; | ||
42 | } | ||
43 | |||
/*
 * Find the first extent in @root that contains or follows @start:
 * the extent containing @start if one exists, otherwise the first
 * extent beginning after @start, otherwise NULL.
 */
static struct pnfs_block_extent *
__ext_tree_search(struct rb_root *root, sector_t start)
{
	struct rb_node *node = root->rb_node;
	struct pnfs_block_extent *be = NULL;

	while (node) {
		be = ext_node(node);
		if (start < be->be_f_offset)
			node = node->rb_left;
		else if (start >= ext_f_end(be))
			node = node->rb_right;
		else
			return be;	/* @start lies inside this extent */
	}

	/*
	 * No containing extent; 'be' is the last node visited, i.e. the
	 * nearest neighbour of @start in the tree.
	 */
	if (be) {
		if (start < be->be_f_offset)
			return be;	/* neighbour already follows @start */

		if (start >= ext_f_end(be))
			return ext_tree_next(be); /* successor, may be NULL */
	}

	/* Only reached when the tree is empty. */
	return NULL;
}
70 | |||
71 | static bool | ||
72 | ext_can_merge(struct pnfs_block_extent *be1, struct pnfs_block_extent *be2) | ||
73 | { | ||
74 | if (be1->be_state != be2->be_state) | ||
75 | return false; | ||
76 | if (be1->be_device != be2->be_device) | ||
77 | return false; | ||
78 | |||
79 | if (be1->be_f_offset + be1->be_length != be2->be_f_offset) | ||
80 | return false; | ||
81 | |||
82 | if (be1->be_state != PNFS_BLOCK_NONE_DATA && | ||
83 | (be1->be_v_offset + be1->be_length != be2->be_v_offset)) | ||
84 | return false; | ||
85 | |||
86 | if (be1->be_state == PNFS_BLOCK_INVALID_DATA && | ||
87 | be1->be_tag != be2->be_tag) | ||
88 | return false; | ||
89 | |||
90 | return true; | ||
91 | } | ||
92 | |||
93 | static struct pnfs_block_extent * | ||
94 | ext_try_to_merge_left(struct rb_root *root, struct pnfs_block_extent *be) | ||
95 | { | ||
96 | struct pnfs_block_extent *left = ext_tree_prev(be); | ||
97 | |||
98 | if (left && ext_can_merge(left, be)) { | ||
99 | left->be_length += be->be_length; | ||
100 | rb_erase(&be->be_node, root); | ||
101 | nfs4_put_deviceid_node(be->be_device); | ||
102 | kfree(be); | ||
103 | return left; | ||
104 | } | ||
105 | |||
106 | return be; | ||
107 | } | ||
108 | |||
109 | static struct pnfs_block_extent * | ||
110 | ext_try_to_merge_right(struct rb_root *root, struct pnfs_block_extent *be) | ||
111 | { | ||
112 | struct pnfs_block_extent *right = ext_tree_next(be); | ||
113 | |||
114 | if (right && ext_can_merge(be, right)) { | ||
115 | be->be_length += right->be_length; | ||
116 | rb_erase(&right->be_node, root); | ||
117 | nfs4_put_deviceid_node(right->be_device); | ||
118 | kfree(right); | ||
119 | } | ||
120 | |||
121 | return be; | ||
122 | } | ||
123 | |||
/*
 * Insert @new into @root, which is kept sorted and non-overlapping by
 * file offset.  When @merge_ok is true, @new may instead be coalesced
 * into an adjacent compatible extent (see ext_can_merge()), in which
 * case @new and its device reference are released.  Callers must
 * guarantee @new overlaps nothing already in the tree: an overlap
 * trips the BUG() below.
 */
static void
__ext_tree_insert(struct rb_root *root,
		struct pnfs_block_extent *new, bool merge_ok)
{
	struct rb_node **p = &root->rb_node, *parent = NULL;
	struct pnfs_block_extent *be;

	while (*p) {
		parent = *p;
		be = ext_node(parent);

		if (new->be_f_offset < be->be_f_offset) {
			if (merge_ok && ext_can_merge(new, be)) {
				/* Extend 'be' backwards over 'new'. */
				be->be_f_offset = new->be_f_offset;
				if (be->be_state != PNFS_BLOCK_NONE_DATA)
					be->be_v_offset = new->be_v_offset;
				be->be_length += new->be_length;
				be = ext_try_to_merge_left(root, be);
				goto free_new;
			}
			p = &(*p)->rb_left;
		} else if (new->be_f_offset >= ext_f_end(be)) {
			if (merge_ok && ext_can_merge(be, new)) {
				/* Extend 'be' forwards over 'new'. */
				be->be_length += new->be_length;
				be = ext_try_to_merge_right(root, be);
				goto free_new;
			}
			p = &(*p)->rb_right;
		} else {
			BUG();	/* overlapping insert: caller bug */
		}
	}

	rb_link_node(&new->be_node, parent, p);
	rb_insert_color(&new->be_node, root);
	return;
free_new:
	nfs4_put_deviceid_node(new->be_device);
	kfree(new);
}
164 | |||
/*
 * Remove the sector range [start, end) from @root, trimming or
 * splitting extents that straddle a boundary.  Returns 0 on success or
 * -ENOMEM when splitting an interior extent needs an allocation that
 * fails.
 */
static int
__ext_tree_remove(struct rb_root *root, sector_t start, sector_t end)
{
	struct pnfs_block_extent *be;
	sector_t len1 = 0, len2 = 0;
	sector_t orig_v_offset;
	sector_t orig_len;

	be = __ext_tree_search(root, start);
	if (!be)
		return 0;	/* nothing at or after start */
	if (be->be_f_offset >= end)
		return 0;	/* nothing inside [start, end) */

	orig_v_offset = be->be_v_offset;
	orig_len = be->be_length;

	/* len1/len2: parts of 'be' surviving before/after the range. */
	if (start > be->be_f_offset)
		len1 = start - be->be_f_offset;
	if (ext_f_end(be) > end)
		len2 = ext_f_end(be) - end;

	if (len2 > 0) {
		if (len1 > 0) {
			/* Range is interior to 'be': split into two. */
			struct pnfs_block_extent *new;

			new = kzalloc(sizeof(*new), GFP_ATOMIC);
			if (!new)
				return -ENOMEM;

			be->be_length = len1;

			new->be_f_offset = end;
			if (be->be_state != PNFS_BLOCK_NONE_DATA) {
				new->be_v_offset =
					orig_v_offset + orig_len - len2;
			}
			new->be_length = len2;
			new->be_state = be->be_state;
			new->be_tag = be->be_tag;
			new->be_device = nfs4_get_deviceid(be->be_device);

			__ext_tree_insert(root, new, true);
		} else {
			/* Range covers the head of 'be': trim the front. */
			be->be_f_offset = end;
			if (be->be_state != PNFS_BLOCK_NONE_DATA) {
				be->be_v_offset =
					orig_v_offset + orig_len - len2;
			}
			be->be_length = len2;
		}
	} else {
		if (len1 > 0) {
			/* Keep the head of 'be'; continue with successors. */
			be->be_length = len1;
			be = ext_tree_next(be);
		}

		/* Drop every extent lying wholly inside the range. */
		while (be && ext_f_end(be) <= end) {
			struct pnfs_block_extent *next = ext_tree_next(be);

			rb_erase(&be->be_node, root);
			nfs4_put_deviceid_node(be->be_device);
			kfree(be);
			be = next;
		}

		/* Trim the front of a final straddling extent, if any. */
		if (be && be->be_f_offset < end) {
			len1 = ext_f_end(be) - end;
			be->be_f_offset = end;
			if (be->be_state != PNFS_BLOCK_NONE_DATA)
				be->be_v_offset += be->be_length - len1;
			be->be_length = len1;
		}
	}

	return 0;
}
242 | |||
243 | int | ||
244 | ext_tree_insert(struct pnfs_block_layout *bl, struct pnfs_block_extent *new) | ||
245 | { | ||
246 | struct pnfs_block_extent *be; | ||
247 | struct rb_root *root; | ||
248 | int err = 0; | ||
249 | |||
250 | switch (new->be_state) { | ||
251 | case PNFS_BLOCK_READWRITE_DATA: | ||
252 | case PNFS_BLOCK_INVALID_DATA: | ||
253 | root = &bl->bl_ext_rw; | ||
254 | break; | ||
255 | case PNFS_BLOCK_READ_DATA: | ||
256 | case PNFS_BLOCK_NONE_DATA: | ||
257 | root = &bl->bl_ext_ro; | ||
258 | break; | ||
259 | default: | ||
260 | dprintk("invalid extent type\n"); | ||
261 | return -EINVAL; | ||
262 | } | ||
263 | |||
264 | spin_lock(&bl->bl_ext_lock); | ||
265 | retry: | ||
266 | be = __ext_tree_search(root, new->be_f_offset); | ||
267 | if (!be || be->be_f_offset >= ext_f_end(new)) { | ||
268 | __ext_tree_insert(root, new, true); | ||
269 | } else if (new->be_f_offset >= be->be_f_offset) { | ||
270 | if (ext_f_end(new) <= ext_f_end(be)) { | ||
271 | nfs4_put_deviceid_node(new->be_device); | ||
272 | kfree(new); | ||
273 | } else { | ||
274 | sector_t new_len = ext_f_end(new) - ext_f_end(be); | ||
275 | sector_t diff = new->be_length - new_len; | ||
276 | |||
277 | new->be_f_offset += diff; | ||
278 | new->be_v_offset += diff; | ||
279 | new->be_length = new_len; | ||
280 | goto retry; | ||
281 | } | ||
282 | } else if (ext_f_end(new) <= ext_f_end(be)) { | ||
283 | new->be_length = be->be_f_offset - new->be_f_offset; | ||
284 | __ext_tree_insert(root, new, true); | ||
285 | } else { | ||
286 | struct pnfs_block_extent *split; | ||
287 | sector_t new_len = ext_f_end(new) - ext_f_end(be); | ||
288 | sector_t diff = new->be_length - new_len; | ||
289 | |||
290 | split = kmemdup(new, sizeof(*new), GFP_ATOMIC); | ||
291 | if (!split) { | ||
292 | err = -EINVAL; | ||
293 | goto out; | ||
294 | } | ||
295 | |||
296 | split->be_length = be->be_f_offset - split->be_f_offset; | ||
297 | split->be_device = nfs4_get_deviceid(new->be_device); | ||
298 | __ext_tree_insert(root, split, true); | ||
299 | |||
300 | new->be_f_offset += diff; | ||
301 | new->be_v_offset += diff; | ||
302 | new->be_length = new_len; | ||
303 | goto retry; | ||
304 | } | ||
305 | out: | ||
306 | spin_unlock(&bl->bl_ext_lock); | ||
307 | return err; | ||
308 | } | ||
309 | |||
310 | static bool | ||
311 | __ext_tree_lookup(struct rb_root *root, sector_t isect, | ||
312 | struct pnfs_block_extent *ret) | ||
313 | { | ||
314 | struct rb_node *node; | ||
315 | struct pnfs_block_extent *be; | ||
316 | |||
317 | node = root->rb_node; | ||
318 | while (node) { | ||
319 | be = ext_node(node); | ||
320 | if (isect < be->be_f_offset) | ||
321 | node = node->rb_left; | ||
322 | else if (isect >= ext_f_end(be)) | ||
323 | node = node->rb_right; | ||
324 | else { | ||
325 | *ret = *be; | ||
326 | return true; | ||
327 | } | ||
328 | } | ||
329 | |||
330 | return false; | ||
331 | } | ||
332 | |||
333 | bool | ||
334 | ext_tree_lookup(struct pnfs_block_layout *bl, sector_t isect, | ||
335 | struct pnfs_block_extent *ret, bool rw) | ||
336 | { | ||
337 | bool found = false; | ||
338 | |||
339 | spin_lock(&bl->bl_ext_lock); | ||
340 | if (!rw) | ||
341 | found = __ext_tree_lookup(&bl->bl_ext_ro, isect, ret); | ||
342 | if (!found) | ||
343 | found = __ext_tree_lookup(&bl->bl_ext_rw, isect, ret); | ||
344 | spin_unlock(&bl->bl_ext_lock); | ||
345 | |||
346 | return found; | ||
347 | } | ||
348 | |||
349 | int ext_tree_remove(struct pnfs_block_layout *bl, bool rw, | ||
350 | sector_t start, sector_t end) | ||
351 | { | ||
352 | int err, err2; | ||
353 | |||
354 | spin_lock(&bl->bl_ext_lock); | ||
355 | err = __ext_tree_remove(&bl->bl_ext_ro, start, end); | ||
356 | if (rw) { | ||
357 | err2 = __ext_tree_remove(&bl->bl_ext_rw, start, end); | ||
358 | if (!err) | ||
359 | err = err2; | ||
360 | } | ||
361 | spin_unlock(&bl->bl_ext_lock); | ||
362 | |||
363 | return err; | ||
364 | } | ||
365 | |||
/*
 * Split @be at sector @split: @be keeps [be_f_offset, split) and a new
 * extent is inserted covering [split, old end).  @split must lie
 * strictly inside @be.  Returns -ENOMEM if allocation fails (in which
 * case @be is untouched).
 */
static int
ext_tree_split(struct rb_root *root, struct pnfs_block_extent *be,
		sector_t split)
{
	struct pnfs_block_extent *new;
	sector_t orig_len = be->be_length;

	new = kzalloc(sizeof(*new), GFP_ATOMIC);
	if (!new)
		return -ENOMEM;

	/* Shrink 'be' first: the new extent's fields derive from the
	 * trimmed length below. */
	be->be_length = split - be->be_f_offset;

	new->be_f_offset = split;
	if (be->be_state != PNFS_BLOCK_NONE_DATA)
		/* be->be_length is already the trimmed (left-half) length. */
		new->be_v_offset = be->be_v_offset + be->be_length;
	new->be_length = orig_len - be->be_length;
	new->be_state = be->be_state;
	new->be_tag = be->be_tag;
	new->be_device = nfs4_get_deviceid(be->be_device);

	/* merge_ok=false: the two halves would otherwise re-merge. */
	__ext_tree_insert(root, new, false);
	return 0;
}
390 | |||
391 | int | ||
392 | ext_tree_mark_written(struct pnfs_block_layout *bl, sector_t start, | ||
393 | sector_t len) | ||
394 | { | ||
395 | struct rb_root *root = &bl->bl_ext_rw; | ||
396 | sector_t end = start + len; | ||
397 | struct pnfs_block_extent *be; | ||
398 | int err = 0; | ||
399 | |||
400 | spin_lock(&bl->bl_ext_lock); | ||
401 | /* | ||
402 | * First remove all COW extents or holes from written to range. | ||
403 | */ | ||
404 | err = __ext_tree_remove(&bl->bl_ext_ro, start, end); | ||
405 | if (err) | ||
406 | goto out; | ||
407 | |||
408 | /* | ||
409 | * Then mark all invalid extents in the range as written to. | ||
410 | */ | ||
411 | for (be = __ext_tree_search(root, start); be; be = ext_tree_next(be)) { | ||
412 | if (be->be_f_offset >= end) | ||
413 | break; | ||
414 | |||
415 | if (be->be_state != PNFS_BLOCK_INVALID_DATA || be->be_tag) | ||
416 | continue; | ||
417 | |||
418 | if (be->be_f_offset < start) { | ||
419 | struct pnfs_block_extent *left = ext_tree_prev(be); | ||
420 | |||
421 | if (left && ext_can_merge(left, be)) { | ||
422 | sector_t diff = start - be->be_f_offset; | ||
423 | |||
424 | left->be_length += diff; | ||
425 | |||
426 | be->be_f_offset += diff; | ||
427 | be->be_v_offset += diff; | ||
428 | be->be_length -= diff; | ||
429 | } else { | ||
430 | err = ext_tree_split(root, be, start); | ||
431 | if (err) | ||
432 | goto out; | ||
433 | } | ||
434 | } | ||
435 | |||
436 | if (ext_f_end(be) > end) { | ||
437 | struct pnfs_block_extent *right = ext_tree_next(be); | ||
438 | |||
439 | if (right && ext_can_merge(be, right)) { | ||
440 | sector_t diff = end - be->be_f_offset; | ||
441 | |||
442 | be->be_length -= diff; | ||
443 | |||
444 | right->be_f_offset -= diff; | ||
445 | right->be_v_offset -= diff; | ||
446 | right->be_length += diff; | ||
447 | } else { | ||
448 | err = ext_tree_split(root, be, end); | ||
449 | if (err) | ||
450 | goto out; | ||
451 | } | ||
452 | } | ||
453 | |||
454 | if (be->be_f_offset >= start && ext_f_end(be) <= end) { | ||
455 | be->be_tag = EXTENT_WRITTEN; | ||
456 | be = ext_try_to_merge_left(root, be); | ||
457 | be = ext_try_to_merge_right(root, be); | ||
458 | } | ||
459 | } | ||
460 | out: | ||
461 | spin_unlock(&bl->bl_ext_lock); | ||
462 | return err; | ||
463 | } | ||
464 | |||
465 | static void ext_tree_free_commitdata(struct nfs4_layoutcommit_args *arg, | ||
466 | size_t buffer_size) | ||
467 | { | ||
468 | if (arg->layoutupdate_pages != &arg->layoutupdate_page) { | ||
469 | int nr_pages = DIV_ROUND_UP(buffer_size, PAGE_SIZE), i; | ||
470 | |||
471 | for (i = 0; i < nr_pages; i++) | ||
472 | put_page(arg->layoutupdate_pages[i]); | ||
473 | kfree(arg->layoutupdate_pages); | ||
474 | } else { | ||
475 | put_page(arg->layoutupdate_page); | ||
476 | } | ||
477 | } | ||
478 | |||
/*
 * Encode every written-but-uncommitted extent of @bl at @p for a
 * LAYOUTCOMMIT, tagging each as EXTENT_COMMITTING.  @count returns the
 * number of qualifying extents.  If they do not all fit in
 * @buffer_size, keep counting (so the caller learns the size needed)
 * but encode nothing further and return -ENOSPC for a retry.
 */
static int ext_tree_encode_commit(struct pnfs_block_layout *bl, __be32 *p,
		size_t buffer_size, size_t *count)
{
	struct pnfs_block_extent *be;
	int ret = 0;

	spin_lock(&bl->bl_ext_lock);
	for (be = ext_tree_first(&bl->bl_ext_rw); be; be = ext_tree_next(be)) {
		if (be->be_state != PNFS_BLOCK_INVALID_DATA ||
		    be->be_tag != EXTENT_WRITTEN)
			continue;

		(*count)++;
		if (*count * BL_EXTENT_SIZE > buffer_size) {
			/* keep counting.. */
			ret = -ENOSPC;
			continue;
		}

		/* deviceid, file offset, length, storage offset 0, state */
		p = xdr_encode_opaque_fixed(p, be->be_device->deviceid.data,
				NFS4_DEVICEID4_SIZE);
		p = xdr_encode_hyper(p, be->be_f_offset << SECTOR_SHIFT);
		p = xdr_encode_hyper(p, be->be_length << SECTOR_SHIFT);
		p = xdr_encode_hyper(p, 0LL);
		*p++ = cpu_to_be32(PNFS_BLOCK_READWRITE_DATA);

		be->be_tag = EXTENT_COMMITTING;
	}
	spin_unlock(&bl->bl_ext_lock);

	return ret;
}
511 | |||
512 | int | ||
513 | ext_tree_prepare_commit(struct nfs4_layoutcommit_args *arg) | ||
514 | { | ||
515 | struct pnfs_block_layout *bl = BLK_LO2EXT(NFS_I(arg->inode)->layout); | ||
516 | size_t count = 0, buffer_size = PAGE_SIZE; | ||
517 | __be32 *start_p; | ||
518 | int ret; | ||
519 | |||
520 | dprintk("%s enter\n", __func__); | ||
521 | |||
522 | arg->layoutupdate_page = alloc_page(GFP_NOFS); | ||
523 | if (!arg->layoutupdate_page) | ||
524 | return -ENOMEM; | ||
525 | start_p = page_address(arg->layoutupdate_page); | ||
526 | arg->layoutupdate_pages = &arg->layoutupdate_page; | ||
527 | |||
528 | retry: | ||
529 | ret = ext_tree_encode_commit(bl, start_p + 1, buffer_size, &count); | ||
530 | if (unlikely(ret)) { | ||
531 | ext_tree_free_commitdata(arg, buffer_size); | ||
532 | |||
533 | buffer_size = sizeof(__be32) + BL_EXTENT_SIZE * count; | ||
534 | count = 0; | ||
535 | |||
536 | arg->layoutupdate_pages = | ||
537 | kcalloc(DIV_ROUND_UP(buffer_size, PAGE_SIZE), | ||
538 | sizeof(struct page *), GFP_NOFS); | ||
539 | if (!arg->layoutupdate_pages) | ||
540 | return -ENOMEM; | ||
541 | |||
542 | start_p = __vmalloc(buffer_size, GFP_NOFS, PAGE_KERNEL); | ||
543 | if (!start_p) { | ||
544 | kfree(arg->layoutupdate_pages); | ||
545 | return -ENOMEM; | ||
546 | } | ||
547 | |||
548 | goto retry; | ||
549 | } | ||
550 | |||
551 | *start_p = cpu_to_be32(count); | ||
552 | arg->layoutupdate_len = sizeof(__be32) + BL_EXTENT_SIZE * count; | ||
553 | |||
554 | if (unlikely(arg->layoutupdate_pages != &arg->layoutupdate_page)) { | ||
555 | __be32 *p = start_p; | ||
556 | int i = 0; | ||
557 | |||
558 | for (p = start_p; | ||
559 | p < start_p + arg->layoutupdate_len; | ||
560 | p += PAGE_SIZE) { | ||
561 | arg->layoutupdate_pages[i++] = vmalloc_to_page(p); | ||
562 | } | ||
563 | } | ||
564 | |||
565 | dprintk("%s found %zu ranges\n", __func__, count); | ||
566 | return 0; | ||
567 | } | ||
568 | |||
569 | void | ||
570 | ext_tree_mark_committed(struct nfs4_layoutcommit_args *arg, int status) | ||
571 | { | ||
572 | struct pnfs_block_layout *bl = BLK_LO2EXT(NFS_I(arg->inode)->layout); | ||
573 | struct rb_root *root = &bl->bl_ext_rw; | ||
574 | struct pnfs_block_extent *be; | ||
575 | |||
576 | dprintk("%s status %d\n", __func__, status); | ||
577 | |||
578 | ext_tree_free_commitdata(arg, arg->layoutupdate_len); | ||
579 | |||
580 | spin_lock(&bl->bl_ext_lock); | ||
581 | for (be = ext_tree_first(root); be; be = ext_tree_next(be)) { | ||
582 | if (be->be_state != PNFS_BLOCK_INVALID_DATA || | ||
583 | be->be_tag != EXTENT_COMMITTING) | ||
584 | continue; | ||
585 | |||
586 | if (status) { | ||
587 | /* | ||
588 | * Mark as written and try again. | ||
589 | * | ||
590 | * XXX: some real error handling here wouldn't hurt.. | ||
591 | */ | ||
592 | be->be_tag = EXTENT_WRITTEN; | ||
593 | } else { | ||
594 | be->be_state = PNFS_BLOCK_READWRITE_DATA; | ||
595 | be->be_tag = 0; | ||
596 | } | ||
597 | |||
598 | be = ext_try_to_merge_left(root, be); | ||
599 | be = ext_try_to_merge_right(root, be); | ||
600 | } | ||
601 | spin_unlock(&bl->bl_ext_lock); | ||
602 | } | ||
diff --git a/fs/nfs/blocklayout/extents.c b/fs/nfs/blocklayout/extents.c deleted file mode 100644 index 4d0161442565..000000000000 --- a/fs/nfs/blocklayout/extents.c +++ /dev/null | |||
@@ -1,908 +0,0 @@ | |||
1 | /* | ||
2 | * linux/fs/nfs/blocklayout/blocklayout.h | ||
3 | * | ||
4 | * Module for the NFSv4.1 pNFS block layout driver. | ||
5 | * | ||
6 | * Copyright (c) 2006 The Regents of the University of Michigan. | ||
7 | * All rights reserved. | ||
8 | * | ||
9 | * Andy Adamson <andros@citi.umich.edu> | ||
10 | * Fred Isaman <iisaman@umich.edu> | ||
11 | * | ||
12 | * permission is granted to use, copy, create derivative works and | ||
13 | * redistribute this software and such derivative works for any purpose, | ||
14 | * so long as the name of the university of michigan is not used in | ||
15 | * any advertising or publicity pertaining to the use or distribution | ||
16 | * of this software without specific, written prior authorization. if | ||
17 | * the above copyright notice or any other identification of the | ||
18 | * university of michigan is included in any copy of any portion of | ||
19 | * this software, then the disclaimer below must also be included. | ||
20 | * | ||
21 | * this software is provided as is, without representation from the | ||
22 | * university of michigan as to its fitness for any purpose, and without | ||
23 | * warranty by the university of michigan of any kind, either express | ||
24 | * or implied, including without limitation the implied warranties of | ||
25 | * merchantability and fitness for a particular purpose. the regents | ||
26 | * of the university of michigan shall not be liable for any damages, | ||
27 | * including special, indirect, incidental, or consequential damages, | ||
28 | * with respect to any claim arising out or in connection with the use | ||
29 | * of the software, even if it has been or is hereafter advised of the | ||
30 | * possibility of such damages. | ||
31 | */ | ||
32 | |||
33 | #include "blocklayout.h" | ||
34 | #define NFSDBG_FACILITY NFSDBG_PNFS_LD | ||
35 | |||
36 | /* Bit numbers */ | ||
37 | #define EXTENT_INITIALIZED 0 | ||
38 | #define EXTENT_WRITTEN 1 | ||
39 | #define EXTENT_IN_COMMIT 2 | ||
40 | #define INTERNAL_EXISTS MY_MAX_TAGS | ||
41 | #define INTERNAL_MASK ((1 << INTERNAL_EXISTS) - 1) | ||
42 | |||
43 | /* Returns largest t<=s s.t. t%base==0 */ | ||
44 | static inline sector_t normalize(sector_t s, int base) | ||
45 | { | ||
46 | sector_t tmp = s; /* Since do_div modifies its argument */ | ||
47 | return s - sector_div(tmp, base); | ||
48 | } | ||
49 | |||
50 | static inline sector_t normalize_up(sector_t s, int base) | ||
51 | { | ||
52 | return normalize(s + base - 1, base); | ||
53 | } | ||
54 | |||
55 | /* Complete stub using list while determine API wanted */ | ||
56 | |||
57 | /* Returns tags, or negative */ | ||
58 | static int32_t _find_entry(struct my_tree *tree, u64 s) | ||
59 | { | ||
60 | struct pnfs_inval_tracking *pos; | ||
61 | |||
62 | dprintk("%s(%llu) enter\n", __func__, s); | ||
63 | list_for_each_entry_reverse(pos, &tree->mtt_stub, it_link) { | ||
64 | if (pos->it_sector > s) | ||
65 | continue; | ||
66 | else if (pos->it_sector == s) | ||
67 | return pos->it_tags & INTERNAL_MASK; | ||
68 | else | ||
69 | break; | ||
70 | } | ||
71 | return -ENOENT; | ||
72 | } | ||
73 | |||
74 | static inline | ||
75 | int _has_tag(struct my_tree *tree, u64 s, int32_t tag) | ||
76 | { | ||
77 | int32_t tags; | ||
78 | |||
79 | dprintk("%s(%llu, %i) enter\n", __func__, s, tag); | ||
80 | s = normalize(s, tree->mtt_step_size); | ||
81 | tags = _find_entry(tree, s); | ||
82 | if ((tags < 0) || !(tags & (1 << tag))) | ||
83 | return 0; | ||
84 | else | ||
85 | return 1; | ||
86 | } | ||
87 | |||
/* Creates entry with tag, or if entry already exists, unions tag to it.
 * If storage is not NULL, newly created entry will use it.
 * Returns number of entries added, or negative on error.
 *
 * NOTE(review): when no entry for s exists, storage is dereferenced
 * unconditionally, so callers that pass storage == NULL (see _set_range)
 * rely on the entry having been pre-created via _preload_range.
 */
static int _add_entry(struct my_tree *tree, u64 s, int32_t tag,
		      struct pnfs_inval_tracking *storage)
{
	int found = 0;
	struct pnfs_inval_tracking *pos;

	dprintk("%s(%llu, %i, %p) enter\n", __func__, s, tag, storage);
	/* List is sorted by sector; walk backwards until we pass s. */
	list_for_each_entry_reverse(pos, &tree->mtt_stub, it_link) {
		if (pos->it_sector > s)
			continue;
		else if (pos->it_sector == s) {
			found = 1;
			break;
		} else
			break;
	}
	if (found) {
		pos->it_tags |= (1 << tag);
		return 0;
	} else {
		struct pnfs_inval_tracking *new;
		new = storage;
		new->it_sector = s;
		new->it_tags = (1 << tag);
		/* Insert after pos, keeping the list sorted by sector. */
		list_add(&new->it_link, &pos->it_link);
		return 1;
	}
}
120 | |||
/* XXXX Really want option to not create */
/* Over range, unions tag with existing entries, else creates entry with tag */
static int _set_range(struct my_tree *tree, int32_t tag, u64 s, u64 length)
{
	u64 i;

	dprintk("%s(%i, %llu, %llu) enter\n", __func__, tag, s, length);
	/* Step through one tree granule at a time.  Passing NULL storage
	 * to _add_entry means every slot must already exist (guaranteed by
	 * a prior _preload_range); a nonzero return is treated as failure.
	 */
	for (i = normalize(s, tree->mtt_step_size); i < s + length;
	     i += tree->mtt_step_size)
		if (_add_entry(tree, i, tag, NULL))
			return -ENOMEM;
	return 0;
}
134 | |||
/* Ensure that future operations on given range of tree will not malloc */
static int _preload_range(struct pnfs_inval_markings *marks,
			  u64 offset, u64 length)
{
	u64 start, end, s;
	int count, i, used = 0, status = -ENOMEM;
	struct pnfs_inval_tracking **storage;
	struct my_tree *tree = &marks->im_tree;

	dprintk("%s(%llu, %llu) enter\n", __func__, offset, length);
	start = normalize(offset, tree->mtt_step_size);
	end = normalize_up(offset + length, tree->mtt_step_size);
	count = (int)(end - start) / (int)tree->mtt_step_size;

	/* Pre-malloc what memory we might need */
	storage = kcalloc(count, sizeof(*storage), GFP_NOFS);
	if (!storage)
		return -ENOMEM;
	for (i = 0; i < count; i++) {
		storage[i] = kmalloc(sizeof(struct pnfs_inval_tracking),
				     GFP_NOFS);
		if (!storage[i])
			goto out_cleanup;
	}

	/* Insert under the lock; each entry actually created consumes one
	 * pre-allocated tracking struct (_add_entry returns 1 then). */
	spin_lock_bh(&marks->im_lock);
	for (s = start; s < end; s += tree->mtt_step_size)
		used += _add_entry(tree, s, INTERNAL_EXISTS, storage[used]);
	spin_unlock_bh(&marks->im_lock);

	status = 0;

out_cleanup:
	/* Free the pre-allocated structs that were not consumed. */
	for (i = used; i < count; i++) {
		if (!storage[i])
			break;
		kfree(storage[i]);
	}
	kfree(storage);
	return status;
}
176 | |||
177 | /* We are relying on page lock to serialize this */ | ||
178 | int bl_is_sector_init(struct pnfs_inval_markings *marks, sector_t isect) | ||
179 | { | ||
180 | int rv; | ||
181 | |||
182 | spin_lock_bh(&marks->im_lock); | ||
183 | rv = _has_tag(&marks->im_tree, isect, EXTENT_INITIALIZED); | ||
184 | spin_unlock_bh(&marks->im_lock); | ||
185 | return rv; | ||
186 | } | ||
187 | |||
/* Assume start, end already sector aligned */
/*
 * Return 1 iff every step-sized slot in [start, end) carries @tag.
 * Walks the sorted stub list backwards starting from the slot just
 * below @end, tracking the sector we expect next; any gap or untagged
 * slot means the range is not fully tagged.
 */
static int
_range_has_tag(struct my_tree *tree, u64 start, u64 end, int32_t tag)
{
	struct pnfs_inval_tracking *pos;
	u64 expect = 0;

	dprintk("%s(%llu, %llu, %i) enter\n", __func__, start, end, tag);
	list_for_each_entry_reverse(pos, &tree->mtt_stub, it_link) {
		if (pos->it_sector >= end)
			continue;
		if (!expect) {
			/* Walk must begin exactly at the last slot of the
			 * range, and that slot must carry the tag. */
			if ((pos->it_sector == end - tree->mtt_step_size) &&
			    (pos->it_tags & (1 << tag))) {
				expect = pos->it_sector - tree->mtt_step_size;
				/* Guard against unsigned wrap below zero. */
				if (pos->it_sector < tree->mtt_step_size || expect < start)
					return 1;
				continue;
			} else {
				return 0;
			}
		}
		if (pos->it_sector != expect || !(pos->it_tags & (1 << tag)))
			return 0;
		expect -= tree->mtt_step_size;
		if (expect < start)
			return 1;
	}
	return 0;
}
218 | |||
219 | static int is_range_written(struct pnfs_inval_markings *marks, | ||
220 | sector_t start, sector_t end) | ||
221 | { | ||
222 | int rv; | ||
223 | |||
224 | spin_lock_bh(&marks->im_lock); | ||
225 | rv = _range_has_tag(&marks->im_tree, start, end, EXTENT_WRITTEN); | ||
226 | spin_unlock_bh(&marks->im_lock); | ||
227 | return rv; | ||
228 | } | ||
229 | |||
230 | /* Marks sectors in [offest, offset_length) as having been initialized. | ||
231 | * All lengths are step-aligned, where step is min(pagesize, blocksize). | ||
232 | * Currently assumes offset is page-aligned | ||
233 | */ | ||
234 | int bl_mark_sectors_init(struct pnfs_inval_markings *marks, | ||
235 | sector_t offset, sector_t length) | ||
236 | { | ||
237 | sector_t start, end; | ||
238 | |||
239 | dprintk("%s(offset=%llu,len=%llu) enter\n", | ||
240 | __func__, (u64)offset, (u64)length); | ||
241 | |||
242 | start = normalize(offset, marks->im_block_size); | ||
243 | end = normalize_up(offset + length, marks->im_block_size); | ||
244 | if (_preload_range(marks, start, end - start)) | ||
245 | goto outerr; | ||
246 | |||
247 | spin_lock_bh(&marks->im_lock); | ||
248 | if (_set_range(&marks->im_tree, EXTENT_INITIALIZED, offset, length)) | ||
249 | goto out_unlock; | ||
250 | spin_unlock_bh(&marks->im_lock); | ||
251 | |||
252 | return 0; | ||
253 | |||
254 | out_unlock: | ||
255 | spin_unlock_bh(&marks->im_lock); | ||
256 | outerr: | ||
257 | return -ENOMEM; | ||
258 | } | ||
259 | |||
260 | /* Marks sectors in [offest, offset+length) as having been written to disk. | ||
261 | * All lengths should be block aligned. | ||
262 | */ | ||
263 | static int mark_written_sectors(struct pnfs_inval_markings *marks, | ||
264 | sector_t offset, sector_t length) | ||
265 | { | ||
266 | int status; | ||
267 | |||
268 | dprintk("%s(offset=%llu,len=%llu) enter\n", __func__, | ||
269 | (u64)offset, (u64)length); | ||
270 | spin_lock_bh(&marks->im_lock); | ||
271 | status = _set_range(&marks->im_tree, EXTENT_WRITTEN, offset, length); | ||
272 | spin_unlock_bh(&marks->im_lock); | ||
273 | return status; | ||
274 | } | ||
275 | |||
276 | static void print_short_extent(struct pnfs_block_short_extent *be) | ||
277 | { | ||
278 | dprintk("PRINT SHORT EXTENT extent %p\n", be); | ||
279 | if (be) { | ||
280 | dprintk(" be_f_offset %llu\n", (u64)be->bse_f_offset); | ||
281 | dprintk(" be_length %llu\n", (u64)be->bse_length); | ||
282 | } | ||
283 | } | ||
284 | |||
285 | static void print_clist(struct list_head *list, unsigned int count) | ||
286 | { | ||
287 | struct pnfs_block_short_extent *be; | ||
288 | unsigned int i = 0; | ||
289 | |||
290 | ifdebug(FACILITY) { | ||
291 | printk(KERN_DEBUG "****************\n"); | ||
292 | printk(KERN_DEBUG "Extent list looks like:\n"); | ||
293 | list_for_each_entry(be, list, bse_node) { | ||
294 | i++; | ||
295 | print_short_extent(be); | ||
296 | } | ||
297 | if (i != count) | ||
298 | printk(KERN_DEBUG "\n\nExpected %u entries\n\n\n", count); | ||
299 | printk(KERN_DEBUG "****************\n"); | ||
300 | } | ||
301 | } | ||
302 | |||
/* Note: In theory, we should do more checking that devid's match between
 * old and new, but if they don't, the lists are too corrupt to salvage anyway.
 */
/* Note this is very similar to bl_add_merge_extent */
/*
 * Insert @new into bl->bl_commit (kept sorted by file offset), merging
 * it with any overlapping or abutting entries on the same device.
 * Takes ownership of @new: it is either linked into the list or freed.
 * Caller holds bl->bl_ext_lock.
 */
static void add_to_commitlist(struct pnfs_block_layout *bl,
			      struct pnfs_block_short_extent *new)
{
	struct list_head *clist = &bl->bl_commit;
	struct pnfs_block_short_extent *old, *save;
	sector_t end = new->bse_f_offset + new->bse_length;

	dprintk("%s enter\n", __func__);
	print_short_extent(new);
	print_clist(clist, bl->bl_count);
	bl->bl_count++;
	/* Scan for proper place to insert, extending new to the left
	 * as much as possible.
	 */
	list_for_each_entry_safe(old, save, clist, bse_node) {
		if (new->bse_f_offset < old->bse_f_offset)
			break;
		if (end <= old->bse_f_offset + old->bse_length) {
			/* Range is already in list */
			bl->bl_count--;
			kfree(new);
			return;
		} else if (new->bse_f_offset <=
				old->bse_f_offset + old->bse_length) {
			/* new overlaps or abuts existing be */
			if (new->bse_mdev == old->bse_mdev) {
				/* extend new to fully replace old */
				new->bse_length += new->bse_f_offset -
						old->bse_f_offset;
				new->bse_f_offset = old->bse_f_offset;
				list_del(&old->bse_node);
				bl->bl_count--;
				kfree(old);
			}
		}
	}
	/* Note that if we never hit the above break, old will not point to a
	 * valid extent. However, in that case &old->bse_node==list.
	 */
	list_add_tail(&new->bse_node, &old->bse_node);
	/* Scan forward for overlaps. If we find any, extend new and
	 * remove the overlapped extent.
	 */
	old = list_prepare_entry(new, clist, bse_node);
	list_for_each_entry_safe_continue(old, save, clist, bse_node) {
		if (end < old->bse_f_offset)
			break;
		/* new overlaps or abuts old */
		if (new->bse_mdev == old->bse_mdev) {
			if (end < old->bse_f_offset + old->bse_length) {
				/* extend new to fully cover old */
				end = old->bse_f_offset + old->bse_length;
				new->bse_length = end - new->bse_f_offset;
			}
			list_del(&old->bse_node);
			bl->bl_count--;
			kfree(old);
		}
	}
	dprintk("%s: after merging\n", __func__);
	print_clist(clist, bl->bl_count);
}
369 | |||
/* Note the range described by offset, length is guaranteed to be contained
 * within be.
 * new will be freed, either by this function or add_to_commitlist if they
 * decide not to use it, or after LAYOUTCOMMIT uses it in the commitlist.
 */
int bl_mark_for_commit(struct pnfs_block_extent *be,
		    sector_t offset, sector_t length,
		    struct pnfs_block_short_extent *new)
{
	sector_t new_end, end = offset + length;
	struct pnfs_block_layout *bl = container_of(be->be_inval,
						    struct pnfs_block_layout,
						    bl_inval);

	mark_written_sectors(be->be_inval, offset, length);
	/* We want to add the range to commit list, but it must be
	 * block-normalized, and verified that the normalized range has
	 * been entirely written to disk.
	 */
	new->bse_f_offset = offset;
	offset = normalize(offset, bl->bl_blocksize);
	if (offset < new->bse_f_offset) {
		/* Extend left to the block boundary only if that whole
		 * leading stretch has already been written. */
		if (is_range_written(be->be_inval, offset, new->bse_f_offset))
			new->bse_f_offset = offset;
		else
			new->bse_f_offset = offset + bl->bl_blocksize;
	}
	new_end = normalize_up(end, bl->bl_blocksize);
	if (end < new_end) {
		/* Same on the right: round up only over written sectors. */
		if (is_range_written(be->be_inval, end, new_end))
			end = new_end;
		else
			end = new_end - bl->bl_blocksize;
	}
	if (end <= new->bse_f_offset) {
		/* Nothing block-aligned and fully written remains. */
		kfree(new);
		return 0;
	}
	new->bse_length = end - new->bse_f_offset;
	new->bse_devid = be->be_devid;
	new->bse_mdev = be->be_mdev;

	spin_lock(&bl->bl_ext_lock);
	add_to_commitlist(bl, new);
	spin_unlock(&bl->bl_ext_lock);
	return 0;
}
417 | |||
418 | static void print_bl_extent(struct pnfs_block_extent *be) | ||
419 | { | ||
420 | dprintk("PRINT EXTENT extent %p\n", be); | ||
421 | if (be) { | ||
422 | dprintk(" be_f_offset %llu\n", (u64)be->be_f_offset); | ||
423 | dprintk(" be_length %llu\n", (u64)be->be_length); | ||
424 | dprintk(" be_v_offset %llu\n", (u64)be->be_v_offset); | ||
425 | dprintk(" be_state %d\n", be->be_state); | ||
426 | } | ||
427 | } | ||
428 | |||
429 | static void | ||
430 | destroy_extent(struct kref *kref) | ||
431 | { | ||
432 | struct pnfs_block_extent *be; | ||
433 | |||
434 | be = container_of(kref, struct pnfs_block_extent, be_refcnt); | ||
435 | dprintk("%s be=%p\n", __func__, be); | ||
436 | kfree(be); | ||
437 | } | ||
438 | |||
439 | void | ||
440 | bl_put_extent(struct pnfs_block_extent *be) | ||
441 | { | ||
442 | if (be) { | ||
443 | dprintk("%s enter %p (%i)\n", __func__, be, | ||
444 | atomic_read(&be->be_refcnt.refcount)); | ||
445 | kref_put(&be->be_refcnt, destroy_extent); | ||
446 | } | ||
447 | } | ||
448 | |||
449 | struct pnfs_block_extent *bl_alloc_extent(void) | ||
450 | { | ||
451 | struct pnfs_block_extent *be; | ||
452 | |||
453 | be = kmalloc(sizeof(struct pnfs_block_extent), GFP_NOFS); | ||
454 | if (!be) | ||
455 | return NULL; | ||
456 | INIT_LIST_HEAD(&be->be_node); | ||
457 | kref_init(&be->be_refcnt); | ||
458 | be->be_inval = NULL; | ||
459 | return be; | ||
460 | } | ||
461 | |||
462 | static void print_elist(struct list_head *list) | ||
463 | { | ||
464 | struct pnfs_block_extent *be; | ||
465 | dprintk("****************\n"); | ||
466 | dprintk("Extent list looks like:\n"); | ||
467 | list_for_each_entry(be, list, be_node) { | ||
468 | print_bl_extent(be); | ||
469 | } | ||
470 | dprintk("****************\n"); | ||
471 | } | ||
472 | |||
473 | static inline int | ||
474 | extents_consistent(struct pnfs_block_extent *old, struct pnfs_block_extent *new) | ||
475 | { | ||
476 | /* Note this assumes new->be_f_offset >= old->be_f_offset */ | ||
477 | return (new->be_state == old->be_state) && | ||
478 | ((new->be_state == PNFS_BLOCK_NONE_DATA) || | ||
479 | ((new->be_v_offset - old->be_v_offset == | ||
480 | new->be_f_offset - old->be_f_offset) && | ||
481 | new->be_mdev == old->be_mdev)); | ||
482 | } | ||
483 | |||
/* Adds new to appropriate list in bl, modifying new and removing existing
 * extents as appropriate to deal with overlaps.
 *
 * See bl_find_get_extent for list constraints.
 *
 * Refcount on new is already set. If end up not using it, or error out,
 * need to put the reference.
 *
 * bl->bl_ext_lock is held by caller.
 *
 * Returns 0 on success; -EIO (dropping the reference on @new) when an
 * overlapping extent is inconsistent with @new.
 */
int
bl_add_merge_extent(struct pnfs_block_layout *bl,
		     struct pnfs_block_extent *new)
{
	struct pnfs_block_extent *be, *tmp;
	sector_t end = new->be_f_offset + new->be_length;
	struct list_head *list;

	dprintk("%s enter with be=%p\n", __func__, new);
	print_bl_extent(new);
	list = &bl->bl_extents[bl_choose_list(new->be_state)];
	print_elist(list);

	/* Scan for proper place to insert, extending new to the left
	 * as much as possible.
	 */
	list_for_each_entry_safe_reverse(be, tmp, list, be_node) {
		if (new->be_f_offset >= be->be_f_offset + be->be_length)
			break;
		if (new->be_f_offset >= be->be_f_offset) {
			if (end <= be->be_f_offset + be->be_length) {
				/* new is a subset of existing be*/
				if (extents_consistent(be, new)) {
					dprintk("%s: new is subset, ignoring\n",
						__func__);
					bl_put_extent(new);
					return 0;
				} else {
					goto out_err;
				}
			} else {
				/* |<--   be   -->|
				 *          |<--   new   -->| */
				if (extents_consistent(be, new)) {
					/* extend new to fully replace be */
					new->be_length += new->be_f_offset -
						be->be_f_offset;
					new->be_f_offset = be->be_f_offset;
					new->be_v_offset = be->be_v_offset;
					dprintk("%s: removing %p\n", __func__, be);
					list_del(&be->be_node);
					bl_put_extent(be);
				} else {
					goto out_err;
				}
			}
		} else if (end >= be->be_f_offset + be->be_length) {
			/* new extent overlap existing be */
			if (extents_consistent(be, new)) {
				/* extend new to fully replace be */
				dprintk("%s: removing %p\n", __func__, be);
				list_del(&be->be_node);
				bl_put_extent(be);
			} else {
				goto out_err;
			}
		} else if (end > be->be_f_offset) {
			/*          |<--   be   -->|
			 *|<--   new   -->| */
			if (extents_consistent(new, be)) {
				/* extend new to fully replace be */
				new->be_length += be->be_f_offset + be->be_length -
					new->be_f_offset - new->be_length;
				dprintk("%s: removing %p\n", __func__, be);
				list_del(&be->be_node);
				bl_put_extent(be);
			} else {
				goto out_err;
			}
		}
	}
	/* Note that if we never hit the above break, be will not point to a
	 * valid extent. However, in that case &be->be_node==list.
	 */
	list_add(&new->be_node, &be->be_node);
	dprintk("%s: inserting new\n", __func__);
	print_elist(list);
	/* FIXME - The per-list consistency checks have all been done,
	 * should now check cross-list consistency.
	 */
	return 0;

out_err:
	bl_put_extent(new);
	return -EIO;
}
580 | |||
/* Returns extent, or NULL.  If a second READ extent exists, it is returned
 * in cow_read, if given.
 *
 * The extents are kept in two seperate ordered lists, one for READ and NONE,
 * one for READWRITE and INVALID.  Within each list, we assume:
 * 1. Extents are ordered by file offset.
 * 2. For any given isect, there is at most one extents that matches.
 *
 * Every returned extent (including *cow_read) has its refcount bumped;
 * callers must bl_put_extent() them.
 */
struct pnfs_block_extent *
bl_find_get_extent(struct pnfs_block_layout *bl, sector_t isect,
	    struct pnfs_block_extent **cow_read)
{
	struct pnfs_block_extent *be, *cow, *ret;
	int i;

	dprintk("%s enter with isect %llu\n", __func__, (u64)isect);
	cow = ret = NULL;
	spin_lock(&bl->bl_ext_lock);
	for (i = 0; i < EXTENT_LISTS; i++) {
		list_for_each_entry_reverse(be, &bl->bl_extents[i], be_node) {
			if (isect >= be->be_f_offset + be->be_length)
				break;
			if (isect >= be->be_f_offset) {
				/* We have found an extent */
				dprintk("%s Get %p (%i)\n", __func__, be,
					atomic_read(&be->be_refcnt.refcount));
				kref_get(&be->be_refcnt);
				if (!ret)
					ret = be;
				else if (be->be_state != PNFS_BLOCK_READ_DATA)
					bl_put_extent(be);
				else
					cow = be;
				break;
			}
		}
		/* Stop after the first list unless the caller wants a COW
		 * source and the match was an INVALID extent. */
		if (ret &&
		    (!cow_read || ret->be_state != PNFS_BLOCK_INVALID_DATA))
			break;
	}
	spin_unlock(&bl->bl_ext_lock);
	if (cow_read)
		*cow_read = cow;
	print_bl_extent(ret);
	return ret;
}
627 | |||
/* Similar to bl_find_get_extent, but called with lock held, and ignores cow */
/*
 * Caller holds bl->bl_ext_lock.  The returned extent (if any) has its
 * refcount bumped; the caller must bl_put_extent() it.
 */
static struct pnfs_block_extent *
bl_find_get_extent_locked(struct pnfs_block_layout *bl, sector_t isect)
{
	struct pnfs_block_extent *be, *ret = NULL;
	int i;

	dprintk("%s enter with isect %llu\n", __func__, (u64)isect);
	for (i = 0; i < EXTENT_LISTS; i++) {
		if (ret)
			break;
		list_for_each_entry_reverse(be, &bl->bl_extents[i], be_node) {
			if (isect >= be->be_f_offset + be->be_length)
				break;
			if (isect >= be->be_f_offset) {
				/* We have found an extent */
				dprintk("%s Get %p (%i)\n", __func__, be,
					atomic_read(&be->be_refcnt.refcount));
				kref_get(&be->be_refcnt);
				ret = be;
				break;
			}
		}
	}
	print_bl_extent(ret);
	return ret;
}
655 | |||
/*
 * XDR-encode the commit list into @xdr for LAYOUTCOMMIT, moving each
 * encoded entry from bl->bl_commit to bl->bl_committing.  The first two
 * reserved words hold the opaque length and the extent count.
 */
int
encode_pnfs_block_layoutupdate(struct pnfs_block_layout *bl,
			       struct xdr_stream *xdr,
			       const struct nfs4_layoutcommit_args *arg)
{
	struct pnfs_block_short_extent *lce, *save;
	unsigned int count = 0;
	__be32 *p, *xdr_start;

	dprintk("%s enter\n", __func__);
	/* BUG - creation of bl_commit is buggy - need to wait for
	 * entire block to be marked WRITTEN before it can be added.
	 */
	spin_lock(&bl->bl_ext_lock);
	/* Want to adjust for possible truncate */
	/* We now want to adjust argument range */

	/* XDR encode the ranges found */
	xdr_start = xdr_reserve_space(xdr, 8);
	if (!xdr_start)
		goto out;
	list_for_each_entry_safe(lce, save, &bl->bl_commit, bse_node) {
		p = xdr_reserve_space(xdr, 7 * 4 + sizeof(lce->bse_devid.data));
		if (!p)
			break;
		/* deviceid, offset, length (bytes), storage offset, state */
		p = xdr_encode_opaque_fixed(p, lce->bse_devid.data, NFS4_DEVICEID4_SIZE);
		p = xdr_encode_hyper(p, lce->bse_f_offset << SECTOR_SHIFT);
		p = xdr_encode_hyper(p, lce->bse_length << SECTOR_SHIFT);
		p = xdr_encode_hyper(p, 0LL);
		*p++ = cpu_to_be32(PNFS_BLOCK_READWRITE_DATA);
		list_move_tail(&lce->bse_node, &bl->bl_committing);
		bl->bl_count--;
		count++;
	}
	/* Back-patch the opaque length and extent count. */
	xdr_start[0] = cpu_to_be32((xdr->p - xdr_start - 1) * 4);
	xdr_start[1] = cpu_to_be32(count);
out:
	spin_unlock(&bl->bl_ext_lock);
	dprintk("%s found %i ranges\n", __func__, count);
	return 0;
}
697 | |||
698 | /* Helper function to set_to_rw that initialize a new extent */ | ||
699 | static void | ||
700 | _prep_new_extent(struct pnfs_block_extent *new, | ||
701 | struct pnfs_block_extent *orig, | ||
702 | sector_t offset, sector_t length, int state) | ||
703 | { | ||
704 | kref_init(&new->be_refcnt); | ||
705 | /* don't need to INIT_LIST_HEAD(&new->be_node) */ | ||
706 | memcpy(&new->be_devid, &orig->be_devid, sizeof(struct nfs4_deviceid)); | ||
707 | new->be_mdev = orig->be_mdev; | ||
708 | new->be_f_offset = offset; | ||
709 | new->be_length = length; | ||
710 | new->be_v_offset = orig->be_v_offset - orig->be_f_offset + offset; | ||
711 | new->be_state = state; | ||
712 | new->be_inval = orig->be_inval; | ||
713 | } | ||
714 | |||
/* Tries to merge be with extent in front of it in list.
 * Frees storage if not used.
 *
 * On a successful merge, @storage becomes the combined extent (replacing
 * the predecessor in the list), @be and its predecessor are unlinked and
 * released, and @storage is returned.  Otherwise @storage is kfree'd and
 * @be returned unchanged.  Caller holds the list's lock.
 */
static struct pnfs_block_extent *
_front_merge(struct pnfs_block_extent *be, struct list_head *head,
	     struct pnfs_block_extent *storage)
{
	struct pnfs_block_extent *prev;

	if (!storage)
		goto no_merge;
	if (&be->be_node == head || be->be_node.prev == head)
		goto no_merge;
	prev = list_entry(be->be_node.prev, struct pnfs_block_extent, be_node);
	/* Predecessor must abut be exactly and map consistently. */
	if ((prev->be_f_offset + prev->be_length != be->be_f_offset) ||
	    !extents_consistent(prev, be))
		goto no_merge;
	_prep_new_extent(storage, prev, prev->be_f_offset,
			 prev->be_length + be->be_length, prev->be_state);
	list_replace(&prev->be_node, &storage->be_node);
	bl_put_extent(prev);
	list_del(&be->be_node);
	bl_put_extent(be);
	return storage;

no_merge:
	kfree(storage);
	return be;
}
744 | |||
/* Convert the INVALID extent covering @offset to READWRITE for up to @length
 * sectors, splitting it into head/middle/tail pieces as needed and merging
 * the new RW piece with compatible neighbours.
 *
 * Returns the first sector past the region handled by this call (the end of
 * the extent found at @offset); callers loop until the whole range is
 * converted.  Takes bl->bl_ext_lock internally.
 */
static u64
set_to_rw(struct pnfs_block_layout *bl, u64 offset, u64 length)
{
	u64 rv = offset + length;
	struct pnfs_block_extent *be, *e1, *e2, *e3, *new, *old;
	struct pnfs_block_extent *children[3];
	struct pnfs_block_extent *merge1 = NULL, *merge2 = NULL;
	int i = 0, j;

	dprintk("%s(%llu, %llu)\n", __func__, offset, length);
	/* Create storage for up to three new extents e1, e2, e3 */
	e1 = kmalloc(sizeof(*e1), GFP_ATOMIC);
	e2 = kmalloc(sizeof(*e2), GFP_ATOMIC);
	e3 = kmalloc(sizeof(*e3), GFP_ATOMIC);
	/* On allocation failure just skip the split for this extent. */
	if (!e1 || !e2 || !e3)
		goto out_nosplit;

	spin_lock(&bl->bl_ext_lock);
	be = bl_find_get_extent_locked(bl, offset);
	rv = be->be_f_offset + be->be_length;
	if (be->be_state != PNFS_BLOCK_INVALID_DATA) {
		/* Not an INVALID extent: nothing to convert here. */
		spin_unlock(&bl->bl_ext_lock);
		goto out_nosplit;
	}
	/* Add e* to children, bumping e*'s krefs */
	if (be->be_f_offset != offset) {
		/* e1: still-INVALID head of be, in front of the RW range */
		_prep_new_extent(e1, be, be->be_f_offset,
				 offset - be->be_f_offset,
				 PNFS_BLOCK_INVALID_DATA);
		children[i++] = e1;
		print_bl_extent(e1);
	} else
		merge1 = e1;	/* no head piece: e1 is merge scratch space */
	/* e2: the piece being converted to READWRITE */
	_prep_new_extent(e2, be, offset,
			 min(length, be->be_f_offset + be->be_length - offset),
			 PNFS_BLOCK_READWRITE_DATA);
	children[i++] = e2;
	print_bl_extent(e2);
	if (offset + length < be->be_f_offset + be->be_length) {
		/* e3: still-INVALID tail of be, after the RW range */
		_prep_new_extent(e3, be, e2->be_f_offset + e2->be_length,
				 be->be_f_offset + be->be_length -
				 offset - length,
				 PNFS_BLOCK_INVALID_DATA);
		children[i++] = e3;
		print_bl_extent(e3);
	} else
		merge2 = e3;	/* no tail piece: e3 is merge scratch space */

	/* Remove be from list, and insert the e* */
	/* We don't get refs on e*, since this list is the base reference
	 * set when init'ed.
	 */
	if (i < 3)
		children[i] = NULL;
	new = children[0];
	list_replace(&be->be_node, &new->be_node);
	bl_put_extent(be);	/* drops be's base (list) reference */
	new = _front_merge(new, &bl->bl_extents[RW_EXTENT], merge1);
	for (j = 1; j < i; j++) {
		old = new;
		new = children[j];
		list_add(&new->be_node, &old->be_node);
	}
	if (merge2) {
		/* This is a HACK, should just create a _back_merge function */
		new = list_entry(new->be_node.next,
				 struct pnfs_block_extent, be_node);
		new = _front_merge(new, &bl->bl_extents[RW_EXTENT], merge2);
	}
	spin_unlock(&bl->bl_ext_lock);

	/* Since we removed the base reference above, be is now scheduled for
	 * destruction.  This put drops the bl_find_get_extent_locked() ref.
	 */
	bl_put_extent(be);
	dprintk("%s returns %llu after split\n", __func__, rv);
	return rv;

out_nosplit:
	kfree(e1);
	kfree(e2);
	kfree(e3);
	dprintk("%s returns %llu without splitting\n", __func__, rv);
	return rv;
}
831 | |||
/* Called after a LAYOUTCOMMIT reply.  On success convert each committed
 * short extent's range from INVALID to READWRITE and free the tracking
 * structure; on failure put the extents back on the layout's commit list
 * so a later LAYOUTCOMMIT retries them.
 */
void
clean_pnfs_block_layoutupdate(struct pnfs_block_layout *bl,
			      const struct nfs4_layoutcommit_args *arg,
			      int status)
{
	struct pnfs_block_short_extent *lce, *save;

	dprintk("%s status %d\n", __func__, status);
	list_for_each_entry_safe(lce, save, &bl->bl_committing, bse_node) {
		if (likely(!status)) {
			u64 offset = lce->bse_f_offset;
			u64 end = offset + lce->bse_length;

			/* set_to_rw() handles at most one extent per call;
			 * keep calling until the whole range is converted.
			 */
			do {
				offset = set_to_rw(bl, offset, end - offset);
			} while (offset < end);
			list_del(&lce->bse_node);

			kfree(lce);
		} else {
			/* Commit failed: requeue for a future layoutcommit. */
			list_del(&lce->bse_node);
			spin_lock(&bl->bl_ext_lock);
			add_to_commitlist(bl, lce);
			spin_unlock(&bl->bl_ext_lock);
		}
	}
}
859 | |||
860 | int bl_push_one_short_extent(struct pnfs_inval_markings *marks) | ||
861 | { | ||
862 | struct pnfs_block_short_extent *new; | ||
863 | |||
864 | new = kmalloc(sizeof(*new), GFP_NOFS); | ||
865 | if (unlikely(!new)) | ||
866 | return -ENOMEM; | ||
867 | |||
868 | spin_lock_bh(&marks->im_lock); | ||
869 | list_add(&new->bse_node, &marks->im_extents); | ||
870 | spin_unlock_bh(&marks->im_lock); | ||
871 | |||
872 | return 0; | ||
873 | } | ||
874 | |||
875 | struct pnfs_block_short_extent * | ||
876 | bl_pop_one_short_extent(struct pnfs_inval_markings *marks) | ||
877 | { | ||
878 | struct pnfs_block_short_extent *rv = NULL; | ||
879 | |||
880 | spin_lock_bh(&marks->im_lock); | ||
881 | if (!list_empty(&marks->im_extents)) { | ||
882 | rv = list_entry((&marks->im_extents)->next, | ||
883 | struct pnfs_block_short_extent, bse_node); | ||
884 | list_del_init(&rv->bse_node); | ||
885 | } | ||
886 | spin_unlock_bh(&marks->im_lock); | ||
887 | |||
888 | return rv; | ||
889 | } | ||
890 | |||
891 | void bl_free_short_extents(struct pnfs_inval_markings *marks, int num_to_free) | ||
892 | { | ||
893 | struct pnfs_block_short_extent *se = NULL, *tmp; | ||
894 | |||
895 | if (num_to_free <= 0) | ||
896 | return; | ||
897 | |||
898 | spin_lock(&marks->im_lock); | ||
899 | list_for_each_entry_safe(se, tmp, &marks->im_extents, bse_node) { | ||
900 | list_del(&se->bse_node); | ||
901 | kfree(se); | ||
902 | if (--num_to_free == 0) | ||
903 | break; | ||
904 | } | ||
905 | spin_unlock(&marks->im_lock); | ||
906 | |||
907 | BUG_ON(num_to_free > 0); | ||
908 | } | ||
diff --git a/fs/nfs/blocklayout/rpc_pipefs.c b/fs/nfs/blocklayout/rpc_pipefs.c new file mode 100644 index 000000000000..8d04bda2bd2e --- /dev/null +++ b/fs/nfs/blocklayout/rpc_pipefs.c | |||
@@ -0,0 +1,285 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2006,2007 The Regents of the University of Michigan. | ||
3 | * All rights reserved. | ||
4 | * | ||
5 | * Andy Adamson <andros@citi.umich.edu> | ||
6 | * Fred Isaman <iisaman@umich.edu> | ||
7 | * | ||
8 | * permission is granted to use, copy, create derivative works and | ||
9 | * redistribute this software and such derivative works for any purpose, | ||
10 | * so long as the name of the university of michigan is not used in | ||
11 | * any advertising or publicity pertaining to the use or distribution | ||
12 | * of this software without specific, written prior authorization. if | ||
13 | * the above copyright notice or any other identification of the | ||
14 | * university of michigan is included in any copy of any portion of | ||
15 | * this software, then the disclaimer below must also be included. | ||
16 | * | ||
17 | * this software is provided as is, without representation from the | ||
18 | * university of michigan as to its fitness for any purpose, and without | ||
19 | * warranty by the university of michigan of any kind, either express | ||
20 | * or implied, including without limitation the implied warranties of | ||
21 | * merchantability and fitness for a particular purpose. the regents | ||
22 | * of the university of michigan shall not be liable for any damages, | ||
23 | * including special, indirect, incidental, or consequential damages, | ||
24 | * with respect to any claim arising out or in connection with the use | ||
25 | * of the software, even if it has been or is hereafter advised of the | ||
26 | * possibility of such damages. | ||
27 | */ | ||
28 | |||
29 | #include <linux/module.h> | ||
30 | #include <linux/genhd.h> | ||
31 | #include <linux/blkdev.h> | ||
32 | |||
33 | #include "blocklayout.h" | ||
34 | |||
35 | #define NFSDBG_FACILITY NFSDBG_PNFS_LD | ||
36 | |||
37 | static void | ||
38 | nfs4_encode_simple(__be32 *p, struct pnfs_block_volume *b) | ||
39 | { | ||
40 | int i; | ||
41 | |||
42 | *p++ = cpu_to_be32(1); | ||
43 | *p++ = cpu_to_be32(b->type); | ||
44 | *p++ = cpu_to_be32(b->simple.nr_sigs); | ||
45 | for (i = 0; i < b->simple.nr_sigs; i++) { | ||
46 | p = xdr_encode_hyper(p, b->simple.sigs[i].offset); | ||
47 | p = xdr_encode_opaque(p, b->simple.sigs[i].sig, | ||
48 | b->simple.sigs[i].sig_len); | ||
49 | } | ||
50 | } | ||
51 | |||
52 | dev_t | ||
53 | bl_resolve_deviceid(struct nfs_server *server, struct pnfs_block_volume *b, | ||
54 | gfp_t gfp_mask) | ||
55 | { | ||
56 | struct net *net = server->nfs_client->cl_net; | ||
57 | struct nfs_net *nn = net_generic(net, nfs_net_id); | ||
58 | struct bl_dev_msg *reply = &nn->bl_mount_reply; | ||
59 | struct bl_pipe_msg bl_pipe_msg; | ||
60 | struct rpc_pipe_msg *msg = &bl_pipe_msg.msg; | ||
61 | struct bl_msg_hdr *bl_msg; | ||
62 | DECLARE_WAITQUEUE(wq, current); | ||
63 | dev_t dev = 0; | ||
64 | int rc; | ||
65 | |||
66 | dprintk("%s CREATING PIPEFS MESSAGE\n", __func__); | ||
67 | |||
68 | bl_pipe_msg.bl_wq = &nn->bl_wq; | ||
69 | |||
70 | b->simple.len += 4; /* single volume */ | ||
71 | if (b->simple.len > PAGE_SIZE) | ||
72 | return -EIO; | ||
73 | |||
74 | memset(msg, 0, sizeof(*msg)); | ||
75 | msg->len = sizeof(*bl_msg) + b->simple.len; | ||
76 | msg->data = kzalloc(msg->len, gfp_mask); | ||
77 | if (!msg->data) | ||
78 | goto out; | ||
79 | |||
80 | bl_msg = msg->data; | ||
81 | bl_msg->type = BL_DEVICE_MOUNT, | ||
82 | bl_msg->totallen = b->simple.len; | ||
83 | nfs4_encode_simple(msg->data + sizeof(*bl_msg), b); | ||
84 | |||
85 | dprintk("%s CALLING USERSPACE DAEMON\n", __func__); | ||
86 | add_wait_queue(&nn->bl_wq, &wq); | ||
87 | rc = rpc_queue_upcall(nn->bl_device_pipe, msg); | ||
88 | if (rc < 0) { | ||
89 | remove_wait_queue(&nn->bl_wq, &wq); | ||
90 | goto out; | ||
91 | } | ||
92 | |||
93 | set_current_state(TASK_UNINTERRUPTIBLE); | ||
94 | schedule(); | ||
95 | __set_current_state(TASK_RUNNING); | ||
96 | remove_wait_queue(&nn->bl_wq, &wq); | ||
97 | |||
98 | if (reply->status != BL_DEVICE_REQUEST_PROC) { | ||
99 | printk(KERN_WARNING "%s failed to decode device: %d\n", | ||
100 | __func__, reply->status); | ||
101 | goto out; | ||
102 | } | ||
103 | |||
104 | dev = MKDEV(reply->major, reply->minor); | ||
105 | out: | ||
106 | kfree(msg->data); | ||
107 | return dev; | ||
108 | } | ||
109 | |||
110 | static ssize_t bl_pipe_downcall(struct file *filp, const char __user *src, | ||
111 | size_t mlen) | ||
112 | { | ||
113 | struct nfs_net *nn = net_generic(filp->f_dentry->d_sb->s_fs_info, | ||
114 | nfs_net_id); | ||
115 | |||
116 | if (mlen != sizeof (struct bl_dev_msg)) | ||
117 | return -EINVAL; | ||
118 | |||
119 | if (copy_from_user(&nn->bl_mount_reply, src, mlen) != 0) | ||
120 | return -EFAULT; | ||
121 | |||
122 | wake_up(&nn->bl_wq); | ||
123 | |||
124 | return mlen; | ||
125 | } | ||
126 | |||
127 | static void bl_pipe_destroy_msg(struct rpc_pipe_msg *msg) | ||
128 | { | ||
129 | struct bl_pipe_msg *bl_pipe_msg = | ||
130 | container_of(msg, struct bl_pipe_msg, msg); | ||
131 | |||
132 | if (msg->errno >= 0) | ||
133 | return; | ||
134 | wake_up(bl_pipe_msg->bl_wq); | ||
135 | } | ||
136 | |||
/* Pipe operations for the per-net "blocklayout" rpc_pipefs pipe used to
 * talk to the blkmapd userspace daemon.
 */
static const struct rpc_pipe_ops bl_upcall_ops = {
	.upcall = rpc_pipe_generic_upcall,
	.downcall = bl_pipe_downcall,
	.destroy_msg = bl_pipe_destroy_msg,
};
142 | |||
143 | static struct dentry *nfs4blocklayout_register_sb(struct super_block *sb, | ||
144 | struct rpc_pipe *pipe) | ||
145 | { | ||
146 | struct dentry *dir, *dentry; | ||
147 | |||
148 | dir = rpc_d_lookup_sb(sb, NFS_PIPE_DIRNAME); | ||
149 | if (dir == NULL) | ||
150 | return ERR_PTR(-ENOENT); | ||
151 | dentry = rpc_mkpipe_dentry(dir, "blocklayout", NULL, pipe); | ||
152 | dput(dir); | ||
153 | return dentry; | ||
154 | } | ||
155 | |||
156 | static void nfs4blocklayout_unregister_sb(struct super_block *sb, | ||
157 | struct rpc_pipe *pipe) | ||
158 | { | ||
159 | if (pipe->dentry) | ||
160 | rpc_unlink(pipe->dentry); | ||
161 | } | ||
162 | |||
163 | static int rpc_pipefs_event(struct notifier_block *nb, unsigned long event, | ||
164 | void *ptr) | ||
165 | { | ||
166 | struct super_block *sb = ptr; | ||
167 | struct net *net = sb->s_fs_info; | ||
168 | struct nfs_net *nn = net_generic(net, nfs_net_id); | ||
169 | struct dentry *dentry; | ||
170 | int ret = 0; | ||
171 | |||
172 | if (!try_module_get(THIS_MODULE)) | ||
173 | return 0; | ||
174 | |||
175 | if (nn->bl_device_pipe == NULL) { | ||
176 | module_put(THIS_MODULE); | ||
177 | return 0; | ||
178 | } | ||
179 | |||
180 | switch (event) { | ||
181 | case RPC_PIPEFS_MOUNT: | ||
182 | dentry = nfs4blocklayout_register_sb(sb, nn->bl_device_pipe); | ||
183 | if (IS_ERR(dentry)) { | ||
184 | ret = PTR_ERR(dentry); | ||
185 | break; | ||
186 | } | ||
187 | nn->bl_device_pipe->dentry = dentry; | ||
188 | break; | ||
189 | case RPC_PIPEFS_UMOUNT: | ||
190 | if (nn->bl_device_pipe->dentry) | ||
191 | nfs4blocklayout_unregister_sb(sb, nn->bl_device_pipe); | ||
192 | break; | ||
193 | default: | ||
194 | ret = -ENOTSUPP; | ||
195 | break; | ||
196 | } | ||
197 | module_put(THIS_MODULE); | ||
198 | return ret; | ||
199 | } | ||
200 | |||
/* Notifier that tracks rpc_pipefs (un)mount events so the pipe dentry
 * follows the pipefs superblock lifetime.
 */
static struct notifier_block nfs4blocklayout_block = {
	.notifier_call = rpc_pipefs_event,
};
204 | |||
205 | static struct dentry *nfs4blocklayout_register_net(struct net *net, | ||
206 | struct rpc_pipe *pipe) | ||
207 | { | ||
208 | struct super_block *pipefs_sb; | ||
209 | struct dentry *dentry; | ||
210 | |||
211 | pipefs_sb = rpc_get_sb_net(net); | ||
212 | if (!pipefs_sb) | ||
213 | return NULL; | ||
214 | dentry = nfs4blocklayout_register_sb(pipefs_sb, pipe); | ||
215 | rpc_put_sb_net(net); | ||
216 | return dentry; | ||
217 | } | ||
218 | |||
/* Remove the pipe from @net's rpc_pipefs superblock, if one is mounted. */
static void nfs4blocklayout_unregister_net(struct net *net,
				 struct rpc_pipe *pipe)
{
	struct super_block *pipefs_sb;

	pipefs_sb = rpc_get_sb_net(net);
	if (!pipefs_sb)
		return;

	nfs4blocklayout_unregister_sb(pipefs_sb, pipe);
	rpc_put_sb_net(net);
}
230 | |||
231 | static int nfs4blocklayout_net_init(struct net *net) | ||
232 | { | ||
233 | struct nfs_net *nn = net_generic(net, nfs_net_id); | ||
234 | struct dentry *dentry; | ||
235 | |||
236 | init_waitqueue_head(&nn->bl_wq); | ||
237 | nn->bl_device_pipe = rpc_mkpipe_data(&bl_upcall_ops, 0); | ||
238 | if (IS_ERR(nn->bl_device_pipe)) | ||
239 | return PTR_ERR(nn->bl_device_pipe); | ||
240 | dentry = nfs4blocklayout_register_net(net, nn->bl_device_pipe); | ||
241 | if (IS_ERR(dentry)) { | ||
242 | rpc_destroy_pipe_data(nn->bl_device_pipe); | ||
243 | return PTR_ERR(dentry); | ||
244 | } | ||
245 | nn->bl_device_pipe->dentry = dentry; | ||
246 | return 0; | ||
247 | } | ||
248 | |||
249 | static void nfs4blocklayout_net_exit(struct net *net) | ||
250 | { | ||
251 | struct nfs_net *nn = net_generic(net, nfs_net_id); | ||
252 | |||
253 | nfs4blocklayout_unregister_net(net, nn->bl_device_pipe); | ||
254 | rpc_destroy_pipe_data(nn->bl_device_pipe); | ||
255 | nn->bl_device_pipe = NULL; | ||
256 | } | ||
257 | |||
/* Create/destroy the blocklayout upcall pipe for each network namespace. */
static struct pernet_operations nfs4blocklayout_net_ops = {
	.init = nfs4blocklayout_net_init,
	.exit = nfs4blocklayout_net_exit,
};
262 | |||
263 | int __init bl_init_pipefs(void) | ||
264 | { | ||
265 | int ret; | ||
266 | |||
267 | ret = rpc_pipefs_notifier_register(&nfs4blocklayout_block); | ||
268 | if (ret) | ||
269 | goto out; | ||
270 | ret = register_pernet_subsys(&nfs4blocklayout_net_ops); | ||
271 | if (ret) | ||
272 | goto out_unregister_notifier; | ||
273 | return 0; | ||
274 | |||
275 | out_unregister_notifier: | ||
276 | rpc_pipefs_notifier_unregister(&nfs4blocklayout_block); | ||
277 | out: | ||
278 | return ret; | ||
279 | } | ||
280 | |||
/* Module exit: tear down what bl_init_pipefs() set up.
 * NOTE(review): the notifier is unregistered before the pernet subsys -
 * the same order as registration, not the reverse.  This looks intentional
 * (stop reacting to pipefs events before destroying the per-net pipes),
 * but confirm.
 */
void __exit bl_cleanup_pipefs(void)
{
	rpc_pipefs_notifier_unregister(&nfs4blocklayout_block);
	unregister_pernet_subsys(&nfs4blocklayout_net_ops);
}
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index 41db5258e7a7..73466b934090 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c | |||
@@ -171,14 +171,26 @@ static u32 initiate_file_draining(struct nfs_client *clp, | |||
171 | goto out; | 171 | goto out; |
172 | 172 | ||
173 | ino = lo->plh_inode; | 173 | ino = lo->plh_inode; |
174 | |||
175 | spin_lock(&ino->i_lock); | ||
176 | pnfs_set_layout_stateid(lo, &args->cbl_stateid, true); | ||
177 | spin_unlock(&ino->i_lock); | ||
178 | |||
179 | pnfs_layoutcommit_inode(ino, false); | ||
180 | |||
174 | spin_lock(&ino->i_lock); | 181 | spin_lock(&ino->i_lock); |
175 | if (test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags) || | 182 | if (test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags) || |
176 | pnfs_mark_matching_lsegs_invalid(lo, &free_me_list, | 183 | pnfs_mark_matching_lsegs_invalid(lo, &free_me_list, |
177 | &args->cbl_range)) | 184 | &args->cbl_range)) { |
178 | rv = NFS4ERR_DELAY; | 185 | rv = NFS4ERR_DELAY; |
179 | else | 186 | goto unlock; |
180 | rv = NFS4ERR_NOMATCHING_LAYOUT; | 187 | } |
181 | pnfs_set_layout_stateid(lo, &args->cbl_stateid, true); | 188 | |
189 | if (NFS_SERVER(ino)->pnfs_curr_ld->return_range) { | ||
190 | NFS_SERVER(ino)->pnfs_curr_ld->return_range(lo, | ||
191 | &args->cbl_range); | ||
192 | } | ||
193 | unlock: | ||
182 | spin_unlock(&ino->i_lock); | 194 | spin_unlock(&ino->i_lock); |
183 | pnfs_free_lseg_list(&free_me_list); | 195 | pnfs_free_lseg_list(&free_me_list); |
184 | pnfs_put_layout_hdr(lo); | 196 | pnfs_put_layout_hdr(lo); |
@@ -277,9 +289,6 @@ __be32 nfs4_callback_devicenotify(struct cb_devicenotifyargs *args, | |||
277 | } | 289 | } |
278 | 290 | ||
279 | found: | 291 | found: |
280 | if (dev->cbd_notify_type == NOTIFY_DEVICEID4_CHANGE) | ||
281 | dprintk("%s: NOTIFY_DEVICEID4_CHANGE not supported, " | ||
282 | "deleting instead\n", __func__); | ||
283 | nfs4_delete_deviceid(server->pnfs_curr_ld, clp, &dev->cbd_dev_id); | 292 | nfs4_delete_deviceid(server->pnfs_curr_ld, clp, &dev->cbd_dev_id); |
284 | } | 293 | } |
285 | 294 | ||
diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 94088517039f..f9f4845db989 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c | |||
@@ -1252,6 +1252,7 @@ static int nfs_server_list_open(struct inode *inode, struct file *file) | |||
1252 | * set up the iterator to start reading from the server list and return the first item | 1252 | * set up the iterator to start reading from the server list and return the first item |
1253 | */ | 1253 | */ |
1254 | static void *nfs_server_list_start(struct seq_file *m, loff_t *_pos) | 1254 | static void *nfs_server_list_start(struct seq_file *m, loff_t *_pos) |
1255 | __acquires(&nn->nfs_client_lock) | ||
1255 | { | 1256 | { |
1256 | struct nfs_net *nn = net_generic(seq_file_net(m), nfs_net_id); | 1257 | struct nfs_net *nn = net_generic(seq_file_net(m), nfs_net_id); |
1257 | 1258 | ||
@@ -1274,6 +1275,7 @@ static void *nfs_server_list_next(struct seq_file *p, void *v, loff_t *pos) | |||
1274 | * clean up after reading from the transports list | 1275 | * clean up after reading from the transports list |
1275 | */ | 1276 | */ |
1276 | static void nfs_server_list_stop(struct seq_file *p, void *v) | 1277 | static void nfs_server_list_stop(struct seq_file *p, void *v) |
1278 | __releases(&nn->nfs_client_lock) | ||
1277 | { | 1279 | { |
1278 | struct nfs_net *nn = net_generic(seq_file_net(p), nfs_net_id); | 1280 | struct nfs_net *nn = net_generic(seq_file_net(p), nfs_net_id); |
1279 | 1281 | ||
@@ -1326,6 +1328,7 @@ static int nfs_volume_list_open(struct inode *inode, struct file *file) | |||
1326 | * set up the iterator to start reading from the volume list and return the first item | 1328 | * set up the iterator to start reading from the volume list and return the first item |
1327 | */ | 1329 | */ |
1328 | static void *nfs_volume_list_start(struct seq_file *m, loff_t *_pos) | 1330 | static void *nfs_volume_list_start(struct seq_file *m, loff_t *_pos) |
1331 | __acquires(&nn->nfs_client_lock) | ||
1329 | { | 1332 | { |
1330 | struct nfs_net *nn = net_generic(seq_file_net(m), nfs_net_id); | 1333 | struct nfs_net *nn = net_generic(seq_file_net(m), nfs_net_id); |
1331 | 1334 | ||
@@ -1348,6 +1351,7 @@ static void *nfs_volume_list_next(struct seq_file *p, void *v, loff_t *pos) | |||
1348 | * clean up after reading from the transports list | 1351 | * clean up after reading from the transports list |
1349 | */ | 1352 | */ |
1350 | static void nfs_volume_list_stop(struct seq_file *p, void *v) | 1353 | static void nfs_volume_list_stop(struct seq_file *p, void *v) |
1354 | __releases(&nn->nfs_client_lock) | ||
1351 | { | 1355 | { |
1352 | struct nfs_net *nn = net_generic(seq_file_net(p), nfs_net_id); | 1356 | struct nfs_net *nn = net_generic(seq_file_net(p), nfs_net_id); |
1353 | 1357 | ||
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 65ef6e00deee..dda4b8667c02 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c | |||
@@ -178,7 +178,6 @@ static int nfs_direct_set_or_cmp_hdr_verf(struct nfs_direct_req *dreq, | |||
178 | return memcmp(verfp, &hdr->verf, sizeof(struct nfs_writeverf)); | 178 | return memcmp(verfp, &hdr->verf, sizeof(struct nfs_writeverf)); |
179 | } | 179 | } |
180 | 180 | ||
181 | #if IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4) | ||
182 | /* | 181 | /* |
183 | * nfs_direct_cmp_commit_data_verf - compare verifier for commit data | 182 | * nfs_direct_cmp_commit_data_verf - compare verifier for commit data |
184 | * @dreq - direct request possibly spanning multiple servers | 183 | * @dreq - direct request possibly spanning multiple servers |
@@ -197,7 +196,6 @@ static int nfs_direct_cmp_commit_data_verf(struct nfs_direct_req *dreq, | |||
197 | WARN_ON_ONCE(verfp->committed < 0); | 196 | WARN_ON_ONCE(verfp->committed < 0); |
198 | return memcmp(verfp, &data->verf, sizeof(struct nfs_writeverf)); | 197 | return memcmp(verfp, &data->verf, sizeof(struct nfs_writeverf)); |
199 | } | 198 | } |
200 | #endif | ||
201 | 199 | ||
202 | /** | 200 | /** |
203 | * nfs_direct_IO - NFS address space operation for direct I/O | 201 | * nfs_direct_IO - NFS address space operation for direct I/O |
@@ -576,7 +574,6 @@ out: | |||
576 | return result; | 574 | return result; |
577 | } | 575 | } |
578 | 576 | ||
579 | #if IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4) | ||
580 | static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq) | 577 | static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq) |
581 | { | 578 | { |
582 | struct nfs_pageio_descriptor desc; | 579 | struct nfs_pageio_descriptor desc; |
@@ -700,17 +697,6 @@ static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode | |||
700 | schedule_work(&dreq->work); /* Calls nfs_direct_write_schedule_work */ | 697 | schedule_work(&dreq->work); /* Calls nfs_direct_write_schedule_work */ |
701 | } | 698 | } |
702 | 699 | ||
703 | #else | ||
704 | static void nfs_direct_write_schedule_work(struct work_struct *work) | ||
705 | { | ||
706 | } | ||
707 | |||
708 | static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode) | ||
709 | { | ||
710 | nfs_direct_complete(dreq, true); | ||
711 | } | ||
712 | #endif | ||
713 | |||
714 | static void nfs_direct_write_completion(struct nfs_pgio_header *hdr) | 700 | static void nfs_direct_write_completion(struct nfs_pgio_header *hdr) |
715 | { | 701 | { |
716 | struct nfs_direct_req *dreq = hdr->dreq; | 702 | struct nfs_direct_req *dreq = hdr->dreq; |
diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 524dd80d1898..6920127c5eb7 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c | |||
@@ -36,6 +36,7 @@ | |||
36 | #include "internal.h" | 36 | #include "internal.h" |
37 | #include "iostat.h" | 37 | #include "iostat.h" |
38 | #include "fscache.h" | 38 | #include "fscache.h" |
39 | #include "pnfs.h" | ||
39 | 40 | ||
40 | #include "nfstrace.h" | 41 | #include "nfstrace.h" |
41 | 42 | ||
@@ -327,6 +328,12 @@ static int nfs_want_read_modify_write(struct file *file, struct page *page, | |||
327 | unsigned int offset = pos & (PAGE_CACHE_SIZE - 1); | 328 | unsigned int offset = pos & (PAGE_CACHE_SIZE - 1); |
328 | unsigned int end = offset + len; | 329 | unsigned int end = offset + len; |
329 | 330 | ||
331 | if (pnfs_ld_read_whole_page(file->f_mapping->host)) { | ||
332 | if (!PageUptodate(page)) | ||
333 | return 1; | ||
334 | return 0; | ||
335 | } | ||
336 | |||
330 | if ((file->f_mode & FMODE_READ) && /* open for read? */ | 337 | if ((file->f_mode & FMODE_READ) && /* open for read? */ |
331 | !PageUptodate(page) && /* Uptodate? */ | 338 | !PageUptodate(page) && /* Uptodate? */ |
332 | !PagePrivate(page) && /* i/o request already? */ | 339 | !PagePrivate(page) && /* i/o request already? */ |
@@ -468,17 +475,26 @@ static int nfs_release_page(struct page *page, gfp_t gfp) | |||
468 | 475 | ||
469 | dfprintk(PAGECACHE, "NFS: release_page(%p)\n", page); | 476 | dfprintk(PAGECACHE, "NFS: release_page(%p)\n", page); |
470 | 477 | ||
471 | /* Only do I/O if gfp is a superset of GFP_KERNEL, and we're not | 478 | /* Always try to initiate a 'commit' if relevant, but only |
472 | * doing this memory reclaim for a fs-related allocation. | 479 | * wait for it if __GFP_WAIT is set. Even then, only wait 1 |
480 | * second and only if the 'bdi' is not congested. | ||
481 | * Waiting indefinitely can cause deadlocks when the NFS | ||
482 | * server is on this machine, when a new TCP connection is | ||
483 | * needed and in other rare cases. There is no particular | ||
484 | * need to wait extensively here. A short wait has the | ||
485 | * benefit that someone else can worry about the freezer. | ||
473 | */ | 486 | */ |
474 | if (mapping && (gfp & GFP_KERNEL) == GFP_KERNEL && | 487 | if (mapping) { |
475 | !(current->flags & PF_FSTRANS)) { | 488 | struct nfs_server *nfss = NFS_SERVER(mapping->host); |
476 | int how = FLUSH_SYNC; | 489 | nfs_commit_inode(mapping->host, 0); |
477 | 490 | if ((gfp & __GFP_WAIT) && | |
478 | /* Don't let kswapd deadlock waiting for OOM RPC calls */ | 491 | !bdi_write_congested(&nfss->backing_dev_info)) { |
479 | if (current_is_kswapd()) | 492 | wait_on_page_bit_killable_timeout(page, PG_private, |
480 | how = 0; | 493 | HZ); |
481 | nfs_commit_inode(mapping->host, how); | 494 | if (PagePrivate(page)) |
495 | set_bdi_congested(&nfss->backing_dev_info, | ||
496 | BLK_RW_ASYNC); | ||
497 | } | ||
482 | } | 498 | } |
483 | /* If PagePrivate() is set, then the page is not freeable */ | 499 | /* If PagePrivate() is set, then the page is not freeable */ |
484 | if (PagePrivate(page)) | 500 | if (PagePrivate(page)) |
@@ -539,13 +555,25 @@ static int nfs_launder_page(struct page *page) | |||
539 | static int nfs_swap_activate(struct swap_info_struct *sis, struct file *file, | 555 | static int nfs_swap_activate(struct swap_info_struct *sis, struct file *file, |
540 | sector_t *span) | 556 | sector_t *span) |
541 | { | 557 | { |
558 | int ret; | ||
559 | struct rpc_clnt *clnt = NFS_CLIENT(file->f_mapping->host); | ||
560 | |||
542 | *span = sis->pages; | 561 | *span = sis->pages; |
543 | return xs_swapper(NFS_CLIENT(file->f_mapping->host)->cl_xprt, 1); | 562 | |
563 | rcu_read_lock(); | ||
564 | ret = xs_swapper(rcu_dereference(clnt->cl_xprt), 1); | ||
565 | rcu_read_unlock(); | ||
566 | |||
567 | return ret; | ||
544 | } | 568 | } |
545 | 569 | ||
546 | static void nfs_swap_deactivate(struct file *file) | 570 | static void nfs_swap_deactivate(struct file *file) |
547 | { | 571 | { |
548 | xs_swapper(NFS_CLIENT(file->f_mapping->host)->cl_xprt, 0); | 572 | struct rpc_clnt *clnt = NFS_CLIENT(file->f_mapping->host); |
573 | |||
574 | rcu_read_lock(); | ||
575 | xs_swapper(rcu_dereference(clnt->cl_xprt), 0); | ||
576 | rcu_read_unlock(); | ||
549 | } | 577 | } |
550 | #endif | 578 | #endif |
551 | 579 | ||
diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c index 90978075f730..abc5056999d6 100644 --- a/fs/nfs/filelayout/filelayout.c +++ b/fs/nfs/filelayout/filelayout.c | |||
@@ -265,7 +265,7 @@ filelayout_set_layoutcommit(struct nfs_pgio_header *hdr) | |||
265 | { | 265 | { |
266 | 266 | ||
267 | if (FILELAYOUT_LSEG(hdr->lseg)->commit_through_mds || | 267 | if (FILELAYOUT_LSEG(hdr->lseg)->commit_through_mds || |
268 | hdr->res.verf->committed == NFS_FILE_SYNC) | 268 | hdr->res.verf->committed != NFS_DATA_SYNC) |
269 | return; | 269 | return; |
270 | 270 | ||
271 | pnfs_set_layoutcommit(hdr); | 271 | pnfs_set_layoutcommit(hdr); |
@@ -403,6 +403,9 @@ static int filelayout_commit_done_cb(struct rpc_task *task, | |||
403 | return -EAGAIN; | 403 | return -EAGAIN; |
404 | } | 404 | } |
405 | 405 | ||
406 | if (data->verf.committed == NFS_UNSTABLE) | ||
407 | pnfs_commit_set_layoutcommit(data); | ||
408 | |||
406 | return 0; | 409 | return 0; |
407 | } | 410 | } |
408 | 411 | ||
@@ -646,18 +649,15 @@ filelayout_check_layout(struct pnfs_layout_hdr *lo, | |||
646 | } | 649 | } |
647 | 650 | ||
648 | /* find and reference the deviceid */ | 651 | /* find and reference the deviceid */ |
649 | d = nfs4_find_get_deviceid(NFS_SERVER(lo->plh_inode)->pnfs_curr_ld, | 652 | d = nfs4_find_get_deviceid(NFS_SERVER(lo->plh_inode), id, |
650 | NFS_SERVER(lo->plh_inode)->nfs_client, id); | 653 | lo->plh_lc_cred, gfp_flags); |
651 | if (d == NULL) { | 654 | if (d == NULL) |
652 | dsaddr = filelayout_get_device_info(lo->plh_inode, id, | 655 | goto out; |
653 | lo->plh_lc_cred, gfp_flags); | 656 | |
654 | if (dsaddr == NULL) | 657 | dsaddr = container_of(d, struct nfs4_file_layout_dsaddr, id_node); |
655 | goto out; | ||
656 | } else | ||
657 | dsaddr = container_of(d, struct nfs4_file_layout_dsaddr, id_node); | ||
658 | /* Found deviceid is unavailable */ | 658 | /* Found deviceid is unavailable */ |
659 | if (filelayout_test_devid_unavailable(&dsaddr->id_node)) | 659 | if (filelayout_test_devid_unavailable(&dsaddr->id_node)) |
660 | goto out_put; | 660 | goto out_put; |
661 | 661 | ||
662 | fl->dsaddr = dsaddr; | 662 | fl->dsaddr = dsaddr; |
663 | 663 | ||
@@ -1368,6 +1368,17 @@ out: | |||
1368 | cinfo->ds->ncommitting = 0; | 1368 | cinfo->ds->ncommitting = 0; |
1369 | return PNFS_ATTEMPTED; | 1369 | return PNFS_ATTEMPTED; |
1370 | } | 1370 | } |
1371 | static struct nfs4_deviceid_node * | ||
1372 | filelayout_alloc_deviceid_node(struct nfs_server *server, | ||
1373 | struct pnfs_device *pdev, gfp_t gfp_flags) | ||
1374 | { | ||
1375 | struct nfs4_file_layout_dsaddr *dsaddr; | ||
1376 | |||
1377 | dsaddr = nfs4_fl_alloc_deviceid_node(server, pdev, gfp_flags); | ||
1378 | if (!dsaddr) | ||
1379 | return NULL; | ||
1380 | return &dsaddr->id_node; | ||
1381 | } | ||
1371 | 1382 | ||
1372 | static void | 1383 | static void |
1373 | filelayout_free_deveiceid_node(struct nfs4_deviceid_node *d) | 1384 | filelayout_free_deveiceid_node(struct nfs4_deviceid_node *d) |
@@ -1420,6 +1431,7 @@ static struct pnfs_layoutdriver_type filelayout_type = { | |||
1420 | .commit_pagelist = filelayout_commit_pagelist, | 1431 | .commit_pagelist = filelayout_commit_pagelist, |
1421 | .read_pagelist = filelayout_read_pagelist, | 1432 | .read_pagelist = filelayout_read_pagelist, |
1422 | .write_pagelist = filelayout_write_pagelist, | 1433 | .write_pagelist = filelayout_write_pagelist, |
1434 | .alloc_deviceid_node = filelayout_alloc_deviceid_node, | ||
1423 | .free_deviceid_node = filelayout_free_deveiceid_node, | 1435 | .free_deviceid_node = filelayout_free_deveiceid_node, |
1424 | }; | 1436 | }; |
1425 | 1437 | ||
diff --git a/fs/nfs/filelayout/filelayout.h b/fs/nfs/filelayout/filelayout.h index ffbddf2219ea..7c9f800c49d7 100644 --- a/fs/nfs/filelayout/filelayout.h +++ b/fs/nfs/filelayout/filelayout.h | |||
@@ -147,10 +147,11 @@ u32 nfs4_fl_calc_j_index(struct pnfs_layout_segment *lseg, loff_t offset); | |||
147 | u32 nfs4_fl_calc_ds_index(struct pnfs_layout_segment *lseg, u32 j); | 147 | u32 nfs4_fl_calc_ds_index(struct pnfs_layout_segment *lseg, u32 j); |
148 | struct nfs4_pnfs_ds *nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, | 148 | struct nfs4_pnfs_ds *nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, |
149 | u32 ds_idx); | 149 | u32 ds_idx); |
150 | |||
151 | extern struct nfs4_file_layout_dsaddr * | ||
152 | nfs4_fl_alloc_deviceid_node(struct nfs_server *server, | ||
153 | struct pnfs_device *pdev, gfp_t gfp_flags); | ||
150 | extern void nfs4_fl_put_deviceid(struct nfs4_file_layout_dsaddr *dsaddr); | 154 | extern void nfs4_fl_put_deviceid(struct nfs4_file_layout_dsaddr *dsaddr); |
151 | extern void nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr); | 155 | extern void nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr); |
152 | struct nfs4_file_layout_dsaddr * | ||
153 | filelayout_get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id, | ||
154 | struct rpc_cred *cred, gfp_t gfp_flags); | ||
155 | 156 | ||
156 | #endif /* FS_NFS_NFS4FILELAYOUT_H */ | 157 | #endif /* FS_NFS_NFS4FILELAYOUT_H */ |
diff --git a/fs/nfs/filelayout/filelayoutdev.c b/fs/nfs/filelayout/filelayoutdev.c index 8540516f4d71..9bb806a76d99 100644 --- a/fs/nfs/filelayout/filelayoutdev.c +++ b/fs/nfs/filelayout/filelayoutdev.c | |||
@@ -484,8 +484,9 @@ out_err: | |||
484 | } | 484 | } |
485 | 485 | ||
486 | /* Decode opaque device data and return the result */ | 486 | /* Decode opaque device data and return the result */ |
487 | static struct nfs4_file_layout_dsaddr* | 487 | struct nfs4_file_layout_dsaddr * |
488 | decode_device(struct inode *ino, struct pnfs_device *pdev, gfp_t gfp_flags) | 488 | nfs4_fl_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev, |
489 | gfp_t gfp_flags) | ||
489 | { | 490 | { |
490 | int i; | 491 | int i; |
491 | u32 cnt, num; | 492 | u32 cnt, num; |
@@ -570,10 +571,7 @@ decode_device(struct inode *ino, struct pnfs_device *pdev, gfp_t gfp_flags) | |||
570 | dsaddr->stripe_indices = stripe_indices; | 571 | dsaddr->stripe_indices = stripe_indices; |
571 | stripe_indices = NULL; | 572 | stripe_indices = NULL; |
572 | dsaddr->ds_num = num; | 573 | dsaddr->ds_num = num; |
573 | nfs4_init_deviceid_node(&dsaddr->id_node, | 574 | nfs4_init_deviceid_node(&dsaddr->id_node, server, &pdev->dev_id); |
574 | NFS_SERVER(ino)->pnfs_curr_ld, | ||
575 | NFS_SERVER(ino)->nfs_client, | ||
576 | &pdev->dev_id); | ||
577 | 575 | ||
578 | INIT_LIST_HEAD(&dsaddrs); | 576 | INIT_LIST_HEAD(&dsaddrs); |
579 | 577 | ||
@@ -587,7 +585,7 @@ decode_device(struct inode *ino, struct pnfs_device *pdev, gfp_t gfp_flags) | |||
587 | 585 | ||
588 | mp_count = be32_to_cpup(p); /* multipath count */ | 586 | mp_count = be32_to_cpup(p); /* multipath count */ |
589 | for (j = 0; j < mp_count; j++) { | 587 | for (j = 0; j < mp_count; j++) { |
590 | da = decode_ds_addr(NFS_SERVER(ino)->nfs_client->cl_net, | 588 | da = decode_ds_addr(server->nfs_client->cl_net, |
591 | &stream, gfp_flags); | 589 | &stream, gfp_flags); |
592 | if (da) | 590 | if (da) |
593 | list_add_tail(&da->da_node, &dsaddrs); | 591 | list_add_tail(&da->da_node, &dsaddrs); |
@@ -637,102 +635,6 @@ out_err: | |||
637 | return NULL; | 635 | return NULL; |
638 | } | 636 | } |
639 | 637 | ||
640 | /* | ||
641 | * Decode the opaque device specified in 'dev' and add it to the cache of | ||
642 | * available devices. | ||
643 | */ | ||
644 | static struct nfs4_file_layout_dsaddr * | ||
645 | decode_and_add_device(struct inode *inode, struct pnfs_device *dev, gfp_t gfp_flags) | ||
646 | { | ||
647 | struct nfs4_deviceid_node *d; | ||
648 | struct nfs4_file_layout_dsaddr *n, *new; | ||
649 | |||
650 | new = decode_device(inode, dev, gfp_flags); | ||
651 | if (!new) { | ||
652 | printk(KERN_WARNING "NFS: %s: Could not decode or add device\n", | ||
653 | __func__); | ||
654 | return NULL; | ||
655 | } | ||
656 | |||
657 | d = nfs4_insert_deviceid_node(&new->id_node); | ||
658 | n = container_of(d, struct nfs4_file_layout_dsaddr, id_node); | ||
659 | if (n != new) { | ||
660 | nfs4_fl_free_deviceid(new); | ||
661 | return n; | ||
662 | } | ||
663 | |||
664 | return new; | ||
665 | } | ||
666 | |||
667 | /* | ||
668 | * Retrieve the information for dev_id, add it to the list | ||
669 | * of available devices, and return it. | ||
670 | */ | ||
671 | struct nfs4_file_layout_dsaddr * | ||
672 | filelayout_get_device_info(struct inode *inode, | ||
673 | struct nfs4_deviceid *dev_id, | ||
674 | struct rpc_cred *cred, | ||
675 | gfp_t gfp_flags) | ||
676 | { | ||
677 | struct pnfs_device *pdev = NULL; | ||
678 | u32 max_resp_sz; | ||
679 | int max_pages; | ||
680 | struct page **pages = NULL; | ||
681 | struct nfs4_file_layout_dsaddr *dsaddr = NULL; | ||
682 | int rc, i; | ||
683 | struct nfs_server *server = NFS_SERVER(inode); | ||
684 | |||
685 | /* | ||
686 | * Use the session max response size as the basis for setting | ||
687 | * GETDEVICEINFO's maxcount | ||
688 | */ | ||
689 | max_resp_sz = server->nfs_client->cl_session->fc_attrs.max_resp_sz; | ||
690 | max_pages = nfs_page_array_len(0, max_resp_sz); | ||
691 | dprintk("%s inode %p max_resp_sz %u max_pages %d\n", | ||
692 | __func__, inode, max_resp_sz, max_pages); | ||
693 | |||
694 | pdev = kzalloc(sizeof(struct pnfs_device), gfp_flags); | ||
695 | if (pdev == NULL) | ||
696 | return NULL; | ||
697 | |||
698 | pages = kcalloc(max_pages, sizeof(struct page *), gfp_flags); | ||
699 | if (pages == NULL) { | ||
700 | kfree(pdev); | ||
701 | return NULL; | ||
702 | } | ||
703 | for (i = 0; i < max_pages; i++) { | ||
704 | pages[i] = alloc_page(gfp_flags); | ||
705 | if (!pages[i]) | ||
706 | goto out_free; | ||
707 | } | ||
708 | |||
709 | memcpy(&pdev->dev_id, dev_id, sizeof(*dev_id)); | ||
710 | pdev->layout_type = LAYOUT_NFSV4_1_FILES; | ||
711 | pdev->pages = pages; | ||
712 | pdev->pgbase = 0; | ||
713 | pdev->pglen = max_resp_sz; | ||
714 | pdev->mincount = 0; | ||
715 | pdev->maxcount = max_resp_sz - nfs41_maxgetdevinfo_overhead; | ||
716 | |||
717 | rc = nfs4_proc_getdeviceinfo(server, pdev, cred); | ||
718 | dprintk("%s getdevice info returns %d\n", __func__, rc); | ||
719 | if (rc) | ||
720 | goto out_free; | ||
721 | |||
722 | /* | ||
723 | * Found new device, need to decode it and then add it to the | ||
724 | * list of known devices for this mountpoint. | ||
725 | */ | ||
726 | dsaddr = decode_and_add_device(inode, pdev, gfp_flags); | ||
727 | out_free: | ||
728 | for (i = 0; i < max_pages; i++) | ||
729 | __free_page(pages[i]); | ||
730 | kfree(pages); | ||
731 | kfree(pdev); | ||
732 | dprintk("<-- %s dsaddr %p\n", __func__, dsaddr); | ||
733 | return dsaddr; | ||
734 | } | ||
735 | |||
736 | void | 638 | void |
737 | nfs4_fl_put_deviceid(struct nfs4_file_layout_dsaddr *dsaddr) | 639 | nfs4_fl_put_deviceid(struct nfs4_file_layout_dsaddr *dsaddr) |
738 | { | 640 | { |
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 577a36f0a510..141c9f4a40de 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c | |||
@@ -505,7 +505,9 @@ nfs_setattr(struct dentry *dentry, struct iattr *attr) | |||
505 | attr->ia_valid &= ~ATTR_MODE; | 505 | attr->ia_valid &= ~ATTR_MODE; |
506 | 506 | ||
507 | if (attr->ia_valid & ATTR_SIZE) { | 507 | if (attr->ia_valid & ATTR_SIZE) { |
508 | if (!S_ISREG(inode->i_mode) || attr->ia_size == i_size_read(inode)) | 508 | BUG_ON(!S_ISREG(inode->i_mode)); |
509 | |||
510 | if (attr->ia_size == i_size_read(inode)) | ||
509 | attr->ia_valid &= ~ATTR_SIZE; | 511 | attr->ia_valid &= ~ATTR_SIZE; |
510 | } | 512 | } |
511 | 513 | ||
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 9056622d2230..14ae6f20a172 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h | |||
@@ -218,13 +218,6 @@ static inline void nfs_fs_proc_exit(void) | |||
218 | int nfs_sockaddr_match_ipaddr(const struct sockaddr *, const struct sockaddr *); | 218 | int nfs_sockaddr_match_ipaddr(const struct sockaddr *, const struct sockaddr *); |
219 | #endif | 219 | #endif |
220 | 220 | ||
221 | /* nfs3client.c */ | ||
222 | #if IS_ENABLED(CONFIG_NFS_V3) | ||
223 | struct nfs_server *nfs3_create_server(struct nfs_mount_info *, struct nfs_subversion *); | ||
224 | struct nfs_server *nfs3_clone_server(struct nfs_server *, struct nfs_fh *, | ||
225 | struct nfs_fattr *, rpc_authflavor_t); | ||
226 | #endif | ||
227 | |||
228 | /* callback_xdr.c */ | 221 | /* callback_xdr.c */ |
229 | extern struct svc_version nfs4_callback_version1; | 222 | extern struct svc_version nfs4_callback_version1; |
230 | extern struct svc_version nfs4_callback_version4; | 223 | extern struct svc_version nfs4_callback_version4; |
diff --git a/fs/nfs/nfs3_fs.h b/fs/nfs/nfs3_fs.h new file mode 100644 index 000000000000..333ae4068506 --- /dev/null +++ b/fs/nfs/nfs3_fs.h | |||
@@ -0,0 +1,34 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2014 Anna Schumaker. | ||
3 | * | ||
4 | * NFSv3-specific filesystem definitions and declarations | ||
5 | */ | ||
6 | #ifndef __LINUX_FS_NFS_NFS3_FS_H | ||
7 | #define __LINUX_FS_NFS_NFS3_FS_H | ||
8 | |||
9 | /* | ||
10 | * nfs3acl.c | ||
11 | */ | ||
12 | #ifdef CONFIG_NFS_V3_ACL | ||
13 | extern struct posix_acl *nfs3_get_acl(struct inode *inode, int type); | ||
14 | extern int nfs3_set_acl(struct inode *inode, struct posix_acl *acl, int type); | ||
15 | extern int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl, | ||
16 | struct posix_acl *dfacl); | ||
17 | extern ssize_t nfs3_listxattr(struct dentry *, char *, size_t); | ||
18 | extern const struct xattr_handler *nfs3_xattr_handlers[]; | ||
19 | #else | ||
20 | static inline int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl, | ||
21 | struct posix_acl *dfacl) | ||
22 | { | ||
23 | return 0; | ||
24 | } | ||
25 | #define nfs3_listxattr NULL | ||
26 | #endif /* CONFIG_NFS_V3_ACL */ | ||
27 | |||
28 | /* nfs3client.c */ | ||
29 | struct nfs_server *nfs3_create_server(struct nfs_mount_info *, struct nfs_subversion *); | ||
30 | struct nfs_server *nfs3_clone_server(struct nfs_server *, struct nfs_fh *, | ||
31 | struct nfs_fattr *, rpc_authflavor_t); | ||
32 | |||
33 | |||
34 | #endif /* __LINUX_FS_NFS_NFS3_FS_H */ | ||
diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c index 24c6898159cc..658e586ca438 100644 --- a/fs/nfs/nfs3acl.c +++ b/fs/nfs/nfs3acl.c | |||
@@ -7,6 +7,7 @@ | |||
7 | #include <linux/nfsacl.h> | 7 | #include <linux/nfsacl.h> |
8 | 8 | ||
9 | #include "internal.h" | 9 | #include "internal.h" |
10 | #include "nfs3_fs.h" | ||
10 | 11 | ||
11 | #define NFSDBG_FACILITY NFSDBG_PROC | 12 | #define NFSDBG_FACILITY NFSDBG_PROC |
12 | 13 | ||
diff --git a/fs/nfs/nfs3client.c b/fs/nfs/nfs3client.c index b3fc65ef39ca..8c1b437c5403 100644 --- a/fs/nfs/nfs3client.c +++ b/fs/nfs/nfs3client.c | |||
@@ -1,6 +1,7 @@ | |||
1 | #include <linux/nfs_fs.h> | 1 | #include <linux/nfs_fs.h> |
2 | #include <linux/nfs_mount.h> | 2 | #include <linux/nfs_mount.h> |
3 | #include "internal.h" | 3 | #include "internal.h" |
4 | #include "nfs3_fs.h" | ||
4 | 5 | ||
5 | #ifdef CONFIG_NFS_V3_ACL | 6 | #ifdef CONFIG_NFS_V3_ACL |
6 | static struct rpc_stat nfsacl_rpcstat = { &nfsacl_program }; | 7 | static struct rpc_stat nfsacl_rpcstat = { &nfsacl_program }; |
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index 809670eba52a..524f9f837408 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c | |||
@@ -22,6 +22,7 @@ | |||
22 | 22 | ||
23 | #include "iostat.h" | 23 | #include "iostat.h" |
24 | #include "internal.h" | 24 | #include "internal.h" |
25 | #include "nfs3_fs.h" | ||
25 | 26 | ||
26 | #define NFSDBG_FACILITY NFSDBG_PROC | 27 | #define NFSDBG_FACILITY NFSDBG_PROC |
27 | 28 | ||
diff --git a/fs/nfs/nfs3super.c b/fs/nfs/nfs3super.c index d6a98949af19..6af29c2da352 100644 --- a/fs/nfs/nfs3super.c +++ b/fs/nfs/nfs3super.c | |||
@@ -4,6 +4,7 @@ | |||
4 | #include <linux/module.h> | 4 | #include <linux/module.h> |
5 | #include <linux/nfs_fs.h> | 5 | #include <linux/nfs_fs.h> |
6 | #include "internal.h" | 6 | #include "internal.h" |
7 | #include "nfs3_fs.h" | ||
7 | #include "nfs.h" | 8 | #include "nfs.h" |
8 | 9 | ||
9 | static struct nfs_subversion nfs_v3 = { | 10 | static struct nfs_subversion nfs_v3 = { |
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 0422d77b73c7..5aa55c132aa2 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c | |||
@@ -77,7 +77,7 @@ struct nfs4_opendata; | |||
77 | static int _nfs4_proc_open(struct nfs4_opendata *data); | 77 | static int _nfs4_proc_open(struct nfs4_opendata *data); |
78 | static int _nfs4_recover_proc_open(struct nfs4_opendata *data); | 78 | static int _nfs4_recover_proc_open(struct nfs4_opendata *data); |
79 | static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *); | 79 | static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *); |
80 | static int nfs4_async_handle_error(struct rpc_task *, const struct nfs_server *, struct nfs4_state *); | 80 | static int nfs4_async_handle_error(struct rpc_task *, const struct nfs_server *, struct nfs4_state *, long *); |
81 | static void nfs_fixup_referral_attributes(struct nfs_fattr *fattr); | 81 | static void nfs_fixup_referral_attributes(struct nfs_fattr *fattr); |
82 | static int nfs4_proc_getattr(struct nfs_server *, struct nfs_fh *, struct nfs_fattr *, struct nfs4_label *label); | 82 | static int nfs4_proc_getattr(struct nfs_server *, struct nfs_fh *, struct nfs_fattr *, struct nfs4_label *label); |
83 | static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr, struct nfs4_label *label); | 83 | static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr, struct nfs4_label *label); |
@@ -314,20 +314,30 @@ static void nfs4_setup_readdir(u64 cookie, __be32 *verifier, struct dentry *dent | |||
314 | kunmap_atomic(start); | 314 | kunmap_atomic(start); |
315 | } | 315 | } |
316 | 316 | ||
317 | static long nfs4_update_delay(long *timeout) | ||
318 | { | ||
319 | long ret; | ||
320 | if (!timeout) | ||
321 | return NFS4_POLL_RETRY_MAX; | ||
322 | if (*timeout <= 0) | ||
323 | *timeout = NFS4_POLL_RETRY_MIN; | ||
324 | if (*timeout > NFS4_POLL_RETRY_MAX) | ||
325 | *timeout = NFS4_POLL_RETRY_MAX; | ||
326 | ret = *timeout; | ||
327 | *timeout <<= 1; | ||
328 | return ret; | ||
329 | } | ||
330 | |||
317 | static int nfs4_delay(struct rpc_clnt *clnt, long *timeout) | 331 | static int nfs4_delay(struct rpc_clnt *clnt, long *timeout) |
318 | { | 332 | { |
319 | int res = 0; | 333 | int res = 0; |
320 | 334 | ||
321 | might_sleep(); | 335 | might_sleep(); |
322 | 336 | ||
323 | if (*timeout <= 0) | 337 | freezable_schedule_timeout_killable_unsafe( |
324 | *timeout = NFS4_POLL_RETRY_MIN; | 338 | nfs4_update_delay(timeout)); |
325 | if (*timeout > NFS4_POLL_RETRY_MAX) | ||
326 | *timeout = NFS4_POLL_RETRY_MAX; | ||
327 | freezable_schedule_timeout_killable_unsafe(*timeout); | ||
328 | if (fatal_signal_pending(current)) | 339 | if (fatal_signal_pending(current)) |
329 | res = -ERESTARTSYS; | 340 | res = -ERESTARTSYS; |
330 | *timeout <<= 1; | ||
331 | return res; | 341 | return res; |
332 | } | 342 | } |
333 | 343 | ||
@@ -1307,15 +1317,13 @@ static struct nfs4_state *nfs4_try_open_cached(struct nfs4_opendata *opendata) | |||
1307 | int ret = -EAGAIN; | 1317 | int ret = -EAGAIN; |
1308 | 1318 | ||
1309 | for (;;) { | 1319 | for (;;) { |
1320 | spin_lock(&state->owner->so_lock); | ||
1310 | if (can_open_cached(state, fmode, open_mode)) { | 1321 | if (can_open_cached(state, fmode, open_mode)) { |
1311 | spin_lock(&state->owner->so_lock); | 1322 | update_open_stateflags(state, fmode); |
1312 | if (can_open_cached(state, fmode, open_mode)) { | ||
1313 | update_open_stateflags(state, fmode); | ||
1314 | spin_unlock(&state->owner->so_lock); | ||
1315 | goto out_return_state; | ||
1316 | } | ||
1317 | spin_unlock(&state->owner->so_lock); | 1323 | spin_unlock(&state->owner->so_lock); |
1324 | goto out_return_state; | ||
1318 | } | 1325 | } |
1326 | spin_unlock(&state->owner->so_lock); | ||
1319 | rcu_read_lock(); | 1327 | rcu_read_lock(); |
1320 | delegation = rcu_dereference(nfsi->delegation); | 1328 | delegation = rcu_dereference(nfsi->delegation); |
1321 | if (!can_open_delegated(delegation, fmode)) { | 1329 | if (!can_open_delegated(delegation, fmode)) { |
@@ -2589,7 +2597,7 @@ static void nfs4_close_done(struct rpc_task *task, void *data) | |||
2589 | if (calldata->arg.fmode == 0) | 2597 | if (calldata->arg.fmode == 0) |
2590 | break; | 2598 | break; |
2591 | default: | 2599 | default: |
2592 | if (nfs4_async_handle_error(task, server, state) == -EAGAIN) { | 2600 | if (nfs4_async_handle_error(task, server, state, NULL) == -EAGAIN) { |
2593 | rpc_restart_call_prepare(task); | 2601 | rpc_restart_call_prepare(task); |
2594 | goto out_release; | 2602 | goto out_release; |
2595 | } | 2603 | } |
@@ -3217,7 +3225,9 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr, | |||
3217 | struct nfs4_label *label = NULL; | 3225 | struct nfs4_label *label = NULL; |
3218 | int status; | 3226 | int status; |
3219 | 3227 | ||
3220 | if (pnfs_ld_layoutret_on_setattr(inode)) | 3228 | if (pnfs_ld_layoutret_on_setattr(inode) && |
3229 | sattr->ia_valid & ATTR_SIZE && | ||
3230 | sattr->ia_size < i_size_read(inode)) | ||
3221 | pnfs_commit_and_return_layout(inode); | 3231 | pnfs_commit_and_return_layout(inode); |
3222 | 3232 | ||
3223 | nfs_fattr_init(fattr); | 3233 | nfs_fattr_init(fattr); |
@@ -3576,7 +3586,8 @@ static int nfs4_proc_unlink_done(struct rpc_task *task, struct inode *dir) | |||
3576 | 3586 | ||
3577 | if (!nfs4_sequence_done(task, &res->seq_res)) | 3587 | if (!nfs4_sequence_done(task, &res->seq_res)) |
3578 | return 0; | 3588 | return 0; |
3579 | if (nfs4_async_handle_error(task, res->server, NULL) == -EAGAIN) | 3589 | if (nfs4_async_handle_error(task, res->server, NULL, |
3590 | &data->timeout) == -EAGAIN) | ||
3580 | return 0; | 3591 | return 0; |
3581 | update_changeattr(dir, &res->cinfo); | 3592 | update_changeattr(dir, &res->cinfo); |
3582 | return 1; | 3593 | return 1; |
@@ -3609,7 +3620,7 @@ static int nfs4_proc_rename_done(struct rpc_task *task, struct inode *old_dir, | |||
3609 | 3620 | ||
3610 | if (!nfs4_sequence_done(task, &res->seq_res)) | 3621 | if (!nfs4_sequence_done(task, &res->seq_res)) |
3611 | return 0; | 3622 | return 0; |
3612 | if (nfs4_async_handle_error(task, res->server, NULL) == -EAGAIN) | 3623 | if (nfs4_async_handle_error(task, res->server, NULL, &data->timeout) == -EAGAIN) |
3613 | return 0; | 3624 | return 0; |
3614 | 3625 | ||
3615 | update_changeattr(old_dir, &res->old_cinfo); | 3626 | update_changeattr(old_dir, &res->old_cinfo); |
@@ -4113,7 +4124,8 @@ static int nfs4_read_done_cb(struct rpc_task *task, struct nfs_pgio_header *hdr) | |||
4113 | 4124 | ||
4114 | trace_nfs4_read(hdr, task->tk_status); | 4125 | trace_nfs4_read(hdr, task->tk_status); |
4115 | if (nfs4_async_handle_error(task, server, | 4126 | if (nfs4_async_handle_error(task, server, |
4116 | hdr->args.context->state) == -EAGAIN) { | 4127 | hdr->args.context->state, |
4128 | NULL) == -EAGAIN) { | ||
4117 | rpc_restart_call_prepare(task); | 4129 | rpc_restart_call_prepare(task); |
4118 | return -EAGAIN; | 4130 | return -EAGAIN; |
4119 | } | 4131 | } |
@@ -4181,10 +4193,11 @@ static int nfs4_write_done_cb(struct rpc_task *task, | |||
4181 | struct nfs_pgio_header *hdr) | 4193 | struct nfs_pgio_header *hdr) |
4182 | { | 4194 | { |
4183 | struct inode *inode = hdr->inode; | 4195 | struct inode *inode = hdr->inode; |
4184 | 4196 | ||
4185 | trace_nfs4_write(hdr, task->tk_status); | 4197 | trace_nfs4_write(hdr, task->tk_status); |
4186 | if (nfs4_async_handle_error(task, NFS_SERVER(inode), | 4198 | if (nfs4_async_handle_error(task, NFS_SERVER(inode), |
4187 | hdr->args.context->state) == -EAGAIN) { | 4199 | hdr->args.context->state, |
4200 | NULL) == -EAGAIN) { | ||
4188 | rpc_restart_call_prepare(task); | 4201 | rpc_restart_call_prepare(task); |
4189 | return -EAGAIN; | 4202 | return -EAGAIN; |
4190 | } | 4203 | } |
@@ -4264,7 +4277,8 @@ static int nfs4_commit_done_cb(struct rpc_task *task, struct nfs_commit_data *da | |||
4264 | struct inode *inode = data->inode; | 4277 | struct inode *inode = data->inode; |
4265 | 4278 | ||
4266 | trace_nfs4_commit(data, task->tk_status); | 4279 | trace_nfs4_commit(data, task->tk_status); |
4267 | if (nfs4_async_handle_error(task, NFS_SERVER(inode), NULL) == -EAGAIN) { | 4280 | if (nfs4_async_handle_error(task, NFS_SERVER(inode), |
4281 | NULL, NULL) == -EAGAIN) { | ||
4268 | rpc_restart_call_prepare(task); | 4282 | rpc_restart_call_prepare(task); |
4269 | return -EAGAIN; | 4283 | return -EAGAIN; |
4270 | } | 4284 | } |
@@ -4817,7 +4831,8 @@ out: | |||
4817 | 4831 | ||
4818 | 4832 | ||
4819 | static int | 4833 | static int |
4820 | nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server, struct nfs4_state *state) | 4834 | nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server, |
4835 | struct nfs4_state *state, long *timeout) | ||
4821 | { | 4836 | { |
4822 | struct nfs_client *clp = server->nfs_client; | 4837 | struct nfs_client *clp = server->nfs_client; |
4823 | 4838 | ||
@@ -4867,6 +4882,8 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server, | |||
4867 | #endif /* CONFIG_NFS_V4_1 */ | 4882 | #endif /* CONFIG_NFS_V4_1 */ |
4868 | case -NFS4ERR_DELAY: | 4883 | case -NFS4ERR_DELAY: |
4869 | nfs_inc_server_stats(server, NFSIOS_DELAY); | 4884 | nfs_inc_server_stats(server, NFSIOS_DELAY); |
4885 | rpc_delay(task, nfs4_update_delay(timeout)); | ||
4886 | goto restart_call; | ||
4870 | case -NFS4ERR_GRACE: | 4887 | case -NFS4ERR_GRACE: |
4871 | rpc_delay(task, NFS4_POLL_RETRY_MAX); | 4888 | rpc_delay(task, NFS4_POLL_RETRY_MAX); |
4872 | case -NFS4ERR_RETRY_UNCACHED_REP: | 4889 | case -NFS4ERR_RETRY_UNCACHED_REP: |
@@ -5107,8 +5124,8 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata) | |||
5107 | pnfs_roc_set_barrier(data->inode, data->roc_barrier); | 5124 | pnfs_roc_set_barrier(data->inode, data->roc_barrier); |
5108 | break; | 5125 | break; |
5109 | default: | 5126 | default: |
5110 | if (nfs4_async_handle_error(task, data->res.server, NULL) == | 5127 | if (nfs4_async_handle_error(task, data->res.server, |
5111 | -EAGAIN) { | 5128 | NULL, NULL) == -EAGAIN) { |
5112 | rpc_restart_call_prepare(task); | 5129 | rpc_restart_call_prepare(task); |
5113 | return; | 5130 | return; |
5114 | } | 5131 | } |
@@ -5372,7 +5389,8 @@ static void nfs4_locku_done(struct rpc_task *task, void *data) | |||
5372 | case -NFS4ERR_EXPIRED: | 5389 | case -NFS4ERR_EXPIRED: |
5373 | break; | 5390 | break; |
5374 | default: | 5391 | default: |
5375 | if (nfs4_async_handle_error(task, calldata->server, NULL) == -EAGAIN) | 5392 | if (nfs4_async_handle_error(task, calldata->server, |
5393 | NULL, NULL) == -EAGAIN) | ||
5376 | rpc_restart_call_prepare(task); | 5394 | rpc_restart_call_prepare(task); |
5377 | } | 5395 | } |
5378 | nfs_release_seqid(calldata->arg.seqid); | 5396 | nfs_release_seqid(calldata->arg.seqid); |
@@ -5978,7 +5996,8 @@ static void nfs4_release_lockowner_done(struct rpc_task *task, void *calldata) | |||
5978 | break; | 5996 | break; |
5979 | case -NFS4ERR_LEASE_MOVED: | 5997 | case -NFS4ERR_LEASE_MOVED: |
5980 | case -NFS4ERR_DELAY: | 5998 | case -NFS4ERR_DELAY: |
5981 | if (nfs4_async_handle_error(task, server, NULL) == -EAGAIN) | 5999 | if (nfs4_async_handle_error(task, server, |
6000 | NULL, NULL) == -EAGAIN) | ||
5982 | rpc_restart_call_prepare(task); | 6001 | rpc_restart_call_prepare(task); |
5983 | } | 6002 | } |
5984 | } | 6003 | } |
@@ -7583,14 +7602,19 @@ static void nfs4_layoutget_done(struct rpc_task *task, void *calldata) | |||
7583 | } else { | 7602 | } else { |
7584 | LIST_HEAD(head); | 7603 | LIST_HEAD(head); |
7585 | 7604 | ||
7605 | /* | ||
7606 | * Mark the bad layout state as invalid, then retry | ||
7607 | * with the current stateid. | ||
7608 | */ | ||
7586 | pnfs_mark_matching_lsegs_invalid(lo, &head, NULL); | 7609 | pnfs_mark_matching_lsegs_invalid(lo, &head, NULL); |
7587 | spin_unlock(&inode->i_lock); | 7610 | spin_unlock(&inode->i_lock); |
7588 | /* Mark the bad layout state as invalid, then | ||
7589 | * retry using the open stateid. */ | ||
7590 | pnfs_free_lseg_list(&head); | 7611 | pnfs_free_lseg_list(&head); |
7612 | |||
7613 | task->tk_status = 0; | ||
7614 | rpc_restart_call_prepare(task); | ||
7591 | } | 7615 | } |
7592 | } | 7616 | } |
7593 | if (nfs4_async_handle_error(task, server, state) == -EAGAIN) | 7617 | if (nfs4_async_handle_error(task, server, state, NULL) == -EAGAIN) |
7594 | rpc_restart_call_prepare(task); | 7618 | rpc_restart_call_prepare(task); |
7595 | out: | 7619 | out: |
7596 | dprintk("<-- %s\n", __func__); | 7620 | dprintk("<-- %s\n", __func__); |
@@ -7750,7 +7774,7 @@ static void nfs4_layoutreturn_done(struct rpc_task *task, void *calldata) | |||
7750 | case 0: | 7774 | case 0: |
7751 | break; | 7775 | break; |
7752 | case -NFS4ERR_DELAY: | 7776 | case -NFS4ERR_DELAY: |
7753 | if (nfs4_async_handle_error(task, server, NULL) != -EAGAIN) | 7777 | if (nfs4_async_handle_error(task, server, NULL, NULL) != -EAGAIN) |
7754 | break; | 7778 | break; |
7755 | rpc_restart_call_prepare(task); | 7779 | rpc_restart_call_prepare(task); |
7756 | return; | 7780 | return; |
@@ -7809,54 +7833,6 @@ int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp) | |||
7809 | return status; | 7833 | return status; |
7810 | } | 7834 | } |
7811 | 7835 | ||
7812 | /* | ||
7813 | * Retrieve the list of Data Server devices from the MDS. | ||
7814 | */ | ||
7815 | static int _nfs4_getdevicelist(struct nfs_server *server, | ||
7816 | const struct nfs_fh *fh, | ||
7817 | struct pnfs_devicelist *devlist) | ||
7818 | { | ||
7819 | struct nfs4_getdevicelist_args args = { | ||
7820 | .fh = fh, | ||
7821 | .layoutclass = server->pnfs_curr_ld->id, | ||
7822 | }; | ||
7823 | struct nfs4_getdevicelist_res res = { | ||
7824 | .devlist = devlist, | ||
7825 | }; | ||
7826 | struct rpc_message msg = { | ||
7827 | .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_GETDEVICELIST], | ||
7828 | .rpc_argp = &args, | ||
7829 | .rpc_resp = &res, | ||
7830 | }; | ||
7831 | int status; | ||
7832 | |||
7833 | dprintk("--> %s\n", __func__); | ||
7834 | status = nfs4_call_sync(server->client, server, &msg, &args.seq_args, | ||
7835 | &res.seq_res, 0); | ||
7836 | dprintk("<-- %s status=%d\n", __func__, status); | ||
7837 | return status; | ||
7838 | } | ||
7839 | |||
7840 | int nfs4_proc_getdevicelist(struct nfs_server *server, | ||
7841 | const struct nfs_fh *fh, | ||
7842 | struct pnfs_devicelist *devlist) | ||
7843 | { | ||
7844 | struct nfs4_exception exception = { }; | ||
7845 | int err; | ||
7846 | |||
7847 | do { | ||
7848 | err = nfs4_handle_exception(server, | ||
7849 | _nfs4_getdevicelist(server, fh, devlist), | ||
7850 | &exception); | ||
7851 | } while (exception.retry); | ||
7852 | |||
7853 | dprintk("%s: err=%d, num_devs=%u\n", __func__, | ||
7854 | err, devlist->num_devs); | ||
7855 | |||
7856 | return err; | ||
7857 | } | ||
7858 | EXPORT_SYMBOL_GPL(nfs4_proc_getdevicelist); | ||
7859 | |||
7860 | static int | 7836 | static int |
7861 | _nfs4_proc_getdeviceinfo(struct nfs_server *server, | 7837 | _nfs4_proc_getdeviceinfo(struct nfs_server *server, |
7862 | struct pnfs_device *pdev, | 7838 | struct pnfs_device *pdev, |
@@ -7929,7 +7905,7 @@ nfs4_layoutcommit_done(struct rpc_task *task, void *calldata) | |||
7929 | case 0: | 7905 | case 0: |
7930 | break; | 7906 | break; |
7931 | default: | 7907 | default: |
7932 | if (nfs4_async_handle_error(task, server, NULL) == -EAGAIN) { | 7908 | if (nfs4_async_handle_error(task, server, NULL, NULL) == -EAGAIN) { |
7933 | rpc_restart_call_prepare(task); | 7909 | rpc_restart_call_prepare(task); |
7934 | return; | 7910 | return; |
7935 | } | 7911 | } |
@@ -8225,7 +8201,7 @@ static void nfs41_free_stateid_done(struct rpc_task *task, void *calldata) | |||
8225 | 8201 | ||
8226 | switch (task->tk_status) { | 8202 | switch (task->tk_status) { |
8227 | case -NFS4ERR_DELAY: | 8203 | case -NFS4ERR_DELAY: |
8228 | if (nfs4_async_handle_error(task, data->server, NULL) == -EAGAIN) | 8204 | if (nfs4_async_handle_error(task, data->server, NULL, NULL) == -EAGAIN) |
8229 | rpc_restart_call_prepare(task); | 8205 | rpc_restart_call_prepare(task); |
8230 | } | 8206 | } |
8231 | } | 8207 | } |
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 87b2d0e79797..5194933ed419 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c | |||
@@ -2345,6 +2345,7 @@ static void nfs4_state_manager(struct nfs_client *clp) | |||
2345 | status = nfs4_check_lease(clp); | 2345 | status = nfs4_check_lease(clp); |
2346 | if (status < 0) | 2346 | if (status < 0) |
2347 | goto out_error; | 2347 | goto out_error; |
2348 | continue; | ||
2348 | } | 2349 | } |
2349 | 2350 | ||
2350 | if (test_and_clear_bit(NFS4CLNT_MOVED, &clp->cl_state)) { | 2351 | if (test_and_clear_bit(NFS4CLNT_MOVED, &clp->cl_state)) { |
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index e13b59d8d9aa..005d03c5d274 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c | |||
@@ -362,25 +362,19 @@ static int nfs4_stat_to_errno(int); | |||
362 | XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + 5) | 362 | XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + 5) |
363 | #define encode_reclaim_complete_maxsz (op_encode_hdr_maxsz + 4) | 363 | #define encode_reclaim_complete_maxsz (op_encode_hdr_maxsz + 4) |
364 | #define decode_reclaim_complete_maxsz (op_decode_hdr_maxsz + 4) | 364 | #define decode_reclaim_complete_maxsz (op_decode_hdr_maxsz + 4) |
365 | #define encode_getdevicelist_maxsz (op_encode_hdr_maxsz + 4 + \ | 365 | #define encode_getdeviceinfo_maxsz (op_encode_hdr_maxsz + \ |
366 | encode_verifier_maxsz) | 366 | XDR_QUADLEN(NFS4_DEVICEID4_SIZE) + \ |
367 | #define decode_getdevicelist_maxsz (op_decode_hdr_maxsz + \ | 367 | 1 /* layout type */ + \ |
368 | 2 /* nfs_cookie4 gdlr_cookie */ + \ | 368 | 1 /* maxcount */ + \ |
369 | decode_verifier_maxsz \ | 369 | 1 /* bitmap size */ + \ |
370 | /* verifier4 gdlr_verifier */ + \ | 370 | 1 /* notification bitmap length */ + \ |
371 | 1 /* gdlr_deviceid_list count */ + \ | 371 | 1 /* notification bitmap, word 0 */) |
372 | XDR_QUADLEN(NFS4_PNFS_GETDEVLIST_MAXNUM * \ | ||
373 | NFS4_DEVICEID4_SIZE) \ | ||
374 | /* gdlr_deviceid_list */ + \ | ||
375 | 1 /* bool gdlr_eof */) | ||
376 | #define encode_getdeviceinfo_maxsz (op_encode_hdr_maxsz + 4 + \ | ||
377 | XDR_QUADLEN(NFS4_DEVICEID4_SIZE)) | ||
378 | #define decode_getdeviceinfo_maxsz (op_decode_hdr_maxsz + \ | 372 | #define decode_getdeviceinfo_maxsz (op_decode_hdr_maxsz + \ |
379 | 1 /* layout type */ + \ | 373 | 1 /* layout type */ + \ |
380 | 1 /* opaque devaddr4 length */ + \ | 374 | 1 /* opaque devaddr4 length */ + \ |
381 | /* devaddr4 payload is read into page */ \ | 375 | /* devaddr4 payload is read into page */ \ |
382 | 1 /* notification bitmap length */ + \ | 376 | 1 /* notification bitmap length */ + \ |
383 | 1 /* notification bitmap */) | 377 | 1 /* notification bitmap, word 0 */) |
384 | #define encode_layoutget_maxsz (op_encode_hdr_maxsz + 10 + \ | 378 | #define encode_layoutget_maxsz (op_encode_hdr_maxsz + 10 + \ |
385 | encode_stateid_maxsz) | 379 | encode_stateid_maxsz) |
386 | #define decode_layoutget_maxsz (op_decode_hdr_maxsz + 8 + \ | 380 | #define decode_layoutget_maxsz (op_decode_hdr_maxsz + 8 + \ |
@@ -395,7 +389,10 @@ static int nfs4_stat_to_errno(int); | |||
395 | 2 /* last byte written */ + \ | 389 | 2 /* last byte written */ + \ |
396 | 1 /* nt_timechanged (false) */ + \ | 390 | 1 /* nt_timechanged (false) */ + \ |
397 | 1 /* layoutupdate4 layout type */ + \ | 391 | 1 /* layoutupdate4 layout type */ + \ |
398 | 1 /* NULL filelayout layoutupdate4 payload */) | 392 | 1 /* layoutupdate4 opaqueue len */) |
393 | /* the actual content of layoutupdate4 should | ||
394 | be allocated by drivers and spliced in | ||
395 | using xdr_write_pages */ | ||
399 | #define decode_layoutcommit_maxsz (op_decode_hdr_maxsz + 3) | 396 | #define decode_layoutcommit_maxsz (op_decode_hdr_maxsz + 3) |
400 | #define encode_layoutreturn_maxsz (8 + op_encode_hdr_maxsz + \ | 397 | #define encode_layoutreturn_maxsz (8 + op_encode_hdr_maxsz + \ |
401 | encode_stateid_maxsz + \ | 398 | encode_stateid_maxsz + \ |
@@ -809,14 +806,6 @@ static int nfs4_stat_to_errno(int); | |||
809 | #define NFS4_dec_reclaim_complete_sz (compound_decode_hdr_maxsz + \ | 806 | #define NFS4_dec_reclaim_complete_sz (compound_decode_hdr_maxsz + \ |
810 | decode_sequence_maxsz + \ | 807 | decode_sequence_maxsz + \ |
811 | decode_reclaim_complete_maxsz) | 808 | decode_reclaim_complete_maxsz) |
812 | #define NFS4_enc_getdevicelist_sz (compound_encode_hdr_maxsz + \ | ||
813 | encode_sequence_maxsz + \ | ||
814 | encode_putfh_maxsz + \ | ||
815 | encode_getdevicelist_maxsz) | ||
816 | #define NFS4_dec_getdevicelist_sz (compound_decode_hdr_maxsz + \ | ||
817 | decode_sequence_maxsz + \ | ||
818 | decode_putfh_maxsz + \ | ||
819 | decode_getdevicelist_maxsz) | ||
820 | #define NFS4_enc_getdeviceinfo_sz (compound_encode_hdr_maxsz + \ | 809 | #define NFS4_enc_getdeviceinfo_sz (compound_encode_hdr_maxsz + \ |
821 | encode_sequence_maxsz +\ | 810 | encode_sequence_maxsz +\ |
822 | encode_getdeviceinfo_maxsz) | 811 | encode_getdeviceinfo_maxsz) |
@@ -1927,24 +1916,6 @@ static void encode_sequence(struct xdr_stream *xdr, | |||
1927 | 1916 | ||
1928 | #ifdef CONFIG_NFS_V4_1 | 1917 | #ifdef CONFIG_NFS_V4_1 |
1929 | static void | 1918 | static void |
1930 | encode_getdevicelist(struct xdr_stream *xdr, | ||
1931 | const struct nfs4_getdevicelist_args *args, | ||
1932 | struct compound_hdr *hdr) | ||
1933 | { | ||
1934 | __be32 *p; | ||
1935 | nfs4_verifier dummy = { | ||
1936 | .data = "dummmmmy", | ||
1937 | }; | ||
1938 | |||
1939 | encode_op_hdr(xdr, OP_GETDEVICELIST, decode_getdevicelist_maxsz, hdr); | ||
1940 | p = reserve_space(xdr, 16); | ||
1941 | *p++ = cpu_to_be32(args->layoutclass); | ||
1942 | *p++ = cpu_to_be32(NFS4_PNFS_GETDEVLIST_MAXNUM); | ||
1943 | xdr_encode_hyper(p, 0ULL); /* cookie */ | ||
1944 | encode_nfs4_verifier(xdr, &dummy); | ||
1945 | } | ||
1946 | |||
1947 | static void | ||
1948 | encode_getdeviceinfo(struct xdr_stream *xdr, | 1919 | encode_getdeviceinfo(struct xdr_stream *xdr, |
1949 | const struct nfs4_getdeviceinfo_args *args, | 1920 | const struct nfs4_getdeviceinfo_args *args, |
1950 | struct compound_hdr *hdr) | 1921 | struct compound_hdr *hdr) |
@@ -1952,12 +1923,15 @@ encode_getdeviceinfo(struct xdr_stream *xdr, | |||
1952 | __be32 *p; | 1923 | __be32 *p; |
1953 | 1924 | ||
1954 | encode_op_hdr(xdr, OP_GETDEVICEINFO, decode_getdeviceinfo_maxsz, hdr); | 1925 | encode_op_hdr(xdr, OP_GETDEVICEINFO, decode_getdeviceinfo_maxsz, hdr); |
1955 | p = reserve_space(xdr, 12 + NFS4_DEVICEID4_SIZE); | 1926 | p = reserve_space(xdr, NFS4_DEVICEID4_SIZE + 4 + 4); |
1956 | p = xdr_encode_opaque_fixed(p, args->pdev->dev_id.data, | 1927 | p = xdr_encode_opaque_fixed(p, args->pdev->dev_id.data, |
1957 | NFS4_DEVICEID4_SIZE); | 1928 | NFS4_DEVICEID4_SIZE); |
1958 | *p++ = cpu_to_be32(args->pdev->layout_type); | 1929 | *p++ = cpu_to_be32(args->pdev->layout_type); |
1959 | *p++ = cpu_to_be32(args->pdev->maxcount); /* gdia_maxcount */ | 1930 | *p++ = cpu_to_be32(args->pdev->maxcount); /* gdia_maxcount */ |
1960 | *p++ = cpu_to_be32(0); /* bitmap length 0 */ | 1931 | |
1932 | p = reserve_space(xdr, 4 + 4); | ||
1933 | *p++ = cpu_to_be32(1); /* bitmap length */ | ||
1934 | *p++ = cpu_to_be32(NOTIFY_DEVICEID4_CHANGE | NOTIFY_DEVICEID4_DELETE); | ||
1961 | } | 1935 | } |
1962 | 1936 | ||
1963 | static void | 1937 | static void |
@@ -1990,7 +1964,7 @@ encode_layoutget(struct xdr_stream *xdr, | |||
1990 | static int | 1964 | static int |
1991 | encode_layoutcommit(struct xdr_stream *xdr, | 1965 | encode_layoutcommit(struct xdr_stream *xdr, |
1992 | struct inode *inode, | 1966 | struct inode *inode, |
1993 | const struct nfs4_layoutcommit_args *args, | 1967 | struct nfs4_layoutcommit_args *args, |
1994 | struct compound_hdr *hdr) | 1968 | struct compound_hdr *hdr) |
1995 | { | 1969 | { |
1996 | __be32 *p; | 1970 | __be32 *p; |
@@ -2011,11 +1985,16 @@ encode_layoutcommit(struct xdr_stream *xdr, | |||
2011 | *p++ = cpu_to_be32(0); /* Never send time_modify_changed */ | 1985 | *p++ = cpu_to_be32(0); /* Never send time_modify_changed */ |
2012 | *p++ = cpu_to_be32(NFS_SERVER(args->inode)->pnfs_curr_ld->id);/* type */ | 1986 | *p++ = cpu_to_be32(NFS_SERVER(args->inode)->pnfs_curr_ld->id);/* type */ |
2013 | 1987 | ||
2014 | if (NFS_SERVER(inode)->pnfs_curr_ld->encode_layoutcommit) | 1988 | if (NFS_SERVER(inode)->pnfs_curr_ld->encode_layoutcommit) { |
2015 | NFS_SERVER(inode)->pnfs_curr_ld->encode_layoutcommit( | 1989 | NFS_SERVER(inode)->pnfs_curr_ld->encode_layoutcommit( |
2016 | NFS_I(inode)->layout, xdr, args); | 1990 | NFS_I(inode)->layout, xdr, args); |
2017 | else | 1991 | } else { |
2018 | encode_uint32(xdr, 0); /* no layout-type payload */ | 1992 | encode_uint32(xdr, args->layoutupdate_len); |
1993 | if (args->layoutupdate_pages) { | ||
1994 | xdr_write_pages(xdr, args->layoutupdate_pages, 0, | ||
1995 | args->layoutupdate_len); | ||
1996 | } | ||
1997 | } | ||
2019 | 1998 | ||
2020 | return 0; | 1999 | return 0; |
2021 | } | 2000 | } |
@@ -2893,24 +2872,6 @@ static void nfs4_xdr_enc_reclaim_complete(struct rpc_rqst *req, | |||
2893 | } | 2872 | } |
2894 | 2873 | ||
2895 | /* | 2874 | /* |
2896 | * Encode GETDEVICELIST request | ||
2897 | */ | ||
2898 | static void nfs4_xdr_enc_getdevicelist(struct rpc_rqst *req, | ||
2899 | struct xdr_stream *xdr, | ||
2900 | struct nfs4_getdevicelist_args *args) | ||
2901 | { | ||
2902 | struct compound_hdr hdr = { | ||
2903 | .minorversion = nfs4_xdr_minorversion(&args->seq_args), | ||
2904 | }; | ||
2905 | |||
2906 | encode_compound_hdr(xdr, req, &hdr); | ||
2907 | encode_sequence(xdr, &args->seq_args, &hdr); | ||
2908 | encode_putfh(xdr, args->fh, &hdr); | ||
2909 | encode_getdevicelist(xdr, args, &hdr); | ||
2910 | encode_nops(&hdr); | ||
2911 | } | ||
2912 | |||
2913 | /* | ||
2914 | * Encode GETDEVICEINFO request | 2875 | * Encode GETDEVICEINFO request |
2915 | */ | 2876 | */ |
2916 | static void nfs4_xdr_enc_getdeviceinfo(struct rpc_rqst *req, | 2877 | static void nfs4_xdr_enc_getdeviceinfo(struct rpc_rqst *req, |
@@ -5765,54 +5726,6 @@ out_overflow: | |||
5765 | } | 5726 | } |
5766 | 5727 | ||
5767 | #if defined(CONFIG_NFS_V4_1) | 5728 | #if defined(CONFIG_NFS_V4_1) |
5768 | /* | ||
5769 | * TODO: Need to handle case when EOF != true; | ||
5770 | */ | ||
5771 | static int decode_getdevicelist(struct xdr_stream *xdr, | ||
5772 | struct pnfs_devicelist *res) | ||
5773 | { | ||
5774 | __be32 *p; | ||
5775 | int status, i; | ||
5776 | nfs4_verifier verftemp; | ||
5777 | |||
5778 | status = decode_op_hdr(xdr, OP_GETDEVICELIST); | ||
5779 | if (status) | ||
5780 | return status; | ||
5781 | |||
5782 | p = xdr_inline_decode(xdr, 8 + 8 + 4); | ||
5783 | if (unlikely(!p)) | ||
5784 | goto out_overflow; | ||
5785 | |||
5786 | /* TODO: Skip cookie for now */ | ||
5787 | p += 2; | ||
5788 | |||
5789 | /* Read verifier */ | ||
5790 | p = xdr_decode_opaque_fixed(p, verftemp.data, NFS4_VERIFIER_SIZE); | ||
5791 | |||
5792 | res->num_devs = be32_to_cpup(p); | ||
5793 | |||
5794 | dprintk("%s: num_dev %d\n", __func__, res->num_devs); | ||
5795 | |||
5796 | if (res->num_devs > NFS4_PNFS_GETDEVLIST_MAXNUM) { | ||
5797 | printk(KERN_ERR "NFS: %s too many result dev_num %u\n", | ||
5798 | __func__, res->num_devs); | ||
5799 | return -EIO; | ||
5800 | } | ||
5801 | |||
5802 | p = xdr_inline_decode(xdr, | ||
5803 | res->num_devs * NFS4_DEVICEID4_SIZE + 4); | ||
5804 | if (unlikely(!p)) | ||
5805 | goto out_overflow; | ||
5806 | for (i = 0; i < res->num_devs; i++) | ||
5807 | p = xdr_decode_opaque_fixed(p, res->dev_id[i].data, | ||
5808 | NFS4_DEVICEID4_SIZE); | ||
5809 | res->eof = be32_to_cpup(p); | ||
5810 | return 0; | ||
5811 | out_overflow: | ||
5812 | print_overflow_msg(__func__, xdr); | ||
5813 | return -EIO; | ||
5814 | } | ||
5815 | |||
5816 | static int decode_getdeviceinfo(struct xdr_stream *xdr, | 5729 | static int decode_getdeviceinfo(struct xdr_stream *xdr, |
5817 | struct pnfs_device *pdev) | 5730 | struct pnfs_device *pdev) |
5818 | { | 5731 | { |
@@ -5862,9 +5775,16 @@ static int decode_getdeviceinfo(struct xdr_stream *xdr, | |||
5862 | p = xdr_inline_decode(xdr, 4 * len); | 5775 | p = xdr_inline_decode(xdr, 4 * len); |
5863 | if (unlikely(!p)) | 5776 | if (unlikely(!p)) |
5864 | goto out_overflow; | 5777 | goto out_overflow; |
5865 | for (i = 0; i < len; i++, p++) { | 5778 | |
5866 | if (be32_to_cpup(p)) { | 5779 | if (be32_to_cpup(p++) & |
5867 | dprintk("%s: notifications not supported\n", | 5780 | ~(NOTIFY_DEVICEID4_CHANGE | NOTIFY_DEVICEID4_DELETE)) { |
5781 | dprintk("%s: unsupported notification\n", | ||
5782 | __func__); | ||
5783 | } | ||
5784 | |||
5785 | for (i = 1; i < len; i++) { | ||
5786 | if (be32_to_cpup(p++)) { | ||
5787 | dprintk("%s: unsupported notification\n", | ||
5868 | __func__); | 5788 | __func__); |
5869 | return -EIO; | 5789 | return -EIO; |
5870 | } | 5790 | } |
@@ -7097,32 +7017,6 @@ static int nfs4_xdr_dec_reclaim_complete(struct rpc_rqst *rqstp, | |||
7097 | } | 7017 | } |
7098 | 7018 | ||
7099 | /* | 7019 | /* |
7100 | * Decode GETDEVICELIST response | ||
7101 | */ | ||
7102 | static int nfs4_xdr_dec_getdevicelist(struct rpc_rqst *rqstp, | ||
7103 | struct xdr_stream *xdr, | ||
7104 | struct nfs4_getdevicelist_res *res) | ||
7105 | { | ||
7106 | struct compound_hdr hdr; | ||
7107 | int status; | ||
7108 | |||
7109 | dprintk("encoding getdevicelist!\n"); | ||
7110 | |||
7111 | status = decode_compound_hdr(xdr, &hdr); | ||
7112 | if (status != 0) | ||
7113 | goto out; | ||
7114 | status = decode_sequence(xdr, &res->seq_res, rqstp); | ||
7115 | if (status != 0) | ||
7116 | goto out; | ||
7117 | status = decode_putfh(xdr); | ||
7118 | if (status != 0) | ||
7119 | goto out; | ||
7120 | status = decode_getdevicelist(xdr, res->devlist); | ||
7121 | out: | ||
7122 | return status; | ||
7123 | } | ||
7124 | |||
7125 | /* | ||
7126 | * Decode GETDEVINFO response | 7020 | * Decode GETDEVINFO response |
7127 | */ | 7021 | */ |
7128 | static int nfs4_xdr_dec_getdeviceinfo(struct rpc_rqst *rqstp, | 7022 | static int nfs4_xdr_dec_getdeviceinfo(struct rpc_rqst *rqstp, |
@@ -7490,7 +7384,6 @@ struct rpc_procinfo nfs4_procedures[] = { | |||
7490 | PROC(SECINFO_NO_NAME, enc_secinfo_no_name, dec_secinfo_no_name), | 7384 | PROC(SECINFO_NO_NAME, enc_secinfo_no_name, dec_secinfo_no_name), |
7491 | PROC(TEST_STATEID, enc_test_stateid, dec_test_stateid), | 7385 | PROC(TEST_STATEID, enc_test_stateid, dec_test_stateid), |
7492 | PROC(FREE_STATEID, enc_free_stateid, dec_free_stateid), | 7386 | PROC(FREE_STATEID, enc_free_stateid, dec_free_stateid), |
7493 | PROC(GETDEVICELIST, enc_getdevicelist, dec_getdevicelist), | ||
7494 | PROC(BIND_CONN_TO_SESSION, | 7387 | PROC(BIND_CONN_TO_SESSION, |
7495 | enc_bind_conn_to_session, dec_bind_conn_to_session), | 7388 | enc_bind_conn_to_session, dec_bind_conn_to_session), |
7496 | PROC(DESTROY_CLIENTID, enc_destroy_clientid, dec_destroy_clientid), | 7389 | PROC(DESTROY_CLIENTID, enc_destroy_clientid, dec_destroy_clientid), |
diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c index ae05278b3761..c6e4bda63000 100644 --- a/fs/nfs/objlayout/objio_osd.c +++ b/fs/nfs/objlayout/objio_osd.c | |||
@@ -60,52 +60,6 @@ objio_free_deviceid_node(struct nfs4_deviceid_node *d) | |||
60 | kfree(de); | 60 | kfree(de); |
61 | } | 61 | } |
62 | 62 | ||
63 | static struct objio_dev_ent *_dev_list_find(const struct nfs_server *nfss, | ||
64 | const struct nfs4_deviceid *d_id) | ||
65 | { | ||
66 | struct nfs4_deviceid_node *d; | ||
67 | struct objio_dev_ent *de; | ||
68 | |||
69 | d = nfs4_find_get_deviceid(nfss->pnfs_curr_ld, nfss->nfs_client, d_id); | ||
70 | if (!d) | ||
71 | return NULL; | ||
72 | |||
73 | de = container_of(d, struct objio_dev_ent, id_node); | ||
74 | return de; | ||
75 | } | ||
76 | |||
77 | static struct objio_dev_ent * | ||
78 | _dev_list_add(const struct nfs_server *nfss, | ||
79 | const struct nfs4_deviceid *d_id, struct osd_dev *od, | ||
80 | gfp_t gfp_flags) | ||
81 | { | ||
82 | struct nfs4_deviceid_node *d; | ||
83 | struct objio_dev_ent *de = kzalloc(sizeof(*de), gfp_flags); | ||
84 | struct objio_dev_ent *n; | ||
85 | |||
86 | if (!de) { | ||
87 | dprintk("%s: -ENOMEM od=%p\n", __func__, od); | ||
88 | return NULL; | ||
89 | } | ||
90 | |||
91 | dprintk("%s: Adding od=%p\n", __func__, od); | ||
92 | nfs4_init_deviceid_node(&de->id_node, | ||
93 | nfss->pnfs_curr_ld, | ||
94 | nfss->nfs_client, | ||
95 | d_id); | ||
96 | de->od.od = od; | ||
97 | |||
98 | d = nfs4_insert_deviceid_node(&de->id_node); | ||
99 | n = container_of(d, struct objio_dev_ent, id_node); | ||
100 | if (n != de) { | ||
101 | dprintk("%s: Race with other n->od=%p\n", __func__, n->od.od); | ||
102 | objio_free_deviceid_node(&de->id_node); | ||
103 | de = n; | ||
104 | } | ||
105 | |||
106 | return de; | ||
107 | } | ||
108 | |||
109 | struct objio_segment { | 63 | struct objio_segment { |
110 | struct pnfs_layout_segment lseg; | 64 | struct pnfs_layout_segment lseg; |
111 | 65 | ||
@@ -130,29 +84,24 @@ struct objio_state { | |||
130 | 84 | ||
131 | /* Send and wait for a get_device_info of devices in the layout, | 85 | /* Send and wait for a get_device_info of devices in the layout, |
132 | then look them up with the osd_initiator library */ | 86 | then look them up with the osd_initiator library */ |
133 | static int objio_devices_lookup(struct pnfs_layout_hdr *pnfslay, | 87 | struct nfs4_deviceid_node * |
134 | struct objio_segment *objio_seg, unsigned c, struct nfs4_deviceid *d_id, | 88 | objio_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev, |
135 | gfp_t gfp_flags) | 89 | gfp_t gfp_flags) |
136 | { | 90 | { |
137 | struct pnfs_osd_deviceaddr *deviceaddr; | 91 | struct pnfs_osd_deviceaddr *deviceaddr; |
138 | struct objio_dev_ent *ode; | 92 | struct objio_dev_ent *ode = NULL; |
139 | struct osd_dev *od; | 93 | struct osd_dev *od; |
140 | struct osd_dev_info odi; | 94 | struct osd_dev_info odi; |
141 | bool retry_flag = true; | 95 | bool retry_flag = true; |
96 | __be32 *p; | ||
142 | int err; | 97 | int err; |
143 | 98 | ||
144 | ode = _dev_list_find(NFS_SERVER(pnfslay->plh_inode), d_id); | 99 | deviceaddr = kzalloc(sizeof(*deviceaddr), gfp_flags); |
145 | if (ode) { | 100 | if (!deviceaddr) |
146 | objio_seg->oc.ods[c] = &ode->od; /* must use container_of */ | 101 | return NULL; |
147 | return 0; | ||
148 | } | ||
149 | 102 | ||
150 | err = objlayout_get_deviceinfo(pnfslay, d_id, &deviceaddr, gfp_flags); | 103 | p = page_address(pdev->pages[0]); |
151 | if (unlikely(err)) { | 104 | pnfs_osd_xdr_decode_deviceaddr(deviceaddr, p); |
152 | dprintk("%s: objlayout_get_deviceinfo dev(%llx:%llx) =>%d\n", | ||
153 | __func__, _DEVID_LO(d_id), _DEVID_HI(d_id), err); | ||
154 | return err; | ||
155 | } | ||
156 | 105 | ||
157 | odi.systemid_len = deviceaddr->oda_systemid.len; | 106 | odi.systemid_len = deviceaddr->oda_systemid.len; |
158 | if (odi.systemid_len > sizeof(odi.systemid)) { | 107 | if (odi.systemid_len > sizeof(odi.systemid)) { |
@@ -188,14 +137,24 @@ retry_lookup: | |||
188 | goto out; | 137 | goto out; |
189 | } | 138 | } |
190 | 139 | ||
191 | ode = _dev_list_add(NFS_SERVER(pnfslay->plh_inode), d_id, od, | ||
192 | gfp_flags); | ||
193 | objio_seg->oc.ods[c] = &ode->od; /* must use container_of */ | ||
194 | dprintk("Adding new dev_id(%llx:%llx)\n", | 140 | dprintk("Adding new dev_id(%llx:%llx)\n", |
195 | _DEVID_LO(d_id), _DEVID_HI(d_id)); | 141 | _DEVID_LO(&pdev->dev_id), _DEVID_HI(&pdev->dev_id)); |
142 | |||
143 | ode = kzalloc(sizeof(*ode), gfp_flags); | ||
144 | if (!ode) { | ||
145 | dprintk("%s: -ENOMEM od=%p\n", __func__, od); | ||
146 | goto out; | ||
147 | } | ||
148 | |||
149 | nfs4_init_deviceid_node(&ode->id_node, server, &pdev->dev_id); | ||
150 | kfree(deviceaddr); | ||
151 | |||
152 | ode->od.od = od; | ||
153 | return &ode->id_node; | ||
154 | |||
196 | out: | 155 | out: |
197 | objlayout_put_deviceinfo(deviceaddr); | 156 | kfree(deviceaddr); |
198 | return err; | 157 | return NULL; |
199 | } | 158 | } |
200 | 159 | ||
201 | static void copy_single_comp(struct ore_components *oc, unsigned c, | 160 | static void copy_single_comp(struct ore_components *oc, unsigned c, |
@@ -254,6 +213,7 @@ int objio_alloc_lseg(struct pnfs_layout_segment **outp, | |||
254 | struct xdr_stream *xdr, | 213 | struct xdr_stream *xdr, |
255 | gfp_t gfp_flags) | 214 | gfp_t gfp_flags) |
256 | { | 215 | { |
216 | struct nfs_server *server = NFS_SERVER(pnfslay->plh_inode); | ||
257 | struct objio_segment *objio_seg; | 217 | struct objio_segment *objio_seg; |
258 | struct pnfs_osd_xdr_decode_layout_iter iter; | 218 | struct pnfs_osd_xdr_decode_layout_iter iter; |
259 | struct pnfs_osd_layout layout; | 219 | struct pnfs_osd_layout layout; |
@@ -283,13 +243,21 @@ int objio_alloc_lseg(struct pnfs_layout_segment **outp, | |||
283 | objio_seg->oc.first_dev = layout.olo_comps_index; | 243 | objio_seg->oc.first_dev = layout.olo_comps_index; |
284 | cur_comp = 0; | 244 | cur_comp = 0; |
285 | while (pnfs_osd_xdr_decode_layout_comp(&src_comp, &iter, xdr, &err)) { | 245 | while (pnfs_osd_xdr_decode_layout_comp(&src_comp, &iter, xdr, &err)) { |
246 | struct nfs4_deviceid_node *d; | ||
247 | struct objio_dev_ent *ode; | ||
248 | |||
286 | copy_single_comp(&objio_seg->oc, cur_comp, &src_comp); | 249 | copy_single_comp(&objio_seg->oc, cur_comp, &src_comp); |
287 | err = objio_devices_lookup(pnfslay, objio_seg, cur_comp, | 250 | |
288 | &src_comp.oc_object_id.oid_device_id, | 251 | d = nfs4_find_get_deviceid(server, |
289 | gfp_flags); | 252 | &src_comp.oc_object_id.oid_device_id, |
290 | if (err) | 253 | pnfslay->plh_lc_cred, gfp_flags); |
254 | if (!d) { | ||
255 | err = -ENXIO; | ||
291 | goto err; | 256 | goto err; |
292 | ++cur_comp; | 257 | } |
258 | |||
259 | ode = container_of(d, struct objio_dev_ent, id_node); | ||
260 | objio_seg->oc.ods[cur_comp++] = &ode->od; | ||
293 | } | 261 | } |
294 | /* pnfs_osd_xdr_decode_layout_comp returns false on error */ | 262 | /* pnfs_osd_xdr_decode_layout_comp returns false on error */ |
295 | if (unlikely(err)) | 263 | if (unlikely(err)) |
@@ -653,6 +621,7 @@ static struct pnfs_layoutdriver_type objlayout_type = { | |||
653 | .flags = PNFS_LAYOUTRET_ON_SETATTR | | 621 | .flags = PNFS_LAYOUTRET_ON_SETATTR | |
654 | PNFS_LAYOUTRET_ON_ERROR, | 622 | PNFS_LAYOUTRET_ON_ERROR, |
655 | 623 | ||
624 | .max_deviceinfo_size = PAGE_SIZE, | ||
656 | .owner = THIS_MODULE, | 625 | .owner = THIS_MODULE, |
657 | .alloc_layout_hdr = objlayout_alloc_layout_hdr, | 626 | .alloc_layout_hdr = objlayout_alloc_layout_hdr, |
658 | .free_layout_hdr = objlayout_free_layout_hdr, | 627 | .free_layout_hdr = objlayout_free_layout_hdr, |
diff --git a/fs/nfs/objlayout/objlayout.c b/fs/nfs/objlayout/objlayout.c index 697a16d11fac..c89357c7a914 100644 --- a/fs/nfs/objlayout/objlayout.c +++ b/fs/nfs/objlayout/objlayout.c | |||
@@ -574,76 +574,6 @@ loop_done: | |||
574 | dprintk("%s: Return\n", __func__); | 574 | dprintk("%s: Return\n", __func__); |
575 | } | 575 | } |
576 | 576 | ||
577 | |||
578 | /* | ||
579 | * Get Device Info API for io engines | ||
580 | */ | ||
581 | struct objlayout_deviceinfo { | ||
582 | struct page *page; | ||
583 | struct pnfs_osd_deviceaddr da; /* This must be last */ | ||
584 | }; | ||
585 | |||
586 | /* Initialize and call nfs_getdeviceinfo, then decode and return a | ||
587 | * "struct pnfs_osd_deviceaddr *" Eventually objlayout_put_deviceinfo() | ||
588 | * should be called. | ||
589 | */ | ||
590 | int objlayout_get_deviceinfo(struct pnfs_layout_hdr *pnfslay, | ||
591 | struct nfs4_deviceid *d_id, struct pnfs_osd_deviceaddr **deviceaddr, | ||
592 | gfp_t gfp_flags) | ||
593 | { | ||
594 | struct objlayout_deviceinfo *odi; | ||
595 | struct pnfs_device pd; | ||
596 | struct page *page, **pages; | ||
597 | u32 *p; | ||
598 | int err; | ||
599 | |||
600 | page = alloc_page(gfp_flags); | ||
601 | if (!page) | ||
602 | return -ENOMEM; | ||
603 | |||
604 | pages = &page; | ||
605 | pd.pages = pages; | ||
606 | |||
607 | memcpy(&pd.dev_id, d_id, sizeof(*d_id)); | ||
608 | pd.layout_type = LAYOUT_OSD2_OBJECTS; | ||
609 | pd.pages = &page; | ||
610 | pd.pgbase = 0; | ||
611 | pd.pglen = PAGE_SIZE; | ||
612 | pd.mincount = 0; | ||
613 | pd.maxcount = PAGE_SIZE; | ||
614 | |||
615 | err = nfs4_proc_getdeviceinfo(NFS_SERVER(pnfslay->plh_inode), &pd, | ||
616 | pnfslay->plh_lc_cred); | ||
617 | dprintk("%s nfs_getdeviceinfo returned %d\n", __func__, err); | ||
618 | if (err) | ||
619 | goto err_out; | ||
620 | |||
621 | p = page_address(page); | ||
622 | odi = kzalloc(sizeof(*odi), gfp_flags); | ||
623 | if (!odi) { | ||
624 | err = -ENOMEM; | ||
625 | goto err_out; | ||
626 | } | ||
627 | pnfs_osd_xdr_decode_deviceaddr(&odi->da, p); | ||
628 | odi->page = page; | ||
629 | *deviceaddr = &odi->da; | ||
630 | return 0; | ||
631 | |||
632 | err_out: | ||
633 | __free_page(page); | ||
634 | return err; | ||
635 | } | ||
636 | |||
637 | void objlayout_put_deviceinfo(struct pnfs_osd_deviceaddr *deviceaddr) | ||
638 | { | ||
639 | struct objlayout_deviceinfo *odi = container_of(deviceaddr, | ||
640 | struct objlayout_deviceinfo, | ||
641 | da); | ||
642 | |||
643 | __free_page(odi->page); | ||
644 | kfree(odi); | ||
645 | } | ||
646 | |||
647 | enum { | 577 | enum { |
648 | OBJLAYOUT_MAX_URI_LEN = 256, OBJLAYOUT_MAX_OSDNAME_LEN = 64, | 578 | OBJLAYOUT_MAX_URI_LEN = 256, OBJLAYOUT_MAX_OSDNAME_LEN = 64, |
649 | OBJLAYOUT_MAX_SYSID_HEX_LEN = OSD_SYSTEMID_LEN * 2 + 1, | 579 | OBJLAYOUT_MAX_SYSID_HEX_LEN = OSD_SYSTEMID_LEN * 2 + 1, |
diff --git a/fs/nfs/objlayout/objlayout.h b/fs/nfs/objlayout/objlayout.h index fd13f1d2f136..3a0828d57339 100644 --- a/fs/nfs/objlayout/objlayout.h +++ b/fs/nfs/objlayout/objlayout.h | |||
@@ -149,11 +149,6 @@ extern void objlayout_read_done(struct objlayout_io_res *oir, | |||
149 | extern void objlayout_write_done(struct objlayout_io_res *oir, | 149 | extern void objlayout_write_done(struct objlayout_io_res *oir, |
150 | ssize_t status, bool sync); | 150 | ssize_t status, bool sync); |
151 | 151 | ||
152 | extern int objlayout_get_deviceinfo(struct pnfs_layout_hdr *pnfslay, | ||
153 | struct nfs4_deviceid *d_id, struct pnfs_osd_deviceaddr **deviceaddr, | ||
154 | gfp_t gfp_flags); | ||
155 | extern void objlayout_put_deviceinfo(struct pnfs_osd_deviceaddr *deviceaddr); | ||
156 | |||
157 | /* | 152 | /* |
158 | * exported generic objects function vectors | 153 | * exported generic objects function vectors |
159 | */ | 154 | */ |
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index be7cbce6e4c7..94e16ec88312 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c | |||
@@ -481,6 +481,14 @@ size_t nfs_generic_pg_test(struct nfs_pageio_descriptor *desc, | |||
481 | return 0; | 481 | return 0; |
482 | } | 482 | } |
483 | 483 | ||
484 | /* | ||
485 | * Limit the request size so that we can still allocate a page array | ||
486 | * for it without upsetting the slab allocator. | ||
487 | */ | ||
488 | if (((desc->pg_count + req->wb_bytes) >> PAGE_SHIFT) * | ||
489 | sizeof(struct page) > PAGE_SIZE) | ||
490 | return 0; | ||
491 | |||
484 | return min(desc->pg_bsize - desc->pg_count, (size_t)req->wb_bytes); | 492 | return min(desc->pg_bsize - desc->pg_count, (size_t)req->wb_bytes); |
485 | } | 493 | } |
486 | EXPORT_SYMBOL_GPL(nfs_generic_pg_test); | 494 | EXPORT_SYMBOL_GPL(nfs_generic_pg_test); |
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index a3851debf8a2..76de7f568119 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c | |||
@@ -594,6 +594,9 @@ pnfs_layout_free_bulk_destroy_list(struct list_head *layout_list, | |||
594 | dprintk("%s freeing layout for inode %lu\n", __func__, | 594 | dprintk("%s freeing layout for inode %lu\n", __func__, |
595 | lo->plh_inode->i_ino); | 595 | lo->plh_inode->i_ino); |
596 | inode = lo->plh_inode; | 596 | inode = lo->plh_inode; |
597 | |||
598 | pnfs_layoutcommit_inode(inode, false); | ||
599 | |||
597 | spin_lock(&inode->i_lock); | 600 | spin_lock(&inode->i_lock); |
598 | list_del_init(&lo->plh_bulk_destroy); | 601 | list_del_init(&lo->plh_bulk_destroy); |
599 | lo->plh_block_lgets++; /* permanently block new LAYOUTGETs */ | 602 | lo->plh_block_lgets++; /* permanently block new LAYOUTGETs */ |
@@ -682,17 +685,6 @@ static bool pnfs_seqid_is_newer(u32 s1, u32 s2) | |||
682 | return (s32)(s1 - s2) > 0; | 685 | return (s32)(s1 - s2) > 0; |
683 | } | 686 | } |
684 | 687 | ||
685 | static void | ||
686 | pnfs_verify_layout_stateid(struct pnfs_layout_hdr *lo, | ||
687 | const nfs4_stateid *new, | ||
688 | struct list_head *free_me_list) | ||
689 | { | ||
690 | if (nfs4_stateid_match_other(&lo->plh_stateid, new)) | ||
691 | return; | ||
692 | /* Layout is new! Kill existing layout segments */ | ||
693 | pnfs_mark_matching_lsegs_invalid(lo, free_me_list, NULL); | ||
694 | } | ||
695 | |||
696 | /* update lo->plh_stateid with new if is more recent */ | 688 | /* update lo->plh_stateid with new if is more recent */ |
697 | void | 689 | void |
698 | pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo, const nfs4_stateid *new, | 690 | pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo, const nfs4_stateid *new, |
@@ -749,7 +741,8 @@ pnfs_choose_layoutget_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo, | |||
749 | status = -EAGAIN; | 741 | status = -EAGAIN; |
750 | } else if (!nfs4_valid_open_stateid(open_state)) { | 742 | } else if (!nfs4_valid_open_stateid(open_state)) { |
751 | status = -EBADF; | 743 | status = -EBADF; |
752 | } else if (list_empty(&lo->plh_segs)) { | 744 | } else if (list_empty(&lo->plh_segs) || |
745 | test_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags)) { | ||
753 | int seq; | 746 | int seq; |
754 | 747 | ||
755 | do { | 748 | do { |
@@ -864,6 +857,16 @@ _pnfs_return_layout(struct inode *ino) | |||
864 | empty = list_empty(&lo->plh_segs); | 857 | empty = list_empty(&lo->plh_segs); |
865 | pnfs_clear_layoutcommit(ino, &tmp_list); | 858 | pnfs_clear_layoutcommit(ino, &tmp_list); |
866 | pnfs_mark_matching_lsegs_invalid(lo, &tmp_list, NULL); | 859 | pnfs_mark_matching_lsegs_invalid(lo, &tmp_list, NULL); |
860 | |||
861 | if (NFS_SERVER(ino)->pnfs_curr_ld->return_range) { | ||
862 | struct pnfs_layout_range range = { | ||
863 | .iomode = IOMODE_ANY, | ||
864 | .offset = 0, | ||
865 | .length = NFS4_MAX_UINT64, | ||
866 | }; | ||
867 | NFS_SERVER(ino)->pnfs_curr_ld->return_range(lo, &range); | ||
868 | } | ||
869 | |||
867 | /* Don't send a LAYOUTRETURN if list was initially empty */ | 870 | /* Don't send a LAYOUTRETURN if list was initially empty */ |
868 | if (empty) { | 871 | if (empty) { |
869 | spin_unlock(&ino->i_lock); | 872 | spin_unlock(&ino->i_lock); |
@@ -871,6 +874,8 @@ _pnfs_return_layout(struct inode *ino) | |||
871 | dprintk("NFS: %s no layout segments to return\n", __func__); | 874 | dprintk("NFS: %s no layout segments to return\n", __func__); |
872 | goto out; | 875 | goto out; |
873 | } | 876 | } |
877 | |||
878 | set_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags); | ||
874 | lo->plh_block_lgets++; | 879 | lo->plh_block_lgets++; |
875 | spin_unlock(&ino->i_lock); | 880 | spin_unlock(&ino->i_lock); |
876 | pnfs_free_lseg_list(&tmp_list); | 881 | pnfs_free_lseg_list(&tmp_list); |
@@ -1358,25 +1363,41 @@ pnfs_layout_process(struct nfs4_layoutget *lgp) | |||
1358 | goto out; | 1363 | goto out; |
1359 | } | 1364 | } |
1360 | 1365 | ||
1366 | init_lseg(lo, lseg); | ||
1367 | lseg->pls_range = res->range; | ||
1368 | |||
1361 | spin_lock(&ino->i_lock); | 1369 | spin_lock(&ino->i_lock); |
1362 | if (test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags)) { | 1370 | if (test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags)) { |
1363 | dprintk("%s forget reply due to recall\n", __func__); | 1371 | dprintk("%s forget reply due to recall\n", __func__); |
1364 | goto out_forget_reply; | 1372 | goto out_forget_reply; |
1365 | } | 1373 | } |
1366 | 1374 | ||
1367 | if (pnfs_layoutgets_blocked(lo, 1) || | 1375 | if (pnfs_layoutgets_blocked(lo, 1)) { |
1368 | pnfs_layout_stateid_blocked(lo, &res->stateid)) { | ||
1369 | dprintk("%s forget reply due to state\n", __func__); | 1376 | dprintk("%s forget reply due to state\n", __func__); |
1370 | goto out_forget_reply; | 1377 | goto out_forget_reply; |
1371 | } | 1378 | } |
1372 | 1379 | ||
1373 | /* Check that the new stateid matches the old stateid */ | 1380 | if (nfs4_stateid_match_other(&lo->plh_stateid, &res->stateid)) { |
1374 | pnfs_verify_layout_stateid(lo, &res->stateid, &free_me); | 1381 | /* existing state ID, make sure the sequence number matches. */ |
1375 | /* Done processing layoutget. Set the layout stateid */ | 1382 | if (pnfs_layout_stateid_blocked(lo, &res->stateid)) { |
1376 | pnfs_set_layout_stateid(lo, &res->stateid, false); | 1383 | dprintk("%s forget reply due to sequence\n", __func__); |
1384 | goto out_forget_reply; | ||
1385 | } | ||
1386 | pnfs_set_layout_stateid(lo, &res->stateid, false); | ||
1387 | } else { | ||
1388 | /* | ||
1389 | * We got an entirely new state ID. Mark all segments for the | ||
1390 | * inode invalid, and don't bother validating the stateid | ||
1391 | * sequence number. | ||
1392 | */ | ||
1393 | pnfs_mark_matching_lsegs_invalid(lo, &free_me, NULL); | ||
1394 | |||
1395 | nfs4_stateid_copy(&lo->plh_stateid, &res->stateid); | ||
1396 | lo->plh_barrier = be32_to_cpu(res->stateid.seqid); | ||
1397 | } | ||
1398 | |||
1399 | clear_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags); | ||
1377 | 1400 | ||
1378 | init_lseg(lo, lseg); | ||
1379 | lseg->pls_range = res->range; | ||
1380 | pnfs_get_lseg(lseg); | 1401 | pnfs_get_lseg(lseg); |
1381 | pnfs_layout_insert_lseg(lo, lseg); | 1402 | pnfs_layout_insert_lseg(lo, lseg); |
1382 | 1403 | ||
@@ -1797,6 +1818,35 @@ pnfs_set_layoutcommit(struct nfs_pgio_header *hdr) | |||
1797 | } | 1818 | } |
1798 | EXPORT_SYMBOL_GPL(pnfs_set_layoutcommit); | 1819 | EXPORT_SYMBOL_GPL(pnfs_set_layoutcommit); |
1799 | 1820 | ||
1821 | void pnfs_commit_set_layoutcommit(struct nfs_commit_data *data) | ||
1822 | { | ||
1823 | struct inode *inode = data->inode; | ||
1824 | struct nfs_inode *nfsi = NFS_I(inode); | ||
1825 | bool mark_as_dirty = false; | ||
1826 | |||
1827 | spin_lock(&inode->i_lock); | ||
1828 | if (!test_and_set_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) { | ||
1829 | mark_as_dirty = true; | ||
1830 | dprintk("%s: Set layoutcommit for inode %lu ", | ||
1831 | __func__, inode->i_ino); | ||
1832 | } | ||
1833 | if (!test_and_set_bit(NFS_LSEG_LAYOUTCOMMIT, &data->lseg->pls_flags)) { | ||
1834 | /* references matched in nfs4_layoutcommit_release */ | ||
1835 | pnfs_get_lseg(data->lseg); | ||
1836 | } | ||
1837 | if (data->lwb > nfsi->layout->plh_lwb) | ||
1838 | nfsi->layout->plh_lwb = data->lwb; | ||
1839 | spin_unlock(&inode->i_lock); | ||
1840 | dprintk("%s: lseg %p end_pos %llu\n", | ||
1841 | __func__, data->lseg, nfsi->layout->plh_lwb); | ||
1842 | |||
1843 | /* if pnfs_layoutcommit_inode() runs between inode locks, the next one | ||
1844 | * will be a noop because NFS_INO_LAYOUTCOMMIT will not be set */ | ||
1845 | if (mark_as_dirty) | ||
1846 | mark_inode_dirty_sync(inode); | ||
1847 | } | ||
1848 | EXPORT_SYMBOL_GPL(pnfs_commit_set_layoutcommit); | ||
1849 | |||
1800 | void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data) | 1850 | void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data) |
1801 | { | 1851 | { |
1802 | struct nfs_server *nfss = NFS_SERVER(data->args.inode); | 1852 | struct nfs_server *nfss = NFS_SERVER(data->args.inode); |
@@ -1817,6 +1867,7 @@ void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data) | |||
1817 | int | 1867 | int |
1818 | pnfs_layoutcommit_inode(struct inode *inode, bool sync) | 1868 | pnfs_layoutcommit_inode(struct inode *inode, bool sync) |
1819 | { | 1869 | { |
1870 | struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld; | ||
1820 | struct nfs4_layoutcommit_data *data; | 1871 | struct nfs4_layoutcommit_data *data; |
1821 | struct nfs_inode *nfsi = NFS_I(inode); | 1872 | struct nfs_inode *nfsi = NFS_I(inode); |
1822 | loff_t end_pos; | 1873 | loff_t end_pos; |
@@ -1867,6 +1918,20 @@ pnfs_layoutcommit_inode(struct inode *inode, bool sync) | |||
1867 | data->args.lastbytewritten = end_pos - 1; | 1918 | data->args.lastbytewritten = end_pos - 1; |
1868 | data->res.server = NFS_SERVER(inode); | 1919 | data->res.server = NFS_SERVER(inode); |
1869 | 1920 | ||
1921 | if (ld->prepare_layoutcommit) { | ||
1922 | status = ld->prepare_layoutcommit(&data->args); | ||
1923 | if (status) { | ||
1924 | spin_lock(&inode->i_lock); | ||
1925 | if (end_pos < nfsi->layout->plh_lwb) | ||
1926 | nfsi->layout->plh_lwb = end_pos; | ||
1927 | spin_unlock(&inode->i_lock); | ||
1928 | put_rpccred(data->cred); | ||
1929 | set_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags); | ||
1930 | goto clear_layoutcommitting; | ||
1931 | } | ||
1932 | } | ||
1933 | |||
1934 | |||
1870 | status = nfs4_proc_layoutcommit(data, sync); | 1935 | status = nfs4_proc_layoutcommit(data, sync); |
1871 | out: | 1936 | out: |
1872 | if (status) | 1937 | if (status) |
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index aca3dff5dae6..693ce42ec683 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h | |||
@@ -65,12 +65,15 @@ enum { | |||
65 | NFS_LAYOUT_BULK_RECALL, /* bulk recall affecting layout */ | 65 | NFS_LAYOUT_BULK_RECALL, /* bulk recall affecting layout */ |
66 | NFS_LAYOUT_ROC, /* some lseg had roc bit set */ | 66 | NFS_LAYOUT_ROC, /* some lseg had roc bit set */ |
67 | NFS_LAYOUT_RETURN, /* Return this layout ASAP */ | 67 | NFS_LAYOUT_RETURN, /* Return this layout ASAP */ |
68 | NFS_LAYOUT_INVALID_STID, /* layout stateid id is invalid */ | ||
68 | }; | 69 | }; |
69 | 70 | ||
70 | enum layoutdriver_policy_flags { | 71 | enum layoutdriver_policy_flags { |
71 | /* Should the pNFS client commit and return the layout upon a setattr */ | 72 | /* Should the pNFS client commit and return the layout upon truncate to |
73 | * a smaller size */ | ||
72 | PNFS_LAYOUTRET_ON_SETATTR = 1 << 0, | 74 | PNFS_LAYOUTRET_ON_SETATTR = 1 << 0, |
73 | PNFS_LAYOUTRET_ON_ERROR = 1 << 1, | 75 | PNFS_LAYOUTRET_ON_ERROR = 1 << 1, |
76 | PNFS_READ_WHOLE_PAGE = 1 << 2, | ||
74 | }; | 77 | }; |
75 | 78 | ||
76 | struct nfs4_deviceid_node; | 79 | struct nfs4_deviceid_node; |
@@ -82,6 +85,7 @@ struct pnfs_layoutdriver_type { | |||
82 | const char *name; | 85 | const char *name; |
83 | struct module *owner; | 86 | struct module *owner; |
84 | unsigned flags; | 87 | unsigned flags; |
88 | unsigned max_deviceinfo_size; | ||
85 | 89 | ||
86 | int (*set_layoutdriver) (struct nfs_server *, const struct nfs_fh *); | 90 | int (*set_layoutdriver) (struct nfs_server *, const struct nfs_fh *); |
87 | int (*clear_layoutdriver) (struct nfs_server *); | 91 | int (*clear_layoutdriver) (struct nfs_server *); |
@@ -92,6 +96,9 @@ struct pnfs_layoutdriver_type { | |||
92 | struct pnfs_layout_segment * (*alloc_lseg) (struct pnfs_layout_hdr *layoutid, struct nfs4_layoutget_res *lgr, gfp_t gfp_flags); | 96 | struct pnfs_layout_segment * (*alloc_lseg) (struct pnfs_layout_hdr *layoutid, struct nfs4_layoutget_res *lgr, gfp_t gfp_flags); |
93 | void (*free_lseg) (struct pnfs_layout_segment *lseg); | 97 | void (*free_lseg) (struct pnfs_layout_segment *lseg); |
94 | 98 | ||
99 | void (*return_range) (struct pnfs_layout_hdr *lo, | ||
100 | struct pnfs_layout_range *range); | ||
101 | |||
95 | /* test for nfs page cache coalescing */ | 102 | /* test for nfs page cache coalescing */ |
96 | const struct nfs_pageio_ops *pg_read_ops; | 103 | const struct nfs_pageio_ops *pg_read_ops; |
97 | const struct nfs_pageio_ops *pg_write_ops; | 104 | const struct nfs_pageio_ops *pg_write_ops; |
@@ -121,14 +128,17 @@ struct pnfs_layoutdriver_type { | |||
121 | enum pnfs_try_status (*write_pagelist)(struct nfs_pgio_header *, int); | 128 | enum pnfs_try_status (*write_pagelist)(struct nfs_pgio_header *, int); |
122 | 129 | ||
123 | void (*free_deviceid_node) (struct nfs4_deviceid_node *); | 130 | void (*free_deviceid_node) (struct nfs4_deviceid_node *); |
131 | struct nfs4_deviceid_node * (*alloc_deviceid_node) | ||
132 | (struct nfs_server *server, struct pnfs_device *pdev, | ||
133 | gfp_t gfp_flags); | ||
124 | 134 | ||
125 | void (*encode_layoutreturn) (struct pnfs_layout_hdr *layoutid, | 135 | void (*encode_layoutreturn) (struct pnfs_layout_hdr *layoutid, |
126 | struct xdr_stream *xdr, | 136 | struct xdr_stream *xdr, |
127 | const struct nfs4_layoutreturn_args *args); | 137 | const struct nfs4_layoutreturn_args *args); |
128 | 138 | ||
129 | void (*cleanup_layoutcommit) (struct nfs4_layoutcommit_data *data); | 139 | void (*cleanup_layoutcommit) (struct nfs4_layoutcommit_data *data); |
130 | 140 | int (*prepare_layoutcommit) (struct nfs4_layoutcommit_args *args); | |
131 | void (*encode_layoutcommit) (struct pnfs_layout_hdr *layoutid, | 141 | void (*encode_layoutcommit) (struct pnfs_layout_hdr *lo, |
132 | struct xdr_stream *xdr, | 142 | struct xdr_stream *xdr, |
133 | const struct nfs4_layoutcommit_args *args); | 143 | const struct nfs4_layoutcommit_args *args); |
134 | }; | 144 | }; |
@@ -171,9 +181,6 @@ extern int pnfs_register_layoutdriver(struct pnfs_layoutdriver_type *); | |||
171 | extern void pnfs_unregister_layoutdriver(struct pnfs_layoutdriver_type *); | 181 | extern void pnfs_unregister_layoutdriver(struct pnfs_layoutdriver_type *); |
172 | 182 | ||
173 | /* nfs4proc.c */ | 183 | /* nfs4proc.c */ |
174 | extern int nfs4_proc_getdevicelist(struct nfs_server *server, | ||
175 | const struct nfs_fh *fh, | ||
176 | struct pnfs_devicelist *devlist); | ||
177 | extern int nfs4_proc_getdeviceinfo(struct nfs_server *server, | 184 | extern int nfs4_proc_getdeviceinfo(struct nfs_server *server, |
178 | struct pnfs_device *dev, | 185 | struct pnfs_device *dev, |
179 | struct rpc_cred *cred); | 186 | struct rpc_cred *cred); |
@@ -219,6 +226,7 @@ void pnfs_roc_release(struct inode *ino); | |||
219 | void pnfs_roc_set_barrier(struct inode *ino, u32 barrier); | 226 | void pnfs_roc_set_barrier(struct inode *ino, u32 barrier); |
220 | bool pnfs_roc_drain(struct inode *ino, u32 *barrier, struct rpc_task *task); | 227 | bool pnfs_roc_drain(struct inode *ino, u32 *barrier, struct rpc_task *task); |
221 | void pnfs_set_layoutcommit(struct nfs_pgio_header *); | 228 | void pnfs_set_layoutcommit(struct nfs_pgio_header *); |
229 | void pnfs_commit_set_layoutcommit(struct nfs_commit_data *data); | ||
222 | void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data); | 230 | void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data); |
223 | int pnfs_layoutcommit_inode(struct inode *inode, bool sync); | 231 | int pnfs_layoutcommit_inode(struct inode *inode, bool sync); |
224 | int _pnfs_return_layout(struct inode *); | 232 | int _pnfs_return_layout(struct inode *); |
@@ -255,11 +263,12 @@ struct nfs4_deviceid_node { | |||
255 | atomic_t ref; | 263 | atomic_t ref; |
256 | }; | 264 | }; |
257 | 265 | ||
258 | struct nfs4_deviceid_node *nfs4_find_get_deviceid(const struct pnfs_layoutdriver_type *, const struct nfs_client *, const struct nfs4_deviceid *); | 266 | struct nfs4_deviceid_node * |
267 | nfs4_find_get_deviceid(struct nfs_server *server, | ||
268 | const struct nfs4_deviceid *id, struct rpc_cred *cred, | ||
269 | gfp_t gfp_mask); | ||
259 | void nfs4_delete_deviceid(const struct pnfs_layoutdriver_type *, const struct nfs_client *, const struct nfs4_deviceid *); | 270 | void nfs4_delete_deviceid(const struct pnfs_layoutdriver_type *, const struct nfs_client *, const struct nfs4_deviceid *); |
260 | void nfs4_init_deviceid_node(struct nfs4_deviceid_node *, | 271 | void nfs4_init_deviceid_node(struct nfs4_deviceid_node *, struct nfs_server *, |
261 | const struct pnfs_layoutdriver_type *, | ||
262 | const struct nfs_client *, | ||
263 | const struct nfs4_deviceid *); | 272 | const struct nfs4_deviceid *); |
264 | struct nfs4_deviceid_node *nfs4_insert_deviceid_node(struct nfs4_deviceid_node *); | 273 | struct nfs4_deviceid_node *nfs4_insert_deviceid_node(struct nfs4_deviceid_node *); |
265 | bool nfs4_put_deviceid_node(struct nfs4_deviceid_node *); | 274 | bool nfs4_put_deviceid_node(struct nfs4_deviceid_node *); |
@@ -267,6 +276,13 @@ void nfs4_mark_deviceid_unavailable(struct nfs4_deviceid_node *node); | |||
267 | bool nfs4_test_deviceid_unavailable(struct nfs4_deviceid_node *node); | 276 | bool nfs4_test_deviceid_unavailable(struct nfs4_deviceid_node *node); |
268 | void nfs4_deviceid_purge_client(const struct nfs_client *); | 277 | void nfs4_deviceid_purge_client(const struct nfs_client *); |
269 | 278 | ||
279 | static inline struct nfs4_deviceid_node * | ||
280 | nfs4_get_deviceid(struct nfs4_deviceid_node *d) | ||
281 | { | ||
282 | atomic_inc(&d->ref); | ||
283 | return d; | ||
284 | } | ||
285 | |||
270 | static inline struct pnfs_layout_segment * | 286 | static inline struct pnfs_layout_segment * |
271 | pnfs_get_lseg(struct pnfs_layout_segment *lseg) | 287 | pnfs_get_lseg(struct pnfs_layout_segment *lseg) |
272 | { | 288 | { |
@@ -368,6 +384,14 @@ pnfs_ld_layoutret_on_setattr(struct inode *inode) | |||
368 | } | 384 | } |
369 | 385 | ||
370 | static inline bool | 386 | static inline bool |
387 | pnfs_ld_read_whole_page(struct inode *inode) | ||
388 | { | ||
389 | if (!pnfs_enabled_sb(NFS_SERVER(inode))) | ||
390 | return false; | ||
391 | return NFS_SERVER(inode)->pnfs_curr_ld->flags & PNFS_READ_WHOLE_PAGE; | ||
392 | } | ||
393 | |||
394 | static inline bool | ||
371 | pnfs_layoutcommit_outstanding(struct inode *inode) | 395 | pnfs_layoutcommit_outstanding(struct inode *inode) |
372 | { | 396 | { |
373 | struct nfs_inode *nfsi = NFS_I(inode); | 397 | struct nfs_inode *nfsi = NFS_I(inode); |
@@ -443,6 +467,12 @@ pnfs_ld_layoutret_on_setattr(struct inode *inode) | |||
443 | } | 467 | } |
444 | 468 | ||
445 | static inline bool | 469 | static inline bool |
470 | pnfs_ld_read_whole_page(struct inode *inode) | ||
471 | { | ||
472 | return false; | ||
473 | } | ||
474 | |||
475 | static inline bool | ||
446 | pnfs_roc(struct inode *ino) | 476 | pnfs_roc(struct inode *ino) |
447 | { | 477 | { |
448 | return false; | 478 | return false; |
diff --git a/fs/nfs/pnfs_dev.c b/fs/nfs/pnfs_dev.c index 6da209bd9408..aa2ec0015183 100644 --- a/fs/nfs/pnfs_dev.c +++ b/fs/nfs/pnfs_dev.c | |||
@@ -29,6 +29,9 @@ | |||
29 | */ | 29 | */ |
30 | 30 | ||
31 | #include <linux/export.h> | 31 | #include <linux/export.h> |
32 | #include <linux/nfs_fs.h> | ||
33 | #include "nfs4session.h" | ||
34 | #include "internal.h" | ||
32 | #include "pnfs.h" | 35 | #include "pnfs.h" |
33 | 36 | ||
34 | #define NFSDBG_FACILITY NFSDBG_PNFS | 37 | #define NFSDBG_FACILITY NFSDBG_PNFS |
@@ -89,6 +92,74 @@ _lookup_deviceid(const struct pnfs_layoutdriver_type *ld, | |||
89 | return NULL; | 92 | return NULL; |
90 | } | 93 | } |
91 | 94 | ||
95 | static struct nfs4_deviceid_node * | ||
96 | nfs4_get_device_info(struct nfs_server *server, | ||
97 | const struct nfs4_deviceid *dev_id, | ||
98 | struct rpc_cred *cred, gfp_t gfp_flags) | ||
99 | { | ||
100 | struct nfs4_deviceid_node *d = NULL; | ||
101 | struct pnfs_device *pdev = NULL; | ||
102 | struct page **pages = NULL; | ||
103 | u32 max_resp_sz; | ||
104 | int max_pages; | ||
105 | int rc, i; | ||
106 | |||
107 | /* | ||
108 | * Use the session max response size as the basis for setting | ||
109 | * GETDEVICEINFO's maxcount | ||
110 | */ | ||
111 | max_resp_sz = server->nfs_client->cl_session->fc_attrs.max_resp_sz; | ||
112 | if (server->pnfs_curr_ld->max_deviceinfo_size && | ||
113 | server->pnfs_curr_ld->max_deviceinfo_size < max_resp_sz) | ||
114 | max_resp_sz = server->pnfs_curr_ld->max_deviceinfo_size; | ||
115 | max_pages = nfs_page_array_len(0, max_resp_sz); | ||
116 | dprintk("%s: server %p max_resp_sz %u max_pages %d\n", | ||
117 | __func__, server, max_resp_sz, max_pages); | ||
118 | |||
119 | pdev = kzalloc(sizeof(*pdev), gfp_flags); | ||
120 | if (!pdev) | ||
121 | return NULL; | ||
122 | |||
123 | pages = kcalloc(max_pages, sizeof(struct page *), gfp_flags); | ||
124 | if (!pages) | ||
125 | goto out_free_pdev; | ||
126 | |||
127 | for (i = 0; i < max_pages; i++) { | ||
128 | pages[i] = alloc_page(gfp_flags); | ||
129 | if (!pages[i]) | ||
130 | goto out_free_pages; | ||
131 | } | ||
132 | |||
133 | memcpy(&pdev->dev_id, dev_id, sizeof(*dev_id)); | ||
134 | pdev->layout_type = server->pnfs_curr_ld->id; | ||
135 | pdev->pages = pages; | ||
136 | pdev->pgbase = 0; | ||
137 | pdev->pglen = max_resp_sz; | ||
138 | pdev->mincount = 0; | ||
139 | pdev->maxcount = max_resp_sz - nfs41_maxgetdevinfo_overhead; | ||
140 | |||
141 | rc = nfs4_proc_getdeviceinfo(server, pdev, cred); | ||
142 | dprintk("%s getdevice info returns %d\n", __func__, rc); | ||
143 | if (rc) | ||
144 | goto out_free_pages; | ||
145 | |||
146 | /* | ||
147 | * Found new device, need to decode it and then add it to the | ||
148 | * list of known devices for this mountpoint. | ||
149 | */ | ||
150 | d = server->pnfs_curr_ld->alloc_deviceid_node(server, pdev, | ||
151 | gfp_flags); | ||
152 | |||
153 | out_free_pages: | ||
154 | for (i = 0; i < max_pages; i++) | ||
155 | __free_page(pages[i]); | ||
156 | kfree(pages); | ||
157 | out_free_pdev: | ||
158 | kfree(pdev); | ||
159 | dprintk("<-- %s d %p\n", __func__, d); | ||
160 | return d; | ||
161 | } | ||
162 | |||
92 | /* | 163 | /* |
93 | * Lookup a deviceid in cache and get a reference count on it if found | 164 | * Lookup a deviceid in cache and get a reference count on it if found |
94 | * | 165 | * |
@@ -96,14 +167,14 @@ _lookup_deviceid(const struct pnfs_layoutdriver_type *ld, | |||
96 | * @id deviceid to look up | 167 | * @id deviceid to look up |
97 | */ | 168 | */ |
98 | static struct nfs4_deviceid_node * | 169 | static struct nfs4_deviceid_node * |
99 | _find_get_deviceid(const struct pnfs_layoutdriver_type *ld, | 170 | __nfs4_find_get_deviceid(struct nfs_server *server, |
100 | const struct nfs_client *clp, const struct nfs4_deviceid *id, | 171 | const struct nfs4_deviceid *id, long hash) |
101 | long hash) | ||
102 | { | 172 | { |
103 | struct nfs4_deviceid_node *d; | 173 | struct nfs4_deviceid_node *d; |
104 | 174 | ||
105 | rcu_read_lock(); | 175 | rcu_read_lock(); |
106 | d = _lookup_deviceid(ld, clp, id, hash); | 176 | d = _lookup_deviceid(server->pnfs_curr_ld, server->nfs_client, id, |
177 | hash); | ||
107 | if (d != NULL) | 178 | if (d != NULL) |
108 | atomic_inc(&d->ref); | 179 | atomic_inc(&d->ref); |
109 | rcu_read_unlock(); | 180 | rcu_read_unlock(); |
@@ -111,10 +182,33 @@ _find_get_deviceid(const struct pnfs_layoutdriver_type *ld, | |||
111 | } | 182 | } |
112 | 183 | ||
113 | struct nfs4_deviceid_node * | 184 | struct nfs4_deviceid_node * |
114 | nfs4_find_get_deviceid(const struct pnfs_layoutdriver_type *ld, | 185 | nfs4_find_get_deviceid(struct nfs_server *server, |
115 | const struct nfs_client *clp, const struct nfs4_deviceid *id) | 186 | const struct nfs4_deviceid *id, struct rpc_cred *cred, |
187 | gfp_t gfp_mask) | ||
116 | { | 188 | { |
117 | return _find_get_deviceid(ld, clp, id, nfs4_deviceid_hash(id)); | 189 | long hash = nfs4_deviceid_hash(id); |
190 | struct nfs4_deviceid_node *d, *new; | ||
191 | |||
192 | d = __nfs4_find_get_deviceid(server, id, hash); | ||
193 | if (d) | ||
194 | return d; | ||
195 | |||
196 | new = nfs4_get_device_info(server, id, cred, gfp_mask); | ||
197 | if (!new) | ||
198 | return new; | ||
199 | |||
200 | spin_lock(&nfs4_deviceid_lock); | ||
201 | d = __nfs4_find_get_deviceid(server, id, hash); | ||
202 | if (d) { | ||
203 | spin_unlock(&nfs4_deviceid_lock); | ||
204 | server->pnfs_curr_ld->free_deviceid_node(new); | ||
205 | return d; | ||
206 | } | ||
207 | hlist_add_head_rcu(&new->node, &nfs4_deviceid_cache[hash]); | ||
208 | atomic_inc(&new->ref); | ||
209 | spin_unlock(&nfs4_deviceid_lock); | ||
210 | |||
211 | return new; | ||
118 | } | 212 | } |
119 | EXPORT_SYMBOL_GPL(nfs4_find_get_deviceid); | 213 | EXPORT_SYMBOL_GPL(nfs4_find_get_deviceid); |
120 | 214 | ||
@@ -151,15 +245,13 @@ nfs4_delete_deviceid(const struct pnfs_layoutdriver_type *ld, | |||
151 | EXPORT_SYMBOL_GPL(nfs4_delete_deviceid); | 245 | EXPORT_SYMBOL_GPL(nfs4_delete_deviceid); |
152 | 246 | ||
153 | void | 247 | void |
154 | nfs4_init_deviceid_node(struct nfs4_deviceid_node *d, | 248 | nfs4_init_deviceid_node(struct nfs4_deviceid_node *d, struct nfs_server *server, |
155 | const struct pnfs_layoutdriver_type *ld, | ||
156 | const struct nfs_client *nfs_client, | ||
157 | const struct nfs4_deviceid *id) | 249 | const struct nfs4_deviceid *id) |
158 | { | 250 | { |
159 | INIT_HLIST_NODE(&d->node); | 251 | INIT_HLIST_NODE(&d->node); |
160 | INIT_HLIST_NODE(&d->tmpnode); | 252 | INIT_HLIST_NODE(&d->tmpnode); |
161 | d->ld = ld; | 253 | d->ld = server->pnfs_curr_ld; |
162 | d->nfs_client = nfs_client; | 254 | d->nfs_client = server->nfs_client; |
163 | d->flags = 0; | 255 | d->flags = 0; |
164 | d->deviceid = *id; | 256 | d->deviceid = *id; |
165 | atomic_set(&d->ref, 1); | 257 | atomic_set(&d->ref, 1); |
@@ -167,39 +259,6 @@ nfs4_init_deviceid_node(struct nfs4_deviceid_node *d, | |||
167 | EXPORT_SYMBOL_GPL(nfs4_init_deviceid_node); | 259 | EXPORT_SYMBOL_GPL(nfs4_init_deviceid_node); |
168 | 260 | ||
169 | /* | 261 | /* |
170 | * Uniquely initialize and insert a deviceid node into cache | ||
171 | * | ||
172 | * @new new deviceid node | ||
173 | * Note that the caller must set up the following members: | ||
174 | * new->ld | ||
175 | * new->nfs_client | ||
176 | * new->deviceid | ||
177 | * | ||
178 | * @ret the inserted node, if none found, otherwise, the found entry. | ||
179 | */ | ||
180 | struct nfs4_deviceid_node * | ||
181 | nfs4_insert_deviceid_node(struct nfs4_deviceid_node *new) | ||
182 | { | ||
183 | struct nfs4_deviceid_node *d; | ||
184 | long hash; | ||
185 | |||
186 | spin_lock(&nfs4_deviceid_lock); | ||
187 | hash = nfs4_deviceid_hash(&new->deviceid); | ||
188 | d = _find_get_deviceid(new->ld, new->nfs_client, &new->deviceid, hash); | ||
189 | if (d) { | ||
190 | spin_unlock(&nfs4_deviceid_lock); | ||
191 | return d; | ||
192 | } | ||
193 | |||
194 | hlist_add_head_rcu(&new->node, &nfs4_deviceid_cache[hash]); | ||
195 | spin_unlock(&nfs4_deviceid_lock); | ||
196 | atomic_inc(&new->ref); | ||
197 | |||
198 | return new; | ||
199 | } | ||
200 | EXPORT_SYMBOL_GPL(nfs4_insert_deviceid_node); | ||
201 | |||
202 | /* | ||
203 | * Dereference a deviceid node and delete it when its reference count drops | 262 | * Dereference a deviceid node and delete it when its reference count drops |
204 | * to zero. | 263 | * to zero. |
205 | * | 264 | * |
@@ -299,4 +358,3 @@ nfs4_deviceid_mark_client_invalid(struct nfs_client *clp) | |||
299 | } | 358 | } |
300 | rcu_read_unlock(); | 359 | rcu_read_unlock(); |
301 | } | 360 | } |
302 | |||
diff --git a/fs/nfs/super.c b/fs/nfs/super.c index e4499d5b51e8..31a11b0e885d 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c | |||
@@ -2065,11 +2065,6 @@ static int nfs23_validate_mount_data(void *options, | |||
2065 | return NFS_TEXT_DATA; | 2065 | return NFS_TEXT_DATA; |
2066 | } | 2066 | } |
2067 | 2067 | ||
2068 | #if !IS_ENABLED(CONFIG_NFS_V3) | ||
2069 | if (args->version == 3) | ||
2070 | goto out_v3_not_compiled; | ||
2071 | #endif /* !CONFIG_NFS_V3 */ | ||
2072 | |||
2073 | return 0; | 2068 | return 0; |
2074 | 2069 | ||
2075 | out_no_data: | 2070 | out_no_data: |
@@ -2085,12 +2080,6 @@ out_no_sec: | |||
2085 | dfprintk(MOUNT, "NFS: nfs_mount_data version supports only AUTH_SYS\n"); | 2080 | dfprintk(MOUNT, "NFS: nfs_mount_data version supports only AUTH_SYS\n"); |
2086 | return -EINVAL; | 2081 | return -EINVAL; |
2087 | 2082 | ||
2088 | #if !IS_ENABLED(CONFIG_NFS_V3) | ||
2089 | out_v3_not_compiled: | ||
2090 | dfprintk(MOUNT, "NFS: NFSv3 is not compiled into kernel\n"); | ||
2091 | return -EPROTONOSUPPORT; | ||
2092 | #endif /* !CONFIG_NFS_V3 */ | ||
2093 | |||
2094 | out_nomem: | 2083 | out_nomem: |
2095 | dfprintk(MOUNT, "NFS: not enough memory to handle mount options\n"); | 2084 | dfprintk(MOUNT, "NFS: not enough memory to handle mount options\n"); |
2096 | return -ENOMEM; | 2085 | return -ENOMEM; |
diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 175d5d073ccf..12493846a2d3 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c | |||
@@ -49,6 +49,9 @@ static const struct nfs_rw_ops nfs_rw_write_ops; | |||
49 | static void nfs_clear_request_commit(struct nfs_page *req); | 49 | static void nfs_clear_request_commit(struct nfs_page *req); |
50 | static void nfs_init_cinfo_from_inode(struct nfs_commit_info *cinfo, | 50 | static void nfs_init_cinfo_from_inode(struct nfs_commit_info *cinfo, |
51 | struct inode *inode); | 51 | struct inode *inode); |
52 | static struct nfs_page * | ||
53 | nfs_page_search_commits_for_head_request_locked(struct nfs_inode *nfsi, | ||
54 | struct page *page); | ||
52 | 55 | ||
53 | static struct kmem_cache *nfs_wdata_cachep; | 56 | static struct kmem_cache *nfs_wdata_cachep; |
54 | static mempool_t *nfs_wdata_mempool; | 57 | static mempool_t *nfs_wdata_mempool; |
@@ -95,38 +98,6 @@ static void nfs_context_set_write_error(struct nfs_open_context *ctx, int error) | |||
95 | } | 98 | } |
96 | 99 | ||
97 | /* | 100 | /* |
98 | * nfs_page_search_commits_for_head_request_locked | ||
99 | * | ||
100 | * Search through commit lists on @inode for the head request for @page. | ||
101 | * Must be called while holding the inode (which is cinfo) lock. | ||
102 | * | ||
103 | * Returns the head request if found, or NULL if not found. | ||
104 | */ | ||
105 | static struct nfs_page * | ||
106 | nfs_page_search_commits_for_head_request_locked(struct nfs_inode *nfsi, | ||
107 | struct page *page) | ||
108 | { | ||
109 | struct nfs_page *freq, *t; | ||
110 | struct nfs_commit_info cinfo; | ||
111 | struct inode *inode = &nfsi->vfs_inode; | ||
112 | |||
113 | nfs_init_cinfo_from_inode(&cinfo, inode); | ||
114 | |||
115 | /* search through pnfs commit lists */ | ||
116 | freq = pnfs_search_commit_reqs(inode, &cinfo, page); | ||
117 | if (freq) | ||
118 | return freq->wb_head; | ||
119 | |||
120 | /* Linearly search the commit list for the correct request */ | ||
121 | list_for_each_entry_safe(freq, t, &cinfo.mds->list, wb_list) { | ||
122 | if (freq->wb_page == page) | ||
123 | return freq->wb_head; | ||
124 | } | ||
125 | |||
126 | return NULL; | ||
127 | } | ||
128 | |||
129 | /* | ||
130 | * nfs_page_find_head_request_locked - find head request associated with @page | 101 | * nfs_page_find_head_request_locked - find head request associated with @page |
131 | * | 102 | * |
132 | * must be called while holding the inode lock. | 103 | * must be called while holding the inode lock. |
@@ -271,11 +242,14 @@ static void nfs_mark_uptodate(struct nfs_page *req) | |||
271 | 242 | ||
272 | static int wb_priority(struct writeback_control *wbc) | 243 | static int wb_priority(struct writeback_control *wbc) |
273 | { | 244 | { |
245 | int ret = 0; | ||
274 | if (wbc->for_reclaim) | 246 | if (wbc->for_reclaim) |
275 | return FLUSH_HIGHPRI | FLUSH_STABLE; | 247 | return FLUSH_HIGHPRI | FLUSH_STABLE; |
248 | if (wbc->sync_mode == WB_SYNC_ALL) | ||
249 | ret = FLUSH_COND_STABLE; | ||
276 | if (wbc->for_kupdate || wbc->for_background) | 250 | if (wbc->for_kupdate || wbc->for_background) |
277 | return FLUSH_LOWPRI | FLUSH_COND_STABLE; | 251 | ret |= FLUSH_LOWPRI; |
278 | return FLUSH_COND_STABLE; | 252 | return ret; |
279 | } | 253 | } |
280 | 254 | ||
281 | /* | 255 | /* |
@@ -731,6 +705,8 @@ static void nfs_inode_remove_request(struct nfs_page *req) | |||
731 | if (likely(!PageSwapCache(head->wb_page))) { | 705 | if (likely(!PageSwapCache(head->wb_page))) { |
732 | set_page_private(head->wb_page, 0); | 706 | set_page_private(head->wb_page, 0); |
733 | ClearPagePrivate(head->wb_page); | 707 | ClearPagePrivate(head->wb_page); |
708 | smp_mb__after_atomic(); | ||
709 | wake_up_page(head->wb_page, PG_private); | ||
734 | clear_bit(PG_MAPPED, &head->wb_flags); | 710 | clear_bit(PG_MAPPED, &head->wb_flags); |
735 | } | 711 | } |
736 | nfsi->npages--; | 712 | nfsi->npages--; |
@@ -749,7 +725,38 @@ nfs_mark_request_dirty(struct nfs_page *req) | |||
749 | __set_page_dirty_nobuffers(req->wb_page); | 725 | __set_page_dirty_nobuffers(req->wb_page); |
750 | } | 726 | } |
751 | 727 | ||
752 | #if IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4) | 728 | /* |
729 | * nfs_page_search_commits_for_head_request_locked | ||
730 | * | ||
731 | * Search through commit lists on @inode for the head request for @page. | ||
732 | * Must be called while holding the inode (which is cinfo) lock. | ||
733 | * | ||
734 | * Returns the head request if found, or NULL if not found. | ||
735 | */ | ||
736 | static struct nfs_page * | ||
737 | nfs_page_search_commits_for_head_request_locked(struct nfs_inode *nfsi, | ||
738 | struct page *page) | ||
739 | { | ||
740 | struct nfs_page *freq, *t; | ||
741 | struct nfs_commit_info cinfo; | ||
742 | struct inode *inode = &nfsi->vfs_inode; | ||
743 | |||
744 | nfs_init_cinfo_from_inode(&cinfo, inode); | ||
745 | |||
746 | /* search through pnfs commit lists */ | ||
747 | freq = pnfs_search_commit_reqs(inode, &cinfo, page); | ||
748 | if (freq) | ||
749 | return freq->wb_head; | ||
750 | |||
751 | /* Linearly search the commit list for the correct request */ | ||
752 | list_for_each_entry_safe(freq, t, &cinfo.mds->list, wb_list) { | ||
753 | if (freq->wb_page == page) | ||
754 | return freq->wb_head; | ||
755 | } | ||
756 | |||
757 | return NULL; | ||
758 | } | ||
759 | |||
753 | /** | 760 | /** |
754 | * nfs_request_add_commit_list - add request to a commit list | 761 | * nfs_request_add_commit_list - add request to a commit list |
755 | * @req: pointer to a struct nfs_page | 762 | * @req: pointer to a struct nfs_page |
@@ -867,36 +874,6 @@ int nfs_write_need_commit(struct nfs_pgio_header *hdr) | |||
867 | return hdr->verf.committed != NFS_FILE_SYNC; | 874 | return hdr->verf.committed != NFS_FILE_SYNC; |
868 | } | 875 | } |
869 | 876 | ||
870 | #else | ||
871 | static void nfs_init_cinfo_from_inode(struct nfs_commit_info *cinfo, | ||
872 | struct inode *inode) | ||
873 | { | ||
874 | } | ||
875 | |||
876 | void nfs_init_cinfo(struct nfs_commit_info *cinfo, | ||
877 | struct inode *inode, | ||
878 | struct nfs_direct_req *dreq) | ||
879 | { | ||
880 | } | ||
881 | |||
882 | void | ||
883 | nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg, | ||
884 | struct nfs_commit_info *cinfo) | ||
885 | { | ||
886 | } | ||
887 | |||
888 | static void | ||
889 | nfs_clear_request_commit(struct nfs_page *req) | ||
890 | { | ||
891 | } | ||
892 | |||
893 | int nfs_write_need_commit(struct nfs_pgio_header *hdr) | ||
894 | { | ||
895 | return 0; | ||
896 | } | ||
897 | |||
898 | #endif | ||
899 | |||
900 | static void nfs_write_completion(struct nfs_pgio_header *hdr) | 877 | static void nfs_write_completion(struct nfs_pgio_header *hdr) |
901 | { | 878 | { |
902 | struct nfs_commit_info cinfo; | 879 | struct nfs_commit_info cinfo; |
@@ -932,7 +909,6 @@ out: | |||
932 | hdr->release(hdr); | 909 | hdr->release(hdr); |
933 | } | 910 | } |
934 | 911 | ||
935 | #if IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4) | ||
936 | unsigned long | 912 | unsigned long |
937 | nfs_reqs_to_commit(struct nfs_commit_info *cinfo) | 913 | nfs_reqs_to_commit(struct nfs_commit_info *cinfo) |
938 | { | 914 | { |
@@ -989,19 +965,6 @@ nfs_scan_commit(struct inode *inode, struct list_head *dst, | |||
989 | return ret; | 965 | return ret; |
990 | } | 966 | } |
991 | 967 | ||
992 | #else | ||
993 | unsigned long nfs_reqs_to_commit(struct nfs_commit_info *cinfo) | ||
994 | { | ||
995 | return 0; | ||
996 | } | ||
997 | |||
998 | int nfs_scan_commit(struct inode *inode, struct list_head *dst, | ||
999 | struct nfs_commit_info *cinfo) | ||
1000 | { | ||
1001 | return 0; | ||
1002 | } | ||
1003 | #endif | ||
1004 | |||
1005 | /* | 968 | /* |
1006 | * Search for an existing write request, and attempt to update | 969 | * Search for an existing write request, and attempt to update |
1007 | * it to reflect a new dirty region on a given page. | 970 | * it to reflect a new dirty region on a given page. |
@@ -1394,7 +1357,6 @@ static int nfs_writeback_done(struct rpc_task *task, | |||
1394 | return status; | 1357 | return status; |
1395 | nfs_add_stats(inode, NFSIOS_SERVERWRITTENBYTES, hdr->res.count); | 1358 | nfs_add_stats(inode, NFSIOS_SERVERWRITTENBYTES, hdr->res.count); |
1396 | 1359 | ||
1397 | #if IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4) | ||
1398 | if (hdr->res.verf->committed < hdr->args.stable && | 1360 | if (hdr->res.verf->committed < hdr->args.stable && |
1399 | task->tk_status >= 0) { | 1361 | task->tk_status >= 0) { |
1400 | /* We tried a write call, but the server did not | 1362 | /* We tried a write call, but the server did not |
@@ -1416,7 +1378,6 @@ static int nfs_writeback_done(struct rpc_task *task, | |||
1416 | complain = jiffies + 300 * HZ; | 1378 | complain = jiffies + 300 * HZ; |
1417 | } | 1379 | } |
1418 | } | 1380 | } |
1419 | #endif | ||
1420 | 1381 | ||
1421 | /* Deal with the suid/sgid bit corner case */ | 1382 | /* Deal with the suid/sgid bit corner case */ |
1422 | if (nfs_should_remove_suid(inode)) | 1383 | if (nfs_should_remove_suid(inode)) |
@@ -1469,7 +1430,6 @@ static void nfs_writeback_result(struct rpc_task *task, | |||
1469 | } | 1430 | } |
1470 | 1431 | ||
1471 | 1432 | ||
1472 | #if IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4) | ||
1473 | static int nfs_commit_set_lock(struct nfs_inode *nfsi, int may_wait) | 1433 | static int nfs_commit_set_lock(struct nfs_inode *nfsi, int may_wait) |
1474 | { | 1434 | { |
1475 | int ret; | 1435 | int ret; |
@@ -1538,6 +1498,18 @@ int nfs_initiate_commit(struct rpc_clnt *clnt, struct nfs_commit_data *data, | |||
1538 | } | 1498 | } |
1539 | EXPORT_SYMBOL_GPL(nfs_initiate_commit); | 1499 | EXPORT_SYMBOL_GPL(nfs_initiate_commit); |
1540 | 1500 | ||
1501 | static loff_t nfs_get_lwb(struct list_head *head) | ||
1502 | { | ||
1503 | loff_t lwb = 0; | ||
1504 | struct nfs_page *req; | ||
1505 | |||
1506 | list_for_each_entry(req, head, wb_list) | ||
1507 | if (lwb < (req_offset(req) + req->wb_bytes)) | ||
1508 | lwb = req_offset(req) + req->wb_bytes; | ||
1509 | |||
1510 | return lwb; | ||
1511 | } | ||
1512 | |||
1541 | /* | 1513 | /* |
1542 | * Set up the argument/result storage required for the RPC call. | 1514 | * Set up the argument/result storage required for the RPC call. |
1543 | */ | 1515 | */ |
@@ -1557,6 +1529,9 @@ void nfs_init_commit(struct nfs_commit_data *data, | |||
1557 | data->inode = inode; | 1529 | data->inode = inode; |
1558 | data->cred = first->wb_context->cred; | 1530 | data->cred = first->wb_context->cred; |
1559 | data->lseg = lseg; /* reference transferred */ | 1531 | data->lseg = lseg; /* reference transferred */ |
1532 | /* only set lwb for pnfs commit */ | ||
1533 | if (lseg) | ||
1534 | data->lwb = nfs_get_lwb(&data->pages); | ||
1560 | data->mds_ops = &nfs_commit_ops; | 1535 | data->mds_ops = &nfs_commit_ops; |
1561 | data->completion_ops = cinfo->completion_ops; | 1536 | data->completion_ops = cinfo->completion_ops; |
1562 | data->dreq = cinfo->dreq; | 1537 | data->dreq = cinfo->dreq; |
@@ -1636,6 +1611,7 @@ static void nfs_commit_release_pages(struct nfs_commit_data *data) | |||
1636 | struct nfs_page *req; | 1611 | struct nfs_page *req; |
1637 | int status = data->task.tk_status; | 1612 | int status = data->task.tk_status; |
1638 | struct nfs_commit_info cinfo; | 1613 | struct nfs_commit_info cinfo; |
1614 | struct nfs_server *nfss; | ||
1639 | 1615 | ||
1640 | while (!list_empty(&data->pages)) { | 1616 | while (!list_empty(&data->pages)) { |
1641 | req = nfs_list_entry(data->pages.next); | 1617 | req = nfs_list_entry(data->pages.next); |
@@ -1669,6 +1645,10 @@ static void nfs_commit_release_pages(struct nfs_commit_data *data) | |||
1669 | next: | 1645 | next: |
1670 | nfs_unlock_and_release_request(req); | 1646 | nfs_unlock_and_release_request(req); |
1671 | } | 1647 | } |
1648 | nfss = NFS_SERVER(data->inode); | ||
1649 | if (atomic_long_read(&nfss->writeback) < NFS_CONGESTION_OFF_THRESH) | ||
1650 | clear_bdi_congested(&nfss->backing_dev_info, BLK_RW_ASYNC); | ||
1651 | |||
1672 | nfs_init_cinfo(&cinfo, data->inode, data->dreq); | 1652 | nfs_init_cinfo(&cinfo, data->inode, data->dreq); |
1673 | if (atomic_dec_and_test(&cinfo.mds->rpcs_out)) | 1653 | if (atomic_dec_and_test(&cinfo.mds->rpcs_out)) |
1674 | nfs_commit_clear_lock(NFS_I(data->inode)); | 1654 | nfs_commit_clear_lock(NFS_I(data->inode)); |
@@ -1778,12 +1758,6 @@ out_mark_dirty: | |||
1778 | __mark_inode_dirty(inode, I_DIRTY_DATASYNC); | 1758 | __mark_inode_dirty(inode, I_DIRTY_DATASYNC); |
1779 | return ret; | 1759 | return ret; |
1780 | } | 1760 | } |
1781 | #else | ||
1782 | static int nfs_commit_unstable_pages(struct inode *inode, struct writeback_control *wbc) | ||
1783 | { | ||
1784 | return 0; | ||
1785 | } | ||
1786 | #endif | ||
1787 | 1761 | ||
1788 | int nfs_write_inode(struct inode *inode, struct writeback_control *wbc) | 1762 | int nfs_write_inode(struct inode *inode, struct writeback_control *wbc) |
1789 | { | 1763 | { |
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 5180a7ededec..28d649054d5f 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h | |||
@@ -443,22 +443,6 @@ static inline struct rpc_cred *nfs_file_cred(struct file *file) | |||
443 | } | 443 | } |
444 | 444 | ||
445 | /* | 445 | /* |
446 | * linux/fs/nfs/xattr.c | ||
447 | */ | ||
448 | #ifdef CONFIG_NFS_V3_ACL | ||
449 | extern ssize_t nfs3_listxattr(struct dentry *, char *, size_t); | ||
450 | extern ssize_t nfs3_getxattr(struct dentry *, const char *, void *, size_t); | ||
451 | extern int nfs3_setxattr(struct dentry *, const char *, | ||
452 | const void *, size_t, int); | ||
453 | extern int nfs3_removexattr (struct dentry *, const char *name); | ||
454 | #else | ||
455 | # define nfs3_listxattr NULL | ||
456 | # define nfs3_getxattr NULL | ||
457 | # define nfs3_setxattr NULL | ||
458 | # define nfs3_removexattr NULL | ||
459 | #endif | ||
460 | |||
461 | /* | ||
462 | * linux/fs/nfs/direct.c | 446 | * linux/fs/nfs/direct.c |
463 | */ | 447 | */ |
464 | extern ssize_t nfs_direct_IO(int, struct kiocb *, struct iov_iter *, loff_t); | 448 | extern ssize_t nfs_direct_IO(int, struct kiocb *, struct iov_iter *, loff_t); |
@@ -529,17 +513,9 @@ extern int nfs_updatepage(struct file *, struct page *, unsigned int, unsigned | |||
529 | extern int nfs_wb_all(struct inode *inode); | 513 | extern int nfs_wb_all(struct inode *inode); |
530 | extern int nfs_wb_page(struct inode *inode, struct page* page); | 514 | extern int nfs_wb_page(struct inode *inode, struct page* page); |
531 | extern int nfs_wb_page_cancel(struct inode *inode, struct page* page); | 515 | extern int nfs_wb_page_cancel(struct inode *inode, struct page* page); |
532 | #if IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4) | ||
533 | extern int nfs_commit_inode(struct inode *, int); | 516 | extern int nfs_commit_inode(struct inode *, int); |
534 | extern struct nfs_commit_data *nfs_commitdata_alloc(void); | 517 | extern struct nfs_commit_data *nfs_commitdata_alloc(void); |
535 | extern void nfs_commit_free(struct nfs_commit_data *data); | 518 | extern void nfs_commit_free(struct nfs_commit_data *data); |
536 | #else | ||
537 | static inline int | ||
538 | nfs_commit_inode(struct inode *inode, int how) | ||
539 | { | ||
540 | return 0; | ||
541 | } | ||
542 | #endif | ||
543 | 519 | ||
544 | static inline int | 520 | static inline int |
545 | nfs_have_writebacks(struct inode *inode) | 521 | nfs_have_writebacks(struct inode *inode) |
@@ -557,23 +533,6 @@ extern int nfs_readpage_async(struct nfs_open_context *, struct inode *, | |||
557 | struct page *); | 533 | struct page *); |
558 | 534 | ||
559 | /* | 535 | /* |
560 | * linux/fs/nfs3proc.c | ||
561 | */ | ||
562 | #ifdef CONFIG_NFS_V3_ACL | ||
563 | extern struct posix_acl *nfs3_get_acl(struct inode *inode, int type); | ||
564 | extern int nfs3_set_acl(struct inode *inode, struct posix_acl *acl, int type); | ||
565 | extern int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl, | ||
566 | struct posix_acl *dfacl); | ||
567 | extern const struct xattr_handler *nfs3_xattr_handlers[]; | ||
568 | #else | ||
569 | static inline int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl, | ||
570 | struct posix_acl *dfacl) | ||
571 | { | ||
572 | return 0; | ||
573 | } | ||
574 | #endif /* CONFIG_NFS_V3_ACL */ | ||
575 | |||
576 | /* | ||
577 | * inline functions | 536 | * inline functions |
578 | */ | 537 | */ |
579 | 538 | ||
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 0040629894df..6951c7d9097d 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h | |||
@@ -252,17 +252,6 @@ struct nfs4_layoutget { | |||
252 | gfp_t gfp_flags; | 252 | gfp_t gfp_flags; |
253 | }; | 253 | }; |
254 | 254 | ||
255 | struct nfs4_getdevicelist_args { | ||
256 | struct nfs4_sequence_args seq_args; | ||
257 | const struct nfs_fh *fh; | ||
258 | u32 layoutclass; | ||
259 | }; | ||
260 | |||
261 | struct nfs4_getdevicelist_res { | ||
262 | struct nfs4_sequence_res seq_res; | ||
263 | struct pnfs_devicelist *devlist; | ||
264 | }; | ||
265 | |||
266 | struct nfs4_getdeviceinfo_args { | 255 | struct nfs4_getdeviceinfo_args { |
267 | struct nfs4_sequence_args seq_args; | 256 | struct nfs4_sequence_args seq_args; |
268 | struct pnfs_device *pdev; | 257 | struct pnfs_device *pdev; |
@@ -279,6 +268,9 @@ struct nfs4_layoutcommit_args { | |||
279 | __u64 lastbytewritten; | 268 | __u64 lastbytewritten; |
280 | struct inode *inode; | 269 | struct inode *inode; |
281 | const u32 *bitmask; | 270 | const u32 *bitmask; |
271 | size_t layoutupdate_len; | ||
272 | struct page *layoutupdate_page; | ||
273 | struct page **layoutupdate_pages; | ||
282 | }; | 274 | }; |
283 | 275 | ||
284 | struct nfs4_layoutcommit_res { | 276 | struct nfs4_layoutcommit_res { |
@@ -1328,6 +1320,7 @@ struct nfs_commit_data { | |||
1328 | struct pnfs_layout_segment *lseg; | 1320 | struct pnfs_layout_segment *lseg; |
1329 | struct nfs_client *ds_clp; /* pNFS data server */ | 1321 | struct nfs_client *ds_clp; /* pNFS data server */ |
1330 | int ds_commit_index; | 1322 | int ds_commit_index; |
1323 | loff_t lwb; | ||
1331 | const struct rpc_call_ops *mds_ops; | 1324 | const struct rpc_call_ops *mds_ops; |
1332 | const struct nfs_commit_completion_ops *completion_ops; | 1325 | const struct nfs_commit_completion_ops *completion_ops; |
1333 | int (*commit_done_cb) (struct rpc_task *task, struct nfs_commit_data *data); | 1326 | int (*commit_done_cb) (struct rpc_task *task, struct nfs_commit_data *data); |
@@ -1346,6 +1339,7 @@ struct nfs_unlinkdata { | |||
1346 | struct inode *dir; | 1339 | struct inode *dir; |
1347 | struct rpc_cred *cred; | 1340 | struct rpc_cred *cred; |
1348 | struct nfs_fattr dir_attr; | 1341 | struct nfs_fattr dir_attr; |
1342 | long timeout; | ||
1349 | }; | 1343 | }; |
1350 | 1344 | ||
1351 | struct nfs_renamedata { | 1345 | struct nfs_renamedata { |
@@ -1359,6 +1353,7 @@ struct nfs_renamedata { | |||
1359 | struct dentry *new_dentry; | 1353 | struct dentry *new_dentry; |
1360 | struct nfs_fattr new_fattr; | 1354 | struct nfs_fattr new_fattr; |
1361 | void (*complete)(struct rpc_task *, struct nfs_renamedata *); | 1355 | void (*complete)(struct rpc_task *, struct nfs_renamedata *); |
1356 | long timeout; | ||
1362 | }; | 1357 | }; |
1363 | 1358 | ||
1364 | struct nfs_access_entry; | 1359 | struct nfs_access_entry; |
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 3df8c7db7a4e..2dca0cef3506 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h | |||
@@ -496,12 +496,14 @@ static inline int lock_page_or_retry(struct page *page, struct mm_struct *mm, | |||
496 | } | 496 | } |
497 | 497 | ||
498 | /* | 498 | /* |
499 | * This is exported only for wait_on_page_locked/wait_on_page_writeback. | 499 | * This is exported only for wait_on_page_locked/wait_on_page_writeback, |
500 | * Never use this directly! | 500 | * and for filesystems which need to wait on PG_private. |
501 | */ | 501 | */ |
502 | extern void wait_on_page_bit(struct page *page, int bit_nr); | 502 | extern void wait_on_page_bit(struct page *page, int bit_nr); |
503 | 503 | ||
504 | extern int wait_on_page_bit_killable(struct page *page, int bit_nr); | 504 | extern int wait_on_page_bit_killable(struct page *page, int bit_nr); |
505 | extern int wait_on_page_bit_killable_timeout(struct page *page, | ||
506 | int bit_nr, unsigned long timeout); | ||
505 | 507 | ||
506 | static inline int wait_on_page_locked_killable(struct page *page) | 508 | static inline int wait_on_page_locked_killable(struct page *page) |
507 | { | 509 | { |
@@ -510,6 +512,12 @@ static inline int wait_on_page_locked_killable(struct page *page) | |||
510 | return 0; | 512 | return 0; |
511 | } | 513 | } |
512 | 514 | ||
515 | extern wait_queue_head_t *page_waitqueue(struct page *page); | ||
516 | static inline void wake_up_page(struct page *page, int bit) | ||
517 | { | ||
518 | __wake_up_bit(page_waitqueue(page), &page->flags, bit); | ||
519 | } | ||
520 | |||
513 | /* | 521 | /* |
514 | * Wait for a page to be unlocked. | 522 | * Wait for a page to be unlocked. |
515 | * | 523 | * |
diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index fcbfe8783243..cf391eef2e6d 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h | |||
@@ -357,6 +357,7 @@ int xs_swapper(struct rpc_xprt *xprt, int enable); | |||
357 | #define XPRT_CONNECTION_ABORT (7) | 357 | #define XPRT_CONNECTION_ABORT (7) |
358 | #define XPRT_CONNECTION_CLOSE (8) | 358 | #define XPRT_CONNECTION_CLOSE (8) |
359 | #define XPRT_CONGESTED (9) | 359 | #define XPRT_CONGESTED (9) |
360 | #define XPRT_CONNECTION_REUSE (10) | ||
360 | 361 | ||
361 | static inline void xprt_set_connected(struct rpc_xprt *xprt) | 362 | static inline void xprt_set_connected(struct rpc_xprt *xprt) |
362 | { | 363 | { |
diff --git a/include/linux/wait.h b/include/linux/wait.h index 6fb1ba5f9b2f..80115bf88671 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h | |||
@@ -25,7 +25,7 @@ struct wait_bit_key { | |||
25 | void *flags; | 25 | void *flags; |
26 | int bit_nr; | 26 | int bit_nr; |
27 | #define WAIT_ATOMIC_T_BIT_NR -1 | 27 | #define WAIT_ATOMIC_T_BIT_NR -1 |
28 | unsigned long private; | 28 | unsigned long timeout; |
29 | }; | 29 | }; |
30 | 30 | ||
31 | struct wait_bit_queue { | 31 | struct wait_bit_queue { |
@@ -154,6 +154,7 @@ int __wait_on_bit_lock(wait_queue_head_t *, struct wait_bit_queue *, wait_bit_ac | |||
154 | void wake_up_bit(void *, int); | 154 | void wake_up_bit(void *, int); |
155 | void wake_up_atomic_t(atomic_t *); | 155 | void wake_up_atomic_t(atomic_t *); |
156 | int out_of_line_wait_on_bit(void *, int, wait_bit_action_f *, unsigned); | 156 | int out_of_line_wait_on_bit(void *, int, wait_bit_action_f *, unsigned); |
157 | int out_of_line_wait_on_bit_timeout(void *, int, wait_bit_action_f *, unsigned, unsigned long); | ||
157 | int out_of_line_wait_on_bit_lock(void *, int, wait_bit_action_f *, unsigned); | 158 | int out_of_line_wait_on_bit_lock(void *, int, wait_bit_action_f *, unsigned); |
158 | int out_of_line_wait_on_atomic_t(atomic_t *, int (*)(atomic_t *), unsigned); | 159 | int out_of_line_wait_on_atomic_t(atomic_t *, int (*)(atomic_t *), unsigned); |
159 | wait_queue_head_t *bit_waitqueue(void *, int); | 160 | wait_queue_head_t *bit_waitqueue(void *, int); |
@@ -859,6 +860,8 @@ int wake_bit_function(wait_queue_t *wait, unsigned mode, int sync, void *key); | |||
859 | 860 | ||
860 | extern int bit_wait(struct wait_bit_key *); | 861 | extern int bit_wait(struct wait_bit_key *); |
861 | extern int bit_wait_io(struct wait_bit_key *); | 862 | extern int bit_wait_io(struct wait_bit_key *); |
863 | extern int bit_wait_timeout(struct wait_bit_key *); | ||
864 | extern int bit_wait_io_timeout(struct wait_bit_key *); | ||
862 | 865 | ||
863 | /** | 866 | /** |
864 | * wait_on_bit - wait for a bit to be cleared | 867 | * wait_on_bit - wait for a bit to be cleared |
diff --git a/kernel/sched/wait.c b/kernel/sched/wait.c index 15cab1a4f84e..5a62915f47a8 100644 --- a/kernel/sched/wait.c +++ b/kernel/sched/wait.c | |||
@@ -343,6 +343,18 @@ int __sched out_of_line_wait_on_bit(void *word, int bit, | |||
343 | } | 343 | } |
344 | EXPORT_SYMBOL(out_of_line_wait_on_bit); | 344 | EXPORT_SYMBOL(out_of_line_wait_on_bit); |
345 | 345 | ||
346 | int __sched out_of_line_wait_on_bit_timeout( | ||
347 | void *word, int bit, wait_bit_action_f *action, | ||
348 | unsigned mode, unsigned long timeout) | ||
349 | { | ||
350 | wait_queue_head_t *wq = bit_waitqueue(word, bit); | ||
351 | DEFINE_WAIT_BIT(wait, word, bit); | ||
352 | |||
353 | wait.key.timeout = jiffies + timeout; | ||
354 | return __wait_on_bit(wq, &wait, action, mode); | ||
355 | } | ||
356 | EXPORT_SYMBOL_GPL(out_of_line_wait_on_bit_timeout); | ||
357 | |||
346 | int __sched | 358 | int __sched |
347 | __wait_on_bit_lock(wait_queue_head_t *wq, struct wait_bit_queue *q, | 359 | __wait_on_bit_lock(wait_queue_head_t *wq, struct wait_bit_queue *q, |
348 | wait_bit_action_f *action, unsigned mode) | 360 | wait_bit_action_f *action, unsigned mode) |
@@ -520,3 +532,27 @@ __sched int bit_wait_io(struct wait_bit_key *word) | |||
520 | return 0; | 532 | return 0; |
521 | } | 533 | } |
522 | EXPORT_SYMBOL(bit_wait_io); | 534 | EXPORT_SYMBOL(bit_wait_io); |
535 | |||
536 | __sched int bit_wait_timeout(struct wait_bit_key *word) | ||
537 | { | ||
538 | unsigned long now = ACCESS_ONCE(jiffies); | ||
539 | if (signal_pending_state(current->state, current)) | ||
540 | return 1; | ||
541 | if (time_after_eq(now, word->timeout)) | ||
542 | return -EAGAIN; | ||
543 | schedule_timeout(word->timeout - now); | ||
544 | return 0; | ||
545 | } | ||
546 | EXPORT_SYMBOL_GPL(bit_wait_timeout); | ||
547 | |||
548 | __sched int bit_wait_io_timeout(struct wait_bit_key *word) | ||
549 | { | ||
550 | unsigned long now = ACCESS_ONCE(jiffies); | ||
551 | if (signal_pending_state(current->state, current)) | ||
552 | return 1; | ||
553 | if (time_after_eq(now, word->timeout)) | ||
554 | return -EAGAIN; | ||
555 | io_schedule_timeout(word->timeout - now); | ||
556 | return 0; | ||
557 | } | ||
558 | EXPORT_SYMBOL_GPL(bit_wait_io_timeout); | ||
diff --git a/mm/filemap.c b/mm/filemap.c index 90effcdf948d..b9b1413080be 100644 --- a/mm/filemap.c +++ b/mm/filemap.c | |||
@@ -670,17 +670,13 @@ EXPORT_SYMBOL(__page_cache_alloc); | |||
670 | * at a cost of "thundering herd" phenomena during rare hash | 670 | * at a cost of "thundering herd" phenomena during rare hash |
671 | * collisions. | 671 | * collisions. |
672 | */ | 672 | */ |
673 | static wait_queue_head_t *page_waitqueue(struct page *page) | 673 | wait_queue_head_t *page_waitqueue(struct page *page) |
674 | { | 674 | { |
675 | const struct zone *zone = page_zone(page); | 675 | const struct zone *zone = page_zone(page); |
676 | 676 | ||
677 | return &zone->wait_table[hash_ptr(page, zone->wait_table_bits)]; | 677 | return &zone->wait_table[hash_ptr(page, zone->wait_table_bits)]; |
678 | } | 678 | } |
679 | 679 | EXPORT_SYMBOL(page_waitqueue); | |
680 | static inline void wake_up_page(struct page *page, int bit) | ||
681 | { | ||
682 | __wake_up_bit(page_waitqueue(page), &page->flags, bit); | ||
683 | } | ||
684 | 680 | ||
685 | void wait_on_page_bit(struct page *page, int bit_nr) | 681 | void wait_on_page_bit(struct page *page, int bit_nr) |
686 | { | 682 | { |
@@ -703,6 +699,19 @@ int wait_on_page_bit_killable(struct page *page, int bit_nr) | |||
703 | bit_wait_io, TASK_KILLABLE); | 699 | bit_wait_io, TASK_KILLABLE); |
704 | } | 700 | } |
705 | 701 | ||
702 | int wait_on_page_bit_killable_timeout(struct page *page, | ||
703 | int bit_nr, unsigned long timeout) | ||
704 | { | ||
705 | DEFINE_WAIT_BIT(wait, &page->flags, bit_nr); | ||
706 | |||
707 | wait.key.timeout = jiffies + timeout; | ||
708 | if (!test_bit(bit_nr, &page->flags)) | ||
709 | return 0; | ||
710 | return __wait_on_bit(page_waitqueue(page), &wait, | ||
711 | bit_wait_io_timeout, TASK_KILLABLE); | ||
712 | } | ||
713 | EXPORT_SYMBOL_GPL(wait_on_page_bit_killable_timeout); | ||
714 | |||
706 | /** | 715 | /** |
707 | * add_page_wait_queue - Add an arbitrary waiter to a page's wait queue | 716 | * add_page_wait_queue - Add an arbitrary waiter to a page's wait queue |
708 | * @page: Page defining the wait queue of interest | 717 | * @page: Page defining the wait queue of interest |
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index e0b94ce4c4e6..9acd6ce88db7 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c | |||
@@ -1916,6 +1916,7 @@ call_transmit_status(struct rpc_task *task) | |||
1916 | case -EHOSTDOWN: | 1916 | case -EHOSTDOWN: |
1917 | case -EHOSTUNREACH: | 1917 | case -EHOSTUNREACH: |
1918 | case -ENETUNREACH: | 1918 | case -ENETUNREACH: |
1919 | case -EPERM: | ||
1919 | if (RPC_IS_SOFTCONN(task)) { | 1920 | if (RPC_IS_SOFTCONN(task)) { |
1920 | xprt_end_transmit(task); | 1921 | xprt_end_transmit(task); |
1921 | rpc_exit(task, task->tk_status); | 1922 | rpc_exit(task, task->tk_status); |
@@ -2021,6 +2022,7 @@ call_status(struct rpc_task *task) | |||
2021 | case -EHOSTDOWN: | 2022 | case -EHOSTDOWN: |
2022 | case -EHOSTUNREACH: | 2023 | case -EHOSTUNREACH: |
2023 | case -ENETUNREACH: | 2024 | case -ENETUNREACH: |
2025 | case -EPERM: | ||
2024 | if (RPC_IS_SOFTCONN(task)) { | 2026 | if (RPC_IS_SOFTCONN(task)) { |
2025 | rpc_exit(task, status); | 2027 | rpc_exit(task, status); |
2026 | break; | 2028 | break; |
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 9358c79fd589..fe3441abdbe5 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c | |||
@@ -821,9 +821,7 @@ void rpc_execute(struct rpc_task *task) | |||
821 | 821 | ||
822 | static void rpc_async_schedule(struct work_struct *work) | 822 | static void rpc_async_schedule(struct work_struct *work) |
823 | { | 823 | { |
824 | current->flags |= PF_FSTRANS; | ||
825 | __rpc_execute(container_of(work, struct rpc_task, u.tk_work)); | 824 | __rpc_execute(container_of(work, struct rpc_task, u.tk_work)); |
826 | current->flags &= ~PF_FSTRANS; | ||
827 | } | 825 | } |
828 | 826 | ||
829 | /** | 827 | /** |
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index 2faac4940563..6a4615dd0261 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c | |||
@@ -205,7 +205,6 @@ xprt_rdma_connect_worker(struct work_struct *work) | |||
205 | struct rpc_xprt *xprt = &r_xprt->xprt; | 205 | struct rpc_xprt *xprt = &r_xprt->xprt; |
206 | int rc = 0; | 206 | int rc = 0; |
207 | 207 | ||
208 | current->flags |= PF_FSTRANS; | ||
209 | xprt_clear_connected(xprt); | 208 | xprt_clear_connected(xprt); |
210 | 209 | ||
211 | dprintk("RPC: %s: %sconnect\n", __func__, | 210 | dprintk("RPC: %s: %sconnect\n", __func__, |
@@ -216,7 +215,6 @@ xprt_rdma_connect_worker(struct work_struct *work) | |||
216 | 215 | ||
217 | dprintk("RPC: %s: exit\n", __func__); | 216 | dprintk("RPC: %s: exit\n", __func__); |
218 | xprt_clear_connecting(xprt); | 217 | xprt_clear_connecting(xprt); |
219 | current->flags &= ~PF_FSTRANS; | ||
220 | } | 218 | } |
221 | 219 | ||
222 | /* | 220 | /* |
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 43cd89eacfab..3b305ab17afe 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c | |||
@@ -399,13 +399,13 @@ static int xs_send_kvec(struct socket *sock, struct sockaddr *addr, int addrlen, | |||
399 | return kernel_sendmsg(sock, &msg, NULL, 0, 0); | 399 | return kernel_sendmsg(sock, &msg, NULL, 0, 0); |
400 | } | 400 | } |
401 | 401 | ||
402 | static int xs_send_pagedata(struct socket *sock, struct xdr_buf *xdr, unsigned int base, int more, bool zerocopy) | 402 | static int xs_send_pagedata(struct socket *sock, struct xdr_buf *xdr, unsigned int base, int more, bool zerocopy, int *sent_p) |
403 | { | 403 | { |
404 | ssize_t (*do_sendpage)(struct socket *sock, struct page *page, | 404 | ssize_t (*do_sendpage)(struct socket *sock, struct page *page, |
405 | int offset, size_t size, int flags); | 405 | int offset, size_t size, int flags); |
406 | struct page **ppage; | 406 | struct page **ppage; |
407 | unsigned int remainder; | 407 | unsigned int remainder; |
408 | int err, sent = 0; | 408 | int err; |
409 | 409 | ||
410 | remainder = xdr->page_len - base; | 410 | remainder = xdr->page_len - base; |
411 | base += xdr->page_base; | 411 | base += xdr->page_base; |
@@ -424,15 +424,15 @@ static int xs_send_pagedata(struct socket *sock, struct xdr_buf *xdr, unsigned i | |||
424 | err = do_sendpage(sock, *ppage, base, len, flags); | 424 | err = do_sendpage(sock, *ppage, base, len, flags); |
425 | if (remainder == 0 || err != len) | 425 | if (remainder == 0 || err != len) |
426 | break; | 426 | break; |
427 | sent += err; | 427 | *sent_p += err; |
428 | ppage++; | 428 | ppage++; |
429 | base = 0; | 429 | base = 0; |
430 | } | 430 | } |
431 | if (sent == 0) | 431 | if (err > 0) { |
432 | return err; | 432 | *sent_p += err; |
433 | if (err > 0) | 433 | err = 0; |
434 | sent += err; | 434 | } |
435 | return sent; | 435 | return err; |
436 | } | 436 | } |
437 | 437 | ||
438 | /** | 438 | /** |
@@ -443,12 +443,14 @@ static int xs_send_pagedata(struct socket *sock, struct xdr_buf *xdr, unsigned i | |||
443 | * @xdr: buffer containing this request | 443 | * @xdr: buffer containing this request |
444 | * @base: starting position in the buffer | 444 | * @base: starting position in the buffer |
445 | * @zerocopy: true if it is safe to use sendpage() | 445 | * @zerocopy: true if it is safe to use sendpage() |
446 | * @sent_p: return the total number of bytes successfully queued for sending | ||
446 | * | 447 | * |
447 | */ | 448 | */ |
448 | static int xs_sendpages(struct socket *sock, struct sockaddr *addr, int addrlen, struct xdr_buf *xdr, unsigned int base, bool zerocopy) | 449 | static int xs_sendpages(struct socket *sock, struct sockaddr *addr, int addrlen, struct xdr_buf *xdr, unsigned int base, bool zerocopy, int *sent_p) |
449 | { | 450 | { |
450 | unsigned int remainder = xdr->len - base; | 451 | unsigned int remainder = xdr->len - base; |
451 | int err, sent = 0; | 452 | int err = 0; |
453 | int sent = 0; | ||
452 | 454 | ||
453 | if (unlikely(!sock)) | 455 | if (unlikely(!sock)) |
454 | return -ENOTSOCK; | 456 | return -ENOTSOCK; |
@@ -465,7 +467,7 @@ static int xs_sendpages(struct socket *sock, struct sockaddr *addr, int addrlen, | |||
465 | err = xs_send_kvec(sock, addr, addrlen, &xdr->head[0], base, remainder != 0); | 467 | err = xs_send_kvec(sock, addr, addrlen, &xdr->head[0], base, remainder != 0); |
466 | if (remainder == 0 || err != len) | 468 | if (remainder == 0 || err != len) |
467 | goto out; | 469 | goto out; |
468 | sent += err; | 470 | *sent_p += err; |
469 | base = 0; | 471 | base = 0; |
470 | } else | 472 | } else |
471 | base -= xdr->head[0].iov_len; | 473 | base -= xdr->head[0].iov_len; |
@@ -473,23 +475,23 @@ static int xs_sendpages(struct socket *sock, struct sockaddr *addr, int addrlen, | |||
473 | if (base < xdr->page_len) { | 475 | if (base < xdr->page_len) { |
474 | unsigned int len = xdr->page_len - base; | 476 | unsigned int len = xdr->page_len - base; |
475 | remainder -= len; | 477 | remainder -= len; |
476 | err = xs_send_pagedata(sock, xdr, base, remainder != 0, zerocopy); | 478 | err = xs_send_pagedata(sock, xdr, base, remainder != 0, zerocopy, &sent); |
477 | if (remainder == 0 || err != len) | 479 | *sent_p += sent; |
480 | if (remainder == 0 || sent != len) | ||
478 | goto out; | 481 | goto out; |
479 | sent += err; | ||
480 | base = 0; | 482 | base = 0; |
481 | } else | 483 | } else |
482 | base -= xdr->page_len; | 484 | base -= xdr->page_len; |
483 | 485 | ||
484 | if (base >= xdr->tail[0].iov_len) | 486 | if (base >= xdr->tail[0].iov_len) |
485 | return sent; | 487 | return 0; |
486 | err = xs_send_kvec(sock, NULL, 0, &xdr->tail[0], base, 0); | 488 | err = xs_send_kvec(sock, NULL, 0, &xdr->tail[0], base, 0); |
487 | out: | 489 | out: |
488 | if (sent == 0) | 490 | if (err > 0) { |
489 | return err; | 491 | *sent_p += err; |
490 | if (err > 0) | 492 | err = 0; |
491 | sent += err; | 493 | } |
492 | return sent; | 494 | return err; |
493 | } | 495 | } |
494 | 496 | ||
495 | static void xs_nospace_callback(struct rpc_task *task) | 497 | static void xs_nospace_callback(struct rpc_task *task) |
@@ -573,19 +575,20 @@ static int xs_local_send_request(struct rpc_task *task) | |||
573 | container_of(xprt, struct sock_xprt, xprt); | 575 | container_of(xprt, struct sock_xprt, xprt); |
574 | struct xdr_buf *xdr = &req->rq_snd_buf; | 576 | struct xdr_buf *xdr = &req->rq_snd_buf; |
575 | int status; | 577 | int status; |
578 | int sent = 0; | ||
576 | 579 | ||
577 | xs_encode_stream_record_marker(&req->rq_snd_buf); | 580 | xs_encode_stream_record_marker(&req->rq_snd_buf); |
578 | 581 | ||
579 | xs_pktdump("packet data:", | 582 | xs_pktdump("packet data:", |
580 | req->rq_svec->iov_base, req->rq_svec->iov_len); | 583 | req->rq_svec->iov_base, req->rq_svec->iov_len); |
581 | 584 | ||
582 | status = xs_sendpages(transport->sock, NULL, 0, | 585 | status = xs_sendpages(transport->sock, NULL, 0, xdr, req->rq_bytes_sent, |
583 | xdr, req->rq_bytes_sent, true); | 586 | true, &sent); |
584 | dprintk("RPC: %s(%u) = %d\n", | 587 | dprintk("RPC: %s(%u) = %d\n", |
585 | __func__, xdr->len - req->rq_bytes_sent, status); | 588 | __func__, xdr->len - req->rq_bytes_sent, status); |
586 | if (likely(status >= 0)) { | 589 | if (likely(sent > 0) || status == 0) { |
587 | req->rq_bytes_sent += status; | 590 | req->rq_bytes_sent += sent; |
588 | req->rq_xmit_bytes_sent += status; | 591 | req->rq_xmit_bytes_sent += sent; |
589 | if (likely(req->rq_bytes_sent >= req->rq_slen)) { | 592 | if (likely(req->rq_bytes_sent >= req->rq_slen)) { |
590 | req->rq_bytes_sent = 0; | 593 | req->rq_bytes_sent = 0; |
591 | return 0; | 594 | return 0; |
@@ -626,6 +629,7 @@ static int xs_udp_send_request(struct rpc_task *task) | |||
626 | struct rpc_xprt *xprt = req->rq_xprt; | 629 | struct rpc_xprt *xprt = req->rq_xprt; |
627 | struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); | 630 | struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); |
628 | struct xdr_buf *xdr = &req->rq_snd_buf; | 631 | struct xdr_buf *xdr = &req->rq_snd_buf; |
632 | int sent = 0; | ||
629 | int status; | 633 | int status; |
630 | 634 | ||
631 | xs_pktdump("packet data:", | 635 | xs_pktdump("packet data:", |
@@ -634,22 +638,25 @@ static int xs_udp_send_request(struct rpc_task *task) | |||
634 | 638 | ||
635 | if (!xprt_bound(xprt)) | 639 | if (!xprt_bound(xprt)) |
636 | return -ENOTCONN; | 640 | return -ENOTCONN; |
637 | status = xs_sendpages(transport->sock, | 641 | status = xs_sendpages(transport->sock, xs_addr(xprt), xprt->addrlen, |
638 | xs_addr(xprt), | 642 | xdr, req->rq_bytes_sent, true, &sent); |
639 | xprt->addrlen, xdr, | ||
640 | req->rq_bytes_sent, true); | ||
641 | 643 | ||
642 | dprintk("RPC: xs_udp_send_request(%u) = %d\n", | 644 | dprintk("RPC: xs_udp_send_request(%u) = %d\n", |
643 | xdr->len - req->rq_bytes_sent, status); | 645 | xdr->len - req->rq_bytes_sent, status); |
644 | 646 | ||
645 | if (status >= 0) { | 647 | /* firewall is blocking us, don't return -EAGAIN or we end up looping */ |
646 | req->rq_xmit_bytes_sent += status; | 648 | if (status == -EPERM) |
647 | if (status >= req->rq_slen) | 649 | goto process_status; |
650 | |||
651 | if (sent > 0 || status == 0) { | ||
652 | req->rq_xmit_bytes_sent += sent; | ||
653 | if (sent >= req->rq_slen) | ||
648 | return 0; | 654 | return 0; |
649 | /* Still some bytes left; set up for a retry later. */ | 655 | /* Still some bytes left; set up for a retry later. */ |
650 | status = -EAGAIN; | 656 | status = -EAGAIN; |
651 | } | 657 | } |
652 | 658 | ||
659 | process_status: | ||
653 | switch (status) { | 660 | switch (status) { |
654 | case -ENOTSOCK: | 661 | case -ENOTSOCK: |
655 | status = -ENOTCONN; | 662 | status = -ENOTCONN; |
@@ -665,6 +672,7 @@ static int xs_udp_send_request(struct rpc_task *task) | |||
665 | case -ENOBUFS: | 672 | case -ENOBUFS: |
666 | case -EPIPE: | 673 | case -EPIPE: |
667 | case -ECONNREFUSED: | 674 | case -ECONNREFUSED: |
675 | case -EPERM: | ||
668 | /* When the server has died, an ICMP port unreachable message | 676 | /* When the server has died, an ICMP port unreachable message |
669 | * prompts ECONNREFUSED. */ | 677 | * prompts ECONNREFUSED. */ |
670 | clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags); | 678 | clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags); |
@@ -713,6 +721,7 @@ static int xs_tcp_send_request(struct rpc_task *task) | |||
713 | struct xdr_buf *xdr = &req->rq_snd_buf; | 721 | struct xdr_buf *xdr = &req->rq_snd_buf; |
714 | bool zerocopy = true; | 722 | bool zerocopy = true; |
715 | int status; | 723 | int status; |
724 | int sent; | ||
716 | 725 | ||
717 | xs_encode_stream_record_marker(&req->rq_snd_buf); | 726 | xs_encode_stream_record_marker(&req->rq_snd_buf); |
718 | 727 | ||
@@ -730,26 +739,26 @@ static int xs_tcp_send_request(struct rpc_task *task) | |||
730 | * to cope with writespace callbacks arriving _after_ we have | 739 | * to cope with writespace callbacks arriving _after_ we have |
731 | * called sendmsg(). */ | 740 | * called sendmsg(). */ |
732 | while (1) { | 741 | while (1) { |
733 | status = xs_sendpages(transport->sock, | 742 | sent = 0; |
734 | NULL, 0, xdr, req->rq_bytes_sent, | 743 | status = xs_sendpages(transport->sock, NULL, 0, xdr, |
735 | zerocopy); | 744 | req->rq_bytes_sent, zerocopy, &sent); |
736 | 745 | ||
737 | dprintk("RPC: xs_tcp_send_request(%u) = %d\n", | 746 | dprintk("RPC: xs_tcp_send_request(%u) = %d\n", |
738 | xdr->len - req->rq_bytes_sent, status); | 747 | xdr->len - req->rq_bytes_sent, status); |
739 | 748 | ||
740 | if (unlikely(status < 0)) | 749 | if (unlikely(sent == 0 && status < 0)) |
741 | break; | 750 | break; |
742 | 751 | ||
743 | /* If we've sent the entire packet, immediately | 752 | /* If we've sent the entire packet, immediately |
744 | * reset the count of bytes sent. */ | 753 | * reset the count of bytes sent. */ |
745 | req->rq_bytes_sent += status; | 754 | req->rq_bytes_sent += sent; |
746 | req->rq_xmit_bytes_sent += status; | 755 | req->rq_xmit_bytes_sent += sent; |
747 | if (likely(req->rq_bytes_sent >= req->rq_slen)) { | 756 | if (likely(req->rq_bytes_sent >= req->rq_slen)) { |
748 | req->rq_bytes_sent = 0; | 757 | req->rq_bytes_sent = 0; |
749 | return 0; | 758 | return 0; |
750 | } | 759 | } |
751 | 760 | ||
752 | if (status != 0) | 761 | if (sent != 0) |
753 | continue; | 762 | continue; |
754 | status = -EAGAIN; | 763 | status = -EAGAIN; |
755 | break; | 764 | break; |
@@ -845,6 +854,8 @@ static void xs_error_report(struct sock *sk) | |||
845 | dprintk("RPC: xs_error_report client %p, error=%d...\n", | 854 | dprintk("RPC: xs_error_report client %p, error=%d...\n", |
846 | xprt, -err); | 855 | xprt, -err); |
847 | trace_rpc_socket_error(xprt, sk->sk_socket, err); | 856 | trace_rpc_socket_error(xprt, sk->sk_socket, err); |
857 | if (test_bit(XPRT_CONNECTION_REUSE, &xprt->state)) | ||
858 | goto out; | ||
848 | xprt_wake_pending_tasks(xprt, err); | 859 | xprt_wake_pending_tasks(xprt, err); |
849 | out: | 860 | out: |
850 | read_unlock_bh(&sk->sk_callback_lock); | 861 | read_unlock_bh(&sk->sk_callback_lock); |
@@ -1746,13 +1757,29 @@ static int xs_bind(struct sock_xprt *transport, struct socket *sock) | |||
1746 | unsigned short port = xs_get_srcport(transport); | 1757 | unsigned short port = xs_get_srcport(transport); |
1747 | unsigned short last; | 1758 | unsigned short last; |
1748 | 1759 | ||
1760 | /* | ||
1761 | * If we are asking for any ephemeral port (i.e. port == 0 && | ||
1762 | * transport->xprt.resvport == 0), don't bind. Let the local | ||
1763 | * port selection happen implicitly when the socket is used | ||
1764 | * (for example at connect time). | ||
1765 | * | ||
1766 | * This ensures that we can continue to establish TCP | ||
1767 | * connections even when all local ephemeral ports are already | ||
1768 | * a part of some TCP connection. This makes no difference | ||
1769 | for UDP sockets, but also doesn't harm them. | ||
1770 | * | ||
1771 | * If we're asking for any reserved port (i.e. port == 0 && | ||
1772 | * transport->xprt.resvport == 1) xs_get_srcport above will | ||
1773 | * ensure that port is non-zero and we will bind as needed. | ||
1774 | */ | ||
1775 | if (port == 0) | ||
1776 | return 0; | ||
1777 | |||
1749 | memcpy(&myaddr, &transport->srcaddr, transport->xprt.addrlen); | 1778 | memcpy(&myaddr, &transport->srcaddr, transport->xprt.addrlen); |
1750 | do { | 1779 | do { |
1751 | rpc_set_port((struct sockaddr *)&myaddr, port); | 1780 | rpc_set_port((struct sockaddr *)&myaddr, port); |
1752 | err = kernel_bind(sock, (struct sockaddr *)&myaddr, | 1781 | err = kernel_bind(sock, (struct sockaddr *)&myaddr, |
1753 | transport->xprt.addrlen); | 1782 | transport->xprt.addrlen); |
1754 | if (port == 0) | ||
1755 | break; | ||
1756 | if (err == 0) { | 1783 | if (err == 0) { |
1757 | transport->srcport = port; | 1784 | transport->srcport = port; |
1758 | break; | 1785 | break; |
@@ -1927,8 +1954,6 @@ static int xs_local_setup_socket(struct sock_xprt *transport) | |||
1927 | struct socket *sock; | 1954 | struct socket *sock; |
1928 | int status = -EIO; | 1955 | int status = -EIO; |
1929 | 1956 | ||
1930 | current->flags |= PF_FSTRANS; | ||
1931 | |||
1932 | clear_bit(XPRT_CONNECTION_ABORT, &xprt->state); | 1957 | clear_bit(XPRT_CONNECTION_ABORT, &xprt->state); |
1933 | status = __sock_create(xprt->xprt_net, AF_LOCAL, | 1958 | status = __sock_create(xprt->xprt_net, AF_LOCAL, |
1934 | SOCK_STREAM, 0, &sock, 1); | 1959 | SOCK_STREAM, 0, &sock, 1); |
@@ -1968,7 +1993,6 @@ static int xs_local_setup_socket(struct sock_xprt *transport) | |||
1968 | out: | 1993 | out: |
1969 | xprt_clear_connecting(xprt); | 1994 | xprt_clear_connecting(xprt); |
1970 | xprt_wake_pending_tasks(xprt, status); | 1995 | xprt_wake_pending_tasks(xprt, status); |
1971 | current->flags &= ~PF_FSTRANS; | ||
1972 | return status; | 1996 | return status; |
1973 | } | 1997 | } |
1974 | 1998 | ||
@@ -2071,8 +2095,6 @@ static void xs_udp_setup_socket(struct work_struct *work) | |||
2071 | struct socket *sock = transport->sock; | 2095 | struct socket *sock = transport->sock; |
2072 | int status = -EIO; | 2096 | int status = -EIO; |
2073 | 2097 | ||
2074 | current->flags |= PF_FSTRANS; | ||
2075 | |||
2076 | /* Start by resetting any existing state */ | 2098 | /* Start by resetting any existing state */ |
2077 | xs_reset_transport(transport); | 2099 | xs_reset_transport(transport); |
2078 | sock = xs_create_sock(xprt, transport, | 2100 | sock = xs_create_sock(xprt, transport, |
@@ -2092,7 +2114,6 @@ static void xs_udp_setup_socket(struct work_struct *work) | |||
2092 | out: | 2114 | out: |
2093 | xprt_clear_connecting(xprt); | 2115 | xprt_clear_connecting(xprt); |
2094 | xprt_wake_pending_tasks(xprt, status); | 2116 | xprt_wake_pending_tasks(xprt, status); |
2095 | current->flags &= ~PF_FSTRANS; | ||
2096 | } | 2117 | } |
2097 | 2118 | ||
2098 | /* | 2119 | /* |
@@ -2229,8 +2250,6 @@ static void xs_tcp_setup_socket(struct work_struct *work) | |||
2229 | struct rpc_xprt *xprt = &transport->xprt; | 2250 | struct rpc_xprt *xprt = &transport->xprt; |
2230 | int status = -EIO; | 2251 | int status = -EIO; |
2231 | 2252 | ||
2232 | current->flags |= PF_FSTRANS; | ||
2233 | |||
2234 | if (!sock) { | 2253 | if (!sock) { |
2235 | clear_bit(XPRT_CONNECTION_ABORT, &xprt->state); | 2254 | clear_bit(XPRT_CONNECTION_ABORT, &xprt->state); |
2236 | sock = xs_create_sock(xprt, transport, | 2255 | sock = xs_create_sock(xprt, transport, |
@@ -2245,7 +2264,9 @@ static void xs_tcp_setup_socket(struct work_struct *work) | |||
2245 | abort_and_exit = test_and_clear_bit(XPRT_CONNECTION_ABORT, | 2264 | abort_and_exit = test_and_clear_bit(XPRT_CONNECTION_ABORT, |
2246 | &xprt->state); | 2265 | &xprt->state); |
2247 | /* "close" the socket, preserving the local port */ | 2266 | /* "close" the socket, preserving the local port */ |
2267 | set_bit(XPRT_CONNECTION_REUSE, &xprt->state); | ||
2248 | xs_tcp_reuse_connection(transport); | 2268 | xs_tcp_reuse_connection(transport); |
2269 | clear_bit(XPRT_CONNECTION_REUSE, &xprt->state); | ||
2249 | 2270 | ||
2250 | if (abort_and_exit) | 2271 | if (abort_and_exit) |
2251 | goto out_eagain; | 2272 | goto out_eagain; |
@@ -2276,7 +2297,6 @@ static void xs_tcp_setup_socket(struct work_struct *work) | |||
2276 | case -EINPROGRESS: | 2297 | case -EINPROGRESS: |
2277 | case -EALREADY: | 2298 | case -EALREADY: |
2278 | xprt_clear_connecting(xprt); | 2299 | xprt_clear_connecting(xprt); |
2279 | current->flags &= ~PF_FSTRANS; | ||
2280 | return; | 2300 | return; |
2281 | case -EINVAL: | 2301 | case -EINVAL: |
2282 | /* Happens, for instance, if the user specified a link | 2302 | /* Happens, for instance, if the user specified a link |
@@ -2294,7 +2314,6 @@ out_eagain: | |||
2294 | out: | 2314 | out: |
2295 | xprt_clear_connecting(xprt); | 2315 | xprt_clear_connecting(xprt); |
2296 | xprt_wake_pending_tasks(xprt, status); | 2316 | xprt_wake_pending_tasks(xprt, status); |
2297 | current->flags &= ~PF_FSTRANS; | ||
2298 | } | 2317 | } |
2299 | 2318 | ||
2300 | /** | 2319 | /** |