diff options
Diffstat (limited to 'fs/nfs')
-rw-r--r-- | fs/nfs/blocklayout/blocklayoutdev.c | 208 |
1 files changed, 206 insertions, 2 deletions
diff --git a/fs/nfs/blocklayout/blocklayoutdev.c b/fs/nfs/blocklayout/blocklayoutdev.c index b23fe601d1c9..a83b393fb01c 100644 --- a/fs/nfs/blocklayout/blocklayoutdev.c +++ b/fs/nfs/blocklayout/blocklayoutdev.c | |||
@@ -40,6 +40,19 @@ | |||
40 | 40 | ||
41 | #define NFSDBG_FACILITY NFSDBG_PNFS_LD | 41 | #define NFSDBG_FACILITY NFSDBG_PNFS_LD |
42 | 42 | ||
43 | static int decode_sector_number(__be32 **rp, sector_t *sp) | ||
44 | { | ||
45 | uint64_t s; | ||
46 | |||
47 | *rp = xdr_decode_hyper(*rp, &s); | ||
48 | if (s & 0x1ff) { | ||
49 | printk(KERN_WARNING "%s: sector not aligned\n", __func__); | ||
50 | return -1; | ||
51 | } | ||
52 | *sp = s >> SECTOR_SHIFT; | ||
53 | return 0; | ||
54 | } | ||
55 | |||
43 | /* Open a block_device by device number. */ | 56 | /* Open a block_device by device number. */ |
44 | struct block_device *nfs4_blkdev_get(dev_t dev) | 57 | struct block_device *nfs4_blkdev_get(dev_t dev) |
45 | { | 58 | { |
@@ -197,10 +210,201 @@ out: | |||
197 | return rv; | 210 | return rv; |
198 | } | 211 | } |
199 | 212 | ||
213 | /* Map deviceid returned by the server to constructed block_device */ | ||
214 | static struct block_device *translate_devid(struct pnfs_layout_hdr *lo, | ||
215 | struct nfs4_deviceid *id) | ||
216 | { | ||
217 | struct block_device *rv = NULL; | ||
218 | struct block_mount_id *mid; | ||
219 | struct pnfs_block_dev *dev; | ||
220 | |||
221 | dprintk("%s enter, lo=%p, id=%p\n", __func__, lo, id); | ||
222 | mid = BLK_ID(lo); | ||
223 | spin_lock(&mid->bm_lock); | ||
224 | list_for_each_entry(dev, &mid->bm_devlist, bm_node) { | ||
225 | if (memcmp(id->data, dev->bm_mdevid.data, | ||
226 | NFS4_DEVICEID4_SIZE) == 0) { | ||
227 | rv = dev->bm_mdev; | ||
228 | goto out; | ||
229 | } | ||
230 | } | ||
231 | out: | ||
232 | spin_unlock(&mid->bm_lock); | ||
233 | dprintk("%s returning %p\n", __func__, rv); | ||
234 | return rv; | ||
235 | } | ||
236 | |||
237 | /* Tracks info needed to ensure extents in layout obey constraints of spec */ | ||
238 | struct layout_verification { | ||
239 | u32 mode; /* R or RW */ | ||
240 | u64 start; /* Expected start of next non-COW extent */ | ||
241 | u64 inval; /* Start of INVAL coverage */ | ||
242 | u64 cowread; /* End of COW read coverage */ | ||
243 | }; | ||
244 | |||
245 | /* Verify the extent meets the layout requirements of the pnfs-block draft, | ||
246 | * section 2.3.1. | ||
247 | */ | ||
248 | static int verify_extent(struct pnfs_block_extent *be, | ||
249 | struct layout_verification *lv) | ||
250 | { | ||
251 | if (lv->mode == IOMODE_READ) { | ||
252 | if (be->be_state == PNFS_BLOCK_READWRITE_DATA || | ||
253 | be->be_state == PNFS_BLOCK_INVALID_DATA) | ||
254 | return -EIO; | ||
255 | if (be->be_f_offset != lv->start) | ||
256 | return -EIO; | ||
257 | lv->start += be->be_length; | ||
258 | return 0; | ||
259 | } | ||
260 | /* lv->mode == IOMODE_RW */ | ||
261 | if (be->be_state == PNFS_BLOCK_READWRITE_DATA) { | ||
262 | if (be->be_f_offset != lv->start) | ||
263 | return -EIO; | ||
264 | if (lv->cowread > lv->start) | ||
265 | return -EIO; | ||
266 | lv->start += be->be_length; | ||
267 | lv->inval = lv->start; | ||
268 | return 0; | ||
269 | } else if (be->be_state == PNFS_BLOCK_INVALID_DATA) { | ||
270 | if (be->be_f_offset != lv->start) | ||
271 | return -EIO; | ||
272 | lv->start += be->be_length; | ||
273 | return 0; | ||
274 | } else if (be->be_state == PNFS_BLOCK_READ_DATA) { | ||
275 | if (be->be_f_offset > lv->start) | ||
276 | return -EIO; | ||
277 | if (be->be_f_offset < lv->inval) | ||
278 | return -EIO; | ||
279 | if (be->be_f_offset < lv->cowread) | ||
280 | return -EIO; | ||
281 | /* It looks like you might want to min this with lv->start, | ||
282 | * but you really don't. | ||
283 | */ | ||
284 | lv->inval = lv->inval + be->be_length; | ||
285 | lv->cowread = be->be_f_offset + be->be_length; | ||
286 | return 0; | ||
287 | } else | ||
288 | return -EIO; | ||
289 | } | ||
290 | |||
291 | /* XDR decode pnfs_block_layout4 structure */ | ||
200 | int | 292 | int |
201 | nfs4_blk_process_layoutget(struct pnfs_layout_hdr *lo, | 293 | nfs4_blk_process_layoutget(struct pnfs_layout_hdr *lo, |
202 | struct nfs4_layoutget_res *lgr, gfp_t gfp_flags) | 294 | struct nfs4_layoutget_res *lgr, gfp_t gfp_flags) |
203 | { | 295 | { |
204 | /* STUB */ | 296 | struct pnfs_block_layout *bl = BLK_LO2EXT(lo); |
205 | return -EIO; | 297 | int i, status = -EIO; |
298 | uint32_t count; | ||
299 | struct pnfs_block_extent *be = NULL, *save; | ||
300 | struct xdr_stream stream; | ||
301 | struct xdr_buf buf; | ||
302 | struct page *scratch; | ||
303 | __be32 *p; | ||
304 | struct layout_verification lv = { | ||
305 | .mode = lgr->range.iomode, | ||
306 | .start = lgr->range.offset >> SECTOR_SHIFT, | ||
307 | .inval = lgr->range.offset >> SECTOR_SHIFT, | ||
308 | .cowread = lgr->range.offset >> SECTOR_SHIFT, | ||
309 | }; | ||
310 | LIST_HEAD(extents); | ||
311 | |||
312 | dprintk("---> %s\n", __func__); | ||
313 | |||
314 | scratch = alloc_page(gfp_flags); | ||
315 | if (!scratch) | ||
316 | return -ENOMEM; | ||
317 | |||
318 | xdr_init_decode_pages(&stream, &buf, lgr->layoutp->pages, lgr->layoutp->len); | ||
319 | xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE); | ||
320 | |||
321 | p = xdr_inline_decode(&stream, 4); | ||
322 | if (unlikely(!p)) | ||
323 | goto out_err; | ||
324 | |||
325 | count = be32_to_cpup(p++); | ||
326 | |||
327 | dprintk("%s enter, number of extents %i\n", __func__, count); | ||
328 | p = xdr_inline_decode(&stream, (28 + NFS4_DEVICEID4_SIZE) * count); | ||
329 | if (unlikely(!p)) | ||
330 | goto out_err; | ||
331 | |||
332 | /* Decode individual extents, putting them in temporary | ||
333 | * staging area until whole layout is decoded to make error | ||
334 | * recovery easier. | ||
335 | */ | ||
336 | for (i = 0; i < count; i++) { | ||
337 | be = bl_alloc_extent(); | ||
338 | if (!be) { | ||
339 | status = -ENOMEM; | ||
340 | goto out_err; | ||
341 | } | ||
342 | memcpy(&be->be_devid, p, NFS4_DEVICEID4_SIZE); | ||
343 | p += XDR_QUADLEN(NFS4_DEVICEID4_SIZE); | ||
344 | be->be_mdev = translate_devid(lo, &be->be_devid); | ||
345 | if (!be->be_mdev) | ||
346 | goto out_err; | ||
347 | |||
348 | /* The next three values are read in as bytes, | ||
349 | * but stored as 512-byte sector lengths | ||
350 | */ | ||
351 | if (decode_sector_number(&p, &be->be_f_offset) < 0) | ||
352 | goto out_err; | ||
353 | if (decode_sector_number(&p, &be->be_length) < 0) | ||
354 | goto out_err; | ||
355 | if (decode_sector_number(&p, &be->be_v_offset) < 0) | ||
356 | goto out_err; | ||
357 | be->be_state = be32_to_cpup(p++); | ||
358 | if (be->be_state == PNFS_BLOCK_INVALID_DATA) | ||
359 | be->be_inval = &bl->bl_inval; | ||
360 | if (verify_extent(be, &lv)) { | ||
361 | dprintk("%s verify failed\n", __func__); | ||
362 | goto out_err; | ||
363 | } | ||
364 | list_add_tail(&be->be_node, &extents); | ||
365 | } | ||
366 | if (lgr->range.offset + lgr->range.length != | ||
367 | lv.start << SECTOR_SHIFT) { | ||
368 | dprintk("%s Final length mismatch\n", __func__); | ||
369 | be = NULL; | ||
370 | goto out_err; | ||
371 | } | ||
372 | if (lv.start < lv.cowread) { | ||
373 | dprintk("%s Final uncovered COW extent\n", __func__); | ||
374 | be = NULL; | ||
375 | goto out_err; | ||
376 | } | ||
377 | /* Extents decoded properly, now try to merge them in to | ||
378 | * existing layout extents. | ||
379 | */ | ||
380 | spin_lock(&bl->bl_ext_lock); | ||
381 | list_for_each_entry_safe(be, save, &extents, be_node) { | ||
382 | list_del(&be->be_node); | ||
383 | status = bl_add_merge_extent(bl, be); | ||
384 | if (status) { | ||
385 | spin_unlock(&bl->bl_ext_lock); | ||
386 | /* This is a fairly catastrophic error, as the | ||
387 | * entire layout extent lists are now corrupted. | ||
388 | * We should have some way to distinguish this. | ||
389 | */ | ||
390 | be = NULL; | ||
391 | goto out_err; | ||
392 | } | ||
393 | } | ||
394 | spin_unlock(&bl->bl_ext_lock); | ||
395 | status = 0; | ||
396 | out: | ||
397 | __free_page(scratch); | ||
398 | dprintk("%s returns %i\n", __func__, status); | ||
399 | return status; | ||
400 | |||
401 | out_err: | ||
402 | bl_put_extent(be); | ||
403 | while (!list_empty(&extents)) { | ||
404 | be = list_first_entry(&extents, struct pnfs_block_extent, | ||
405 | be_node); | ||
406 | list_del(&be->be_node); | ||
407 | bl_put_extent(be); | ||
408 | } | ||
409 | goto out; | ||
206 | } | 410 | } |