aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorFred Isaman <iisaman@citi.umich.edu>2011-07-30 20:52:47 -0400
committerTrond Myklebust <Trond.Myklebust@netapp.com>2011-07-31 12:18:16 -0400
commite9437ccef92a28ba4c9009404bb8c9b5672dc54a (patch)
treee14c1076b32c7d1ce4ba19c98beade79a74ac818 /fs
parent2f9fd182607e7b3bdca35f6ed7f2fae539f7c46b (diff)
pnfsblock: xdr decode pnfs_block_layout4
XDR decodes the block layout payload sent in the LAYOUTGET result, storing the result in an extent list.

[pnfsblock: get rid of deprecated xdr macros]
Signed-off-by: Jim Rees <rees@umich.edu>
Signed-off-by: Fred Isaman <iisaman@citi.umich.edu>
[pnfsblock: fix bug getting pnfs_layout_type in translate_devid().]
Signed-off-by: Tao Guo <guotao@nrchpc.ac.cn>
Signed-off-by: Benny Halevy <bhalevy@panasas.com>
Signed-off-by: Benny Halevy <bhalevy@tonian.com>
Signed-off-by: Jim Rees <rees@umich.edu>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Diffstat (limited to 'fs')
-rw-r--r--fs/nfs/blocklayout/blocklayoutdev.c208
1 files changed, 206 insertions, 2 deletions
diff --git a/fs/nfs/blocklayout/blocklayoutdev.c b/fs/nfs/blocklayout/blocklayoutdev.c
index b23fe601d1c9..a83b393fb01c 100644
--- a/fs/nfs/blocklayout/blocklayoutdev.c
+++ b/fs/nfs/blocklayout/blocklayoutdev.c
@@ -40,6 +40,19 @@
40 40
41#define NFSDBG_FACILITY NFSDBG_PNFS_LD 41#define NFSDBG_FACILITY NFSDBG_PNFS_LD
42 42
43static int decode_sector_number(__be32 **rp, sector_t *sp)
44{
45 uint64_t s;
46
47 *rp = xdr_decode_hyper(*rp, &s);
48 if (s & 0x1ff) {
49 printk(KERN_WARNING "%s: sector not aligned\n", __func__);
50 return -1;
51 }
52 *sp = s >> SECTOR_SHIFT;
53 return 0;
54}
55
43/* Open a block_device by device number. */ 56/* Open a block_device by device number. */
44struct block_device *nfs4_blkdev_get(dev_t dev) 57struct block_device *nfs4_blkdev_get(dev_t dev)
45{ 58{
@@ -197,10 +210,201 @@ out:
197 return rv; 210 return rv;
198} 211}
199 212
213/* Map deviceid returned by the server to constructed block_device */
214static struct block_device *translate_devid(struct pnfs_layout_hdr *lo,
215 struct nfs4_deviceid *id)
216{
217 struct block_device *rv = NULL;
218 struct block_mount_id *mid;
219 struct pnfs_block_dev *dev;
220
221 dprintk("%s enter, lo=%p, id=%p\n", __func__, lo, id);
222 mid = BLK_ID(lo);
223 spin_lock(&mid->bm_lock);
224 list_for_each_entry(dev, &mid->bm_devlist, bm_node) {
225 if (memcmp(id->data, dev->bm_mdevid.data,
226 NFS4_DEVICEID4_SIZE) == 0) {
227 rv = dev->bm_mdev;
228 goto out;
229 }
230 }
231 out:
232 spin_unlock(&mid->bm_lock);
233 dprintk("%s returning %p\n", __func__, rv);
234 return rv;
235}
236
237/* Tracks info needed to ensure extents in layout obey constraints of spec */
238struct layout_verification {
239 u32 mode; /* R or RW */
240 u64 start; /* Expected start of next non-COW extent */
241 u64 inval; /* Start of INVAL coverage */
242 u64 cowread; /* End of COW read coverage */
243};
244
245/* Verify the extent meets the layout requirements of the pnfs-block draft,
246 * section 2.3.1.
247 */
248static int verify_extent(struct pnfs_block_extent *be,
249 struct layout_verification *lv)
250{
251 if (lv->mode == IOMODE_READ) {
252 if (be->be_state == PNFS_BLOCK_READWRITE_DATA ||
253 be->be_state == PNFS_BLOCK_INVALID_DATA)
254 return -EIO;
255 if (be->be_f_offset != lv->start)
256 return -EIO;
257 lv->start += be->be_length;
258 return 0;
259 }
260 /* lv->mode == IOMODE_RW */
261 if (be->be_state == PNFS_BLOCK_READWRITE_DATA) {
262 if (be->be_f_offset != lv->start)
263 return -EIO;
264 if (lv->cowread > lv->start)
265 return -EIO;
266 lv->start += be->be_length;
267 lv->inval = lv->start;
268 return 0;
269 } else if (be->be_state == PNFS_BLOCK_INVALID_DATA) {
270 if (be->be_f_offset != lv->start)
271 return -EIO;
272 lv->start += be->be_length;
273 return 0;
274 } else if (be->be_state == PNFS_BLOCK_READ_DATA) {
275 if (be->be_f_offset > lv->start)
276 return -EIO;
277 if (be->be_f_offset < lv->inval)
278 return -EIO;
279 if (be->be_f_offset < lv->cowread)
280 return -EIO;
281 /* It looks like you might want to min this with lv->start,
282 * but you really don't.
283 */
284 lv->inval = lv->inval + be->be_length;
285 lv->cowread = be->be_f_offset + be->be_length;
286 return 0;
287 } else
288 return -EIO;
289}
290
291/* XDR decode pnfs_block_layout4 structure */
200int 292int
201nfs4_blk_process_layoutget(struct pnfs_layout_hdr *lo, 293nfs4_blk_process_layoutget(struct pnfs_layout_hdr *lo,
202 struct nfs4_layoutget_res *lgr, gfp_t gfp_flags) 294 struct nfs4_layoutget_res *lgr, gfp_t gfp_flags)
203{ 295{
204 /* STUB */ 296 struct pnfs_block_layout *bl = BLK_LO2EXT(lo);
205 return -EIO; 297 int i, status = -EIO;
298 uint32_t count;
299 struct pnfs_block_extent *be = NULL, *save;
300 struct xdr_stream stream;
301 struct xdr_buf buf;
302 struct page *scratch;
303 __be32 *p;
304 struct layout_verification lv = {
305 .mode = lgr->range.iomode,
306 .start = lgr->range.offset >> SECTOR_SHIFT,
307 .inval = lgr->range.offset >> SECTOR_SHIFT,
308 .cowread = lgr->range.offset >> SECTOR_SHIFT,
309 };
310 LIST_HEAD(extents);
311
312 dprintk("---> %s\n", __func__);
313
314 scratch = alloc_page(gfp_flags);
315 if (!scratch)
316 return -ENOMEM;
317
318 xdr_init_decode_pages(&stream, &buf, lgr->layoutp->pages, lgr->layoutp->len);
319 xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE);
320
321 p = xdr_inline_decode(&stream, 4);
322 if (unlikely(!p))
323 goto out_err;
324
325 count = be32_to_cpup(p++);
326
327 dprintk("%s enter, number of extents %i\n", __func__, count);
328 p = xdr_inline_decode(&stream, (28 + NFS4_DEVICEID4_SIZE) * count);
329 if (unlikely(!p))
330 goto out_err;
331
332 /* Decode individual extents, putting them in temporary
333 * staging area until whole layout is decoded to make error
334 * recovery easier.
335 */
336 for (i = 0; i < count; i++) {
337 be = bl_alloc_extent();
338 if (!be) {
339 status = -ENOMEM;
340 goto out_err;
341 }
342 memcpy(&be->be_devid, p, NFS4_DEVICEID4_SIZE);
343 p += XDR_QUADLEN(NFS4_DEVICEID4_SIZE);
344 be->be_mdev = translate_devid(lo, &be->be_devid);
345 if (!be->be_mdev)
346 goto out_err;
347
348 /* The next three values are read in as bytes,
349 * but stored as 512-byte sector lengths
350 */
351 if (decode_sector_number(&p, &be->be_f_offset) < 0)
352 goto out_err;
353 if (decode_sector_number(&p, &be->be_length) < 0)
354 goto out_err;
355 if (decode_sector_number(&p, &be->be_v_offset) < 0)
356 goto out_err;
357 be->be_state = be32_to_cpup(p++);
358 if (be->be_state == PNFS_BLOCK_INVALID_DATA)
359 be->be_inval = &bl->bl_inval;
360 if (verify_extent(be, &lv)) {
361 dprintk("%s verify failed\n", __func__);
362 goto out_err;
363 }
364 list_add_tail(&be->be_node, &extents);
365 }
366 if (lgr->range.offset + lgr->range.length !=
367 lv.start << SECTOR_SHIFT) {
368 dprintk("%s Final length mismatch\n", __func__);
369 be = NULL;
370 goto out_err;
371 }
372 if (lv.start < lv.cowread) {
373 dprintk("%s Final uncovered COW extent\n", __func__);
374 be = NULL;
375 goto out_err;
376 }
377 /* Extents decoded properly, now try to merge them in to
378 * existing layout extents.
379 */
380 spin_lock(&bl->bl_ext_lock);
381 list_for_each_entry_safe(be, save, &extents, be_node) {
382 list_del(&be->be_node);
383 status = bl_add_merge_extent(bl, be);
384 if (status) {
385 spin_unlock(&bl->bl_ext_lock);
386 /* This is a fairly catastrophic error, as the
387 * entire layout extent lists are now corrupted.
388 * We should have some way to distinguish this.
389 */
390 be = NULL;
391 goto out_err;
392 }
393 }
394 spin_unlock(&bl->bl_ext_lock);
395 status = 0;
396 out:
397 __free_page(scratch);
398 dprintk("%s returns %i\n", __func__, status);
399 return status;
400
401 out_err:
402 bl_put_extent(be);
403 while (!list_empty(&extents)) {
404 be = list_first_entry(&extents, struct pnfs_block_extent,
405 be_node);
406 list_del(&be->be_node);
407 bl_put_extent(be);
408 }
409 goto out;
206} 410}