Diffstat (limited to 'fs/nfs/pnfs.c'):

 -rw-r--r--  fs/nfs/pnfs.c | 385 ++++++++++++++++++++++++++++++++++++-----
 1 file changed, 353 insertions(+), 32 deletions(-)
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 891a0c36f992..d1ad7df3479e 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -140,6 +140,11 @@ pnfs_register_layoutdriver(struct pnfs_layoutdriver_type *ld_type)
                 printk(KERN_ERR "%s id 0 is reserved\n", __func__);
                 return status;
         }
+        if (!ld_type->alloc_lseg || !ld_type->free_lseg) {
+                printk(KERN_ERR "%s Layout driver must provide "
+                        "alloc_lseg and free_lseg.\n", __func__);
+                return status;
+        }
 
         spin_lock(&pnfs_spinlock);
         tmp = find_pnfs_driver_locked(ld_type->id);
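
The new check makes alloc_lseg/free_lseg mandatory at registration time: destroy_lseg below now calls the driver's free_lseg() instead of kfree(), so a driver registered without these hooks would crash at I/O time rather than fail cleanly here. A minimal userspace sketch of the same validate-at-registration pattern; the struct fields mirror pnfs_layoutdriver_type, but everything else is illustrative:

    #include <stdio.h>

    struct lseg;                            /* opaque layout segment */

    struct layoutdriver {                   /* mirrors pnfs_layoutdriver_type */
            unsigned int id;
            struct lseg *(*alloc_lseg)(void);
            void (*free_lseg)(struct lseg *);
    };

    static int register_driver(const struct layoutdriver *ld)
    {
            if (ld->id == 0) {
                    fprintf(stderr, "id 0 is reserved\n");
                    return -1;
            }
            /* Reject incomplete drivers up front, as the new check does. */
            if (!ld->alloc_lseg || !ld->free_lseg) {
                    fprintf(stderr, "driver must provide alloc_lseg and free_lseg\n");
                    return -1;
            }
            return 0;
    }

    int main(void)
    {
            const struct layoutdriver incomplete = { .id = 4 };  /* no hooks */
            return register_driver(&incomplete) == -1 ? 0 : 1;   /* rejected */
    }
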
@@ -168,6 +173,10 @@ pnfs_unregister_layoutdriver(struct pnfs_layoutdriver_type *ld_type)
 }
 EXPORT_SYMBOL_GPL(pnfs_unregister_layoutdriver);
 
+/*
+ * pNFS client layout cache
+ */
+
 static void
 get_layout_hdr_locked(struct pnfs_layout_hdr *lo)
 {
@@ -190,7 +199,7 @@ put_layout_hdr_locked(struct pnfs_layout_hdr *lo)
         }
 }
 
-static void
+void
 put_layout_hdr(struct inode *inode)
 {
         spin_lock(&inode->i_lock);
@@ -215,7 +224,7 @@ destroy_lseg(struct kref *kref)
         struct inode *ino = lseg->layout->inode;
 
         dprintk("--> %s\n", __func__);
-        kfree(lseg);
+        NFS_SERVER(ino)->pnfs_curr_ld->free_lseg(lseg);
         /* Matched by get_layout_hdr_locked in pnfs_insert_layout */
         put_layout_hdr(ino);
 }
@@ -249,6 +258,9 @@ pnfs_clear_lseg_list(struct pnfs_layout_hdr *lo, struct list_head *tmp_list)
         /* List does not take a reference, so no need for put here */
         list_del_init(&lo->layouts);
         spin_unlock(&clp->cl_lock);
+        write_seqlock(&lo->seqlock);
+        clear_bit(NFS_LAYOUT_STATEID_SET, &lo->state);
+        write_sequnlock(&lo->seqlock);
 
         dprintk("%s:Return\n", __func__);
 }
@@ -307,40 +319,135 @@ pnfs_destroy_all_layouts(struct nfs_client *clp)
         }
 }
 
-static void pnfs_insert_layout(struct pnfs_layout_hdr *lo,
-                               struct pnfs_layout_segment *lseg);
+/*
+ * Update lo->stateid with the new stateid, if it is more recent.
+ *
+ * lo->stateid could be the open stateid, in which case we just use
+ * what is given.
+ */
+static void
+pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo,
+                        const nfs4_stateid *new)
+{
+        nfs4_stateid *old = &lo->stateid;
+        bool overwrite = false;
+
+        write_seqlock(&lo->seqlock);
+        if (!test_bit(NFS_LAYOUT_STATEID_SET, &lo->state) ||
+            memcmp(old->stateid.other, new->stateid.other,
+                   sizeof(new->stateid.other)))
+                overwrite = true;
+        else {
+                u32 oldseq, newseq;
+
+                oldseq = be32_to_cpu(old->stateid.seqid);
+                newseq = be32_to_cpu(new->stateid.seqid);
+                if ((int)(newseq - oldseq) > 0)
+                        overwrite = true;
+        }
+        if (overwrite)
+                memcpy(&old->stateid, &new->stateid, sizeof(new->stateid));
+        write_sequnlock(&lo->seqlock);
+}
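
The "(int)(newseq - oldseq) > 0" test above is wraparound-safe serial-number arithmetic: the unsigned subtraction is reinterpreted as signed, so a seqid that has recently wrapped past zero still compares as newer. A standalone sketch of the comparison (userspace, fixed-width types standing in for the kernel's u32, byte-order conversion omitted):

    #include <stdio.h>
    #include <stdint.h>

    static int seqid_is_newer(uint32_t newseq, uint32_t oldseq)
    {
            return (int32_t)(newseq - oldseq) > 0;
    }

    int main(void)
    {
            /* Ordinary case: 5 is newer than 3. */
            printf("%d\n", seqid_is_newer(5, 3));                  /* 1 */
            /* Wraparound: 1 is newer than 0xffffffff. */
            printf("%d\n", seqid_is_newer(1, 0xffffffffu));        /* 1 */
            /* Stale update: 3 is not newer than 5, so it is dropped. */
            printf("%d\n", seqid_is_newer(3, 5));                  /* 0 */
            return 0;
    }
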
+
+static void
+pnfs_layout_from_open_stateid(struct pnfs_layout_hdr *lo,
+                              struct nfs4_state *state)
+{
+        int seq;
+
+        dprintk("--> %s\n", __func__);
+        write_seqlock(&lo->seqlock);
+        do {
+                seq = read_seqbegin(&state->seqlock);
+                memcpy(lo->stateid.data, state->stateid.data,
+                       sizeof(state->stateid.data));
+        } while (read_seqretry(&state->seqlock, seq));
+        set_bit(NFS_LAYOUT_STATEID_SET, &lo->state);
+        write_sequnlock(&lo->seqlock);
+        dprintk("<-- %s\n", __func__);
+}
+
+void
+pnfs_get_layout_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo,
+                        struct nfs4_state *open_state)
+{
+        int seq;
 
-/* Get layout from server. */
+        dprintk("--> %s\n", __func__);
+        do {
+                seq = read_seqbegin(&lo->seqlock);
+                if (!test_bit(NFS_LAYOUT_STATEID_SET, &lo->state)) {
+                        /* This will trigger retry of the read */
+                        pnfs_layout_from_open_stateid(lo, open_state);
+                } else
+                        memcpy(dst->data, lo->stateid.data,
+                               sizeof(lo->stateid.data));
+        } while (read_seqretry(&lo->seqlock, seq));
+        dprintk("<-- %s\n", __func__);
+}
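
pnfs_get_layout_stateid relies on a seqlock side effect: when the stateid is not yet set, the reader itself takes the write side via pnfs_layout_from_open_stateid(), which bumps the sequence count and so forces its own read_seqretry() to loop; the retry then sees NFS_LAYOUT_STATEID_SET and takes a consistent copy. A single-threaded userspace sketch of that idiom with a toy seqcount (a real multi-reader version would also need memory barriers and a writer lock):

    #include <stdatomic.h>
    #include <stdio.h>

    static atomic_uint seq;     /* even = quiescent, odd = write in progress */
    static int value_set;
    static int value;

    static unsigned read_seqbegin_(void)
    {
            unsigned s;

            while ((s = atomic_load(&seq)) & 1)
                    ;           /* writer active: wait for it to finish */
            return s;
    }

    static int read_seqretry_(unsigned s)
    {
            return atomic_load(&seq) != s;
    }

    static void write_value(int v)
    {
            atomic_fetch_add(&seq, 1);  /* write_seqlock: count goes odd */
            value = v;
            value_set = 1;
            atomic_fetch_add(&seq, 1);  /* write_sequnlock: count goes even */
    }

    int main(void)
    {
            int copy = 0, retries = 0;
            unsigned s;

            do {
                    s = read_seqbegin_();
                    if (!value_set) {
                            write_value(42);    /* forces our own retry */
                            retries++;
                    } else {
                            copy = value;
                    }
            } while (read_seqretry_(s));

            printf("copied %d after %d retry(ies)\n", copy, retries);
            return 0;
    }
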
+
+/*
+ * Get layout from server.
+ * For now, assume that whole-file layouts are requested:
+ *   arg->offset: 0
+ *   arg->length: all ones
+ */
 static struct pnfs_layout_segment *
 send_layoutget(struct pnfs_layout_hdr *lo,
            struct nfs_open_context *ctx,
            u32 iomode)
 {
         struct inode *ino = lo->inode;
-        struct pnfs_layout_segment *lseg;
+        struct nfs_server *server = NFS_SERVER(ino);
+        struct nfs4_layoutget *lgp;
+        struct pnfs_layout_segment *lseg = NULL;
+
+        dprintk("--> %s\n", __func__);
 
-        /* Lets pretend we sent LAYOUTGET and got a response */
-        lseg = kzalloc(sizeof(*lseg), GFP_KERNEL);
+        BUG_ON(ctx == NULL);
+        lgp = kzalloc(sizeof(*lgp), GFP_KERNEL);
+        if (lgp == NULL) {
+                put_layout_hdr(lo->inode);
+                return NULL;
+        }
+        lgp->args.minlength = NFS4_MAX_UINT64;
+        lgp->args.maxcount = PNFS_LAYOUT_MAXSIZE;
+        lgp->args.range.iomode = iomode;
+        lgp->args.range.offset = 0;
+        lgp->args.range.length = NFS4_MAX_UINT64;
+        lgp->args.type = server->pnfs_curr_ld->id;
+        lgp->args.inode = ino;
+        lgp->args.ctx = get_nfs_open_context(ctx);
+        lgp->lsegpp = &lseg;
+
+        /* Synchronously retrieve layout information from server and
+         * store in lseg.
+         */
+        nfs4_proc_layoutget(lgp);
         if (!lseg) {
+                /* remember that LAYOUTGET failed and suspend trying */
                 set_bit(lo_fail_bit(iomode), &lo->state);
-                spin_lock(&ino->i_lock);
-                put_layout_hdr_locked(lo);
-                spin_unlock(&ino->i_lock);
-                return NULL;
         }
-        init_lseg(lo, lseg);
-        lseg->iomode = IOMODE_RW;
-        spin_lock(&ino->i_lock);
-        pnfs_insert_layout(lo, lseg);
-        put_layout_hdr_locked(lo);
-        spin_unlock(&ino->i_lock);
         return lseg;
 }
 
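
send_layoutget() is synchronous only from the caller's point of view: it fills the nfs4_layoutget args for a whole-file range (offset 0, length all ones), points lgp->lsegpp at a local lseg pointer, and the reply path (pnfs_layout_process below) stores the resulting segment through that pointer before nfs4_proc_layoutget() returns. A userspace sketch of that out-parameter handshake; all names here are illustrative stand-ins, not the kernel API:

    #include <stdio.h>
    #include <stdlib.h>

    struct lseg { int iomode; };

    struct layoutget {
            struct lseg **lsegpp;   /* reply path stores the result here */
            int iomode;
    };

    /* Stand-in for the rpc reply path (pnfs_layout_process). */
    static void layoutget_done(struct layoutget *lgp)
    {
            struct lseg *l = malloc(sizeof(*l));

            if (!l)
                    return;         /* *lsegpp stays NULL: caller sees failure */
            l->iomode = lgp->iomode;
            *lgp->lsegpp = l;
    }

    static struct lseg *send_layoutget_sketch(int iomode)
    {
            struct lseg *lseg = NULL;
            struct layoutget lgp = { .lsegpp = &lseg, .iomode = iomode };

            layoutget_done(&lgp);   /* the "synchronous" rpc */
            if (!lseg)
                    fprintf(stderr, "LAYOUTGET failed; suspend retries\n");
            return lseg;
    }

    int main(void)
    {
            struct lseg *l = send_layoutget_sketch(1);

            printf("got lseg %p\n", (void *)l);
            free(l);
            return 0;
    }
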
+/*
+ * Compare two layout segments for sorting into layout cache.
+ * We want to preferentially return RW over RO layouts, so ensure those
+ * are seen first.
+ */
+static s64
+cmp_layout(u32 iomode1, u32 iomode2)
+{
+        /* read > read/write */
+        return (int)(iomode2 == IOMODE_READ) - (int)(iomode1 == IOMODE_READ);
+}
+
 static void
 pnfs_insert_layout(struct pnfs_layout_hdr *lo,
                    struct pnfs_layout_segment *lseg)
 {
+        struct pnfs_layout_segment *lp;
+        int found = 0;
+
         dprintk("%s:Begin\n", __func__);
 
         assert_spin_locked(&lo->inode->i_lock);
@@ -352,19 +459,28 @@ pnfs_insert_layout(struct pnfs_layout_hdr *lo,
                 list_add_tail(&lo->layouts, &clp->cl_layouts);
                 spin_unlock(&clp->cl_lock);
         }
-        get_layout_hdr_locked(lo);
-        /* STUB - add the constructed lseg if necessary */
-        if (list_empty(&lo->segs)) {
+        list_for_each_entry(lp, &lo->segs, fi_list) {
+                if (cmp_layout(lp->range.iomode, lseg->range.iomode) > 0)
+                        continue;
+                list_add_tail(&lseg->fi_list, &lp->fi_list);
+                dprintk("%s: inserted lseg %p "
+                        "iomode %d offset %llu length %llu before "
+                        "lp %p iomode %d offset %llu length %llu\n",
+                        __func__, lseg, lseg->range.iomode,
+                        lseg->range.offset, lseg->range.length,
+                        lp, lp->range.iomode, lp->range.offset,
+                        lp->range.length);
+                found = 1;
+                break;
+        }
+        if (!found) {
                 list_add_tail(&lseg->fi_list, &lo->segs);
-                dprintk("%s: inserted lseg %p iomode %d at tail\n",
-                        __func__, lseg, lseg->iomode);
-        } else {
-                /* There is no harm for the moment in calling this
-                 * with the lock held, and the call will be removed
-                 * with the STUB.
-                 */
-                put_lseg(lseg);
+                dprintk("%s: inserted lseg %p "
+                        "iomode %d offset %llu length %llu at tail\n",
+                        __func__, lseg, lseg->range.iomode,
+                        lseg->range.offset, lseg->range.length);
         }
+        get_layout_hdr_locked(lo);
 
         dprintk("%s:Return\n", __func__);
 }
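
cmp_layout() gives IOMODE_RW segments a lower sort key than IOMODE_READ ones, and pnfs_insert_layout() walks the list until the comparison stops returning > 0, so RW segments always land ahead of READ segments and pnfs_has_layout() below can stop early. A userspace sketch of the same ordered insert over a plain singly linked list (IOMODE_* values assumed to match the kernel's READ=1, RW=2):

    #include <stdio.h>

    enum { IOMODE_READ = 1, IOMODE_RW = 2 };

    /* read > read/write, exactly as cmp_layout() above */
    static int cmp_layout(unsigned iomode1, unsigned iomode2)
    {
            return (int)(iomode2 == IOMODE_READ) - (int)(iomode1 == IOMODE_READ);
    }

    struct seg { unsigned iomode; struct seg *next; };

    /* Insert before the first entry that does not sort ahead of us. */
    static void insert_sorted(struct seg **head, struct seg *new)
    {
            struct seg **pp = head;

            while (*pp && cmp_layout((*pp)->iomode, new->iomode) > 0)
                    pp = &(*pp)->next;
            new->next = *pp;
            *pp = new;
    }

    int main(void)
    {
            struct seg a = { IOMODE_READ }, b = { IOMODE_RW }, c = { IOMODE_READ };
            struct seg *head = NULL, *s;

            insert_sorted(&head, &a);
            insert_sorted(&head, &b);
            insert_sorted(&head, &c);
            for (s = head; s; s = s->next)      /* prints "2 1 1": RW first */
                    printf("%u ", s->iomode);
            printf("\n");
            return 0;
    }
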
@@ -380,6 +496,7 @@ alloc_init_layout_hdr(struct inode *ino)
         lo->refcount = 1;
         INIT_LIST_HEAD(&lo->layouts);
         INIT_LIST_HEAD(&lo->segs);
+        seqlock_init(&lo->seqlock);
         lo->inode = ino;
         return lo;
 }
@@ -407,11 +524,46 @@ pnfs_find_alloc_layout(struct inode *ino)
         return nfsi->layout;
 }
 
-/* STUB - LAYOUTGET never succeeds, so cache is empty */
+/*
+ * iomode matching rules:
+ * iomode       lseg    match
+ * ------       ----    -----
+ * ANY          READ    true
+ * ANY          RW      true
+ * RW           READ    false
+ * RW           RW      true
+ * READ         READ    true
+ * READ         RW      true
+ */
+static int
+is_matching_lseg(struct pnfs_layout_segment *lseg, u32 iomode)
+{
+        return (iomode != IOMODE_RW || lseg->range.iomode == IOMODE_RW);
+}
+
+/*
+ * lookup range in layout
+ */
 static struct pnfs_layout_segment *
 pnfs_has_layout(struct pnfs_layout_hdr *lo, u32 iomode)
 {
-        return NULL;
+        struct pnfs_layout_segment *lseg, *ret = NULL;
+
+        dprintk("%s:Begin\n", __func__);
+
+        assert_spin_locked(&lo->inode->i_lock);
+        list_for_each_entry(lseg, &lo->segs, fi_list) {
+                if (is_matching_lseg(lseg, iomode)) {
+                        ret = lseg;
+                        break;
+                }
+                if (cmp_layout(iomode, lseg->range.iomode) > 0)
+                        break;
+        }
+
+        dprintk("%s:Return lseg %p ref %d\n",
+                __func__, ret, ret ? atomic_read(&ret->kref.refcount) : 0);
+        return ret;
+}
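
is_matching_lseg() implements the table above with one expression: only an IOMODE_RW request is picky; IOMODE_READ (and IOMODE_ANY) requests are satisfied by any segment. A quick standalone check that the expression reproduces the table (IOMODE_ANY assumed to be 0, READ 1, RW 2, matching the kernel's enum):

    #include <stdio.h>

    enum { IOMODE_ANY = 0, IOMODE_READ = 1, IOMODE_RW = 2 };

    static int is_matching(unsigned lseg_iomode, unsigned iomode)
    {
            return iomode != IOMODE_RW || lseg_iomode == IOMODE_RW;
    }

    int main(void)
    {
            static const char *name[] = { "ANY", "READ", "RW" };
            unsigned req, seg;

            /* Prints the six (iomode, lseg, match) rows of the table. */
            for (req = IOMODE_ANY; req <= IOMODE_RW; req++)
                    for (seg = IOMODE_READ; seg <= IOMODE_RW; seg++)
                            printf("%-5s %-5s %s\n", name[req], name[seg],
                                   is_matching(seg, req) ? "true" : "false");
            return 0;
    }
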
 
 /*
@@ -448,7 +600,7 @@ pnfs_update_layout(struct inode *ino,
         if (test_bit(lo_fail_bit(iomode), &nfsi->layout->state))
                 goto out_unlock;
 
-        get_layout_hdr_locked(lo);
+        get_layout_hdr_locked(lo); /* Matched in nfs4_layoutget_release */
         spin_unlock(&ino->i_lock);
 
         lseg = send_layoutget(lo, ctx, iomode);
@@ -460,3 +612,172 @@ out_unlock:
         spin_unlock(&ino->i_lock);
         goto out;
 }
+
+int
+pnfs_layout_process(struct nfs4_layoutget *lgp)
+{
+        struct pnfs_layout_hdr *lo = NFS_I(lgp->args.inode)->layout;
+        struct nfs4_layoutget_res *res = &lgp->res;
+        struct pnfs_layout_segment *lseg;
+        struct inode *ino = lo->inode;
+        int status = 0;
+
+        /* Inject layout blob into I/O device driver */
+        lseg = NFS_SERVER(ino)->pnfs_curr_ld->alloc_lseg(lo, res);
+        if (!lseg || IS_ERR(lseg)) {
+                if (!lseg)
+                        status = -ENOMEM;
+                else
+                        status = PTR_ERR(lseg);
+                dprintk("%s: Could not allocate layout: error %d\n",
+                        __func__, status);
+                goto out;
+        }
+
+        spin_lock(&ino->i_lock);
+        init_lseg(lo, lseg);
+        lseg->range = res->range;
+        *lgp->lsegpp = lseg;
+        pnfs_insert_layout(lo, lseg);
+
+        /* Done processing layoutget. Set the layout stateid */
+        pnfs_set_layout_stateid(lo, &res->stateid);
+        spin_unlock(&ino->i_lock);
+out:
+        return status;
+}
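
alloc_lseg() can fail two ways: a NULL return (treated as -ENOMEM) or a kernel-style error pointer, an invalid pointer value encoding a negative errno so one return slot carries both results. A userspace sketch of that convention; the kernel's real macros live in linux/err.h, this is a simplified stand-in:

    #include <stdio.h>
    #include <stdint.h>
    #include <errno.h>

    /* Simplified ERR_PTR/PTR_ERR/IS_ERR: errnos live in the top, unmapped
     * page of the address space, so they never collide with real pointers. */
    #define MAX_ERRNO 4095

    static void *ERR_PTR(long err)      { return (void *)(intptr_t)err; }
    static long  PTR_ERR(const void *p) { return (long)(intptr_t)p; }
    static int   IS_ERR(const void *p)
    {
            return (uintptr_t)p >= (uintptr_t)-MAX_ERRNO;
    }

    static void *alloc_lseg_sketch(int fail)
    {
            if (fail)
                    return ERR_PTR(-EIO);  /* decode error, not out-of-memory */
            return NULL;                   /* pretend the allocation failed */
    }

    int main(void)
    {
            void *lseg = alloc_lseg_sketch(1);
            int status = 0;

            if (!lseg || IS_ERR(lseg)) {   /* same test as pnfs_layout_process */
                    status = !lseg ? -ENOMEM : (int)PTR_ERR(lseg);
                    printf("could not allocate layout: error %d\n", status);
            }
            return 0;
    }
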
+
+/*
+ * Device ID cache. Currently supports one layout type per struct nfs_client.
+ * Add layout type to the lookup key to expand to support multiple types.
+ */
+int
+pnfs_alloc_init_deviceid_cache(struct nfs_client *clp,
+                         void (*free_callback)(struct pnfs_deviceid_node *))
+{
+        struct pnfs_deviceid_cache *c;
+
+        c = kzalloc(sizeof(struct pnfs_deviceid_cache), GFP_KERNEL);
+        if (!c)
+                return -ENOMEM;
+        spin_lock(&clp->cl_lock);
+        if (clp->cl_devid_cache != NULL) {
+                atomic_inc(&clp->cl_devid_cache->dc_ref);
+                dprintk("%s [kref [%d]]\n", __func__,
+                        atomic_read(&clp->cl_devid_cache->dc_ref));
+                kfree(c);
+        } else {
+                /* kzalloc initializes hlists */
+                spin_lock_init(&c->dc_lock);
+                atomic_set(&c->dc_ref, 1);
+                c->dc_free_callback = free_callback;
+                clp->cl_devid_cache = c;
+                dprintk("%s [new]\n", __func__);
+        }
+        spin_unlock(&clp->cl_lock);
+        return 0;
+}
+EXPORT_SYMBOL_GPL(pnfs_alloc_init_deviceid_cache);
+
+/*
+ * Called from pnfs_layoutdriver_type->free_lseg
+ * last layout segment reference frees deviceid
+ */
+void
+pnfs_put_deviceid(struct pnfs_deviceid_cache *c,
+                  struct pnfs_deviceid_node *devid)
+{
+        struct nfs4_deviceid *id = &devid->de_id;
+        struct pnfs_deviceid_node *d;
+        struct hlist_node *n;
+        long h = nfs4_deviceid_hash(id);
+
+        dprintk("%s [%d]\n", __func__, atomic_read(&devid->de_ref));
+        if (!atomic_dec_and_lock(&devid->de_ref, &c->dc_lock))
+                return;
+
+        hlist_for_each_entry_rcu(d, n, &c->dc_deviceids[h], de_node)
+                if (!memcmp(&d->de_id, id, sizeof(*id))) {
+                        hlist_del_rcu(&d->de_node);
+                        spin_unlock(&c->dc_lock);
+                        synchronize_rcu();
+                        c->dc_free_callback(devid);
+                        return;
+                }
+        spin_unlock(&c->dc_lock);
+        /* Why wasn't it found in the list? */
+        BUG();
+}
+EXPORT_SYMBOL_GPL(pnfs_put_deviceid);
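
atomic_dec_and_lock() is what keeps the put side safe against the RCU lookup below: the reference count can only reach zero with dc_lock held, so a concurrent pnfs_find_get_deviceid() either bumps the count before it falls to zero or loses the race and sees atomic_inc_not_zero() fail. A userspace sketch of dec-and-lock semantics using C11 atomics and a pthread mutex (compile with -pthread):

    #include <stdatomic.h>
    #include <pthread.h>
    #include <stdio.h>

    /* Return 1 with *lock held iff the count dropped to zero. */
    static int atomic_dec_and_lock_sketch(atomic_int *cnt, pthread_mutex_t *lock)
    {
            int old = atomic_load(cnt);

            /* Fast path: not the last reference, avoid the lock entirely. */
            while (old > 1)
                    if (atomic_compare_exchange_weak(cnt, &old, old - 1))
                            return 0;
            /* Might be the last reference: decide under the lock. */
            pthread_mutex_lock(lock);
            if (atomic_fetch_sub(cnt, 1) == 1)
                    return 1;   /* caller must unlock after teardown */
            pthread_mutex_unlock(lock);
            return 0;
    }

    int main(void)
    {
            pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
            atomic_int ref = 2;

            printf("%d\n", atomic_dec_and_lock_sketch(&ref, &lock)); /* 0 */
            if (atomic_dec_and_lock_sketch(&ref, &lock)) {           /* 1 */
                    printf("last ref, freeing under lock\n");
                    pthread_mutex_unlock(&lock);
            }
            return 0;
    }
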
+
+/* Find and reference a deviceid */
+struct pnfs_deviceid_node *
+pnfs_find_get_deviceid(struct pnfs_deviceid_cache *c, struct nfs4_deviceid *id)
+{
+        struct pnfs_deviceid_node *d;
+        struct hlist_node *n;
+        long hash = nfs4_deviceid_hash(id);
+
+        dprintk("--> %s hash %ld\n", __func__, hash);
+        rcu_read_lock();
+        hlist_for_each_entry_rcu(d, n, &c->dc_deviceids[hash], de_node) {
+                if (!memcmp(&d->de_id, id, sizeof(*id))) {
+                        if (!atomic_inc_not_zero(&d->de_ref)) {
+                                goto fail;
+                        } else {
+                                rcu_read_unlock();
+                                return d;
+                        }
+                }
+        }
+fail:
+        rcu_read_unlock();
+        return NULL;
+}
+EXPORT_SYMBOL_GPL(pnfs_find_get_deviceid);
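
The lookup holds only rcu_read_lock(), so the entry it finds may already be on its way to being freed; atomic_inc_not_zero() refuses to resurrect a reference count that has reached zero, which is what makes the lockless find safe against pnfs_put_deviceid() above. A userspace sketch of inc-not-zero with C11 atomics:

    #include <stdatomic.h>
    #include <stdio.h>

    /* Increment *cnt unless it is zero; return nonzero iff a ref was taken. */
    static int atomic_inc_not_zero_sketch(atomic_int *cnt)
    {
            int old = atomic_load(cnt);

            while (old != 0)
                    if (atomic_compare_exchange_weak(cnt, &old, old + 1))
                            return 1;
            return 0;
    }

    int main(void)
    {
            atomic_int live = 1, dying = 0;

            printf("%d\n", atomic_inc_not_zero_sketch(&live));   /* 1: ref taken */
            printf("%d\n", atomic_inc_not_zero_sketch(&dying));  /* 0: lost race */
            return 0;
    }
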
+
+/*
+ * Add a deviceid to the cache.
+ * GETDEVICEINFOs for the same deviceid can race; if the deviceid is
+ * already cached, discard the new node.
+ */
+struct pnfs_deviceid_node *
+pnfs_add_deviceid(struct pnfs_deviceid_cache *c, struct pnfs_deviceid_node *new)
+{
+        struct pnfs_deviceid_node *d;
+        long hash = nfs4_deviceid_hash(&new->de_id);
+
+        dprintk("--> %s hash %ld\n", __func__, hash);
+        spin_lock(&c->dc_lock);
+        d = pnfs_find_get_deviceid(c, &new->de_id);
+        if (d) {
+                spin_unlock(&c->dc_lock);
+                dprintk("%s [discard]\n", __func__);
+                c->dc_free_callback(new);
+                return d;
+        }
+        INIT_HLIST_NODE(&new->de_node);
+        atomic_set(&new->de_ref, 1);
+        hlist_add_head_rcu(&new->de_node, &c->dc_deviceids[hash]);
+        spin_unlock(&c->dc_lock);
+        dprintk("%s [new]\n", __func__);
+        return new;
+}
+EXPORT_SYMBOL_GPL(pnfs_add_deviceid);
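
Two GETDEVICEINFO calls for the same deviceid can finish concurrently, so pnfs_add_deviceid() re-runs the lookup under dc_lock and frees the loser's node rather than inserting a duplicate. A userspace sketch of that check-under-lock insert over a toy list, with a single mutex standing in for the kernel's RCU-plus-spinlock scheme:

    #include <pthread.h>
    #include <stdio.h>
    #include <stdlib.h>

    struct node { int id; struct node *next; };

    static struct node *cache;
    static pthread_mutex_t cache_lock = PTHREAD_MUTEX_INITIALIZER;

    static struct node *find_locked(int id)
    {
            struct node *n;

            for (n = cache; n; n = n->next)
                    if (n->id == id)
                            return n;
            return NULL;
    }

    /* Insert new, or discard it if another caller won the race. */
    static struct node *add_node(struct node *new)
    {
            struct node *d;

            pthread_mutex_lock(&cache_lock);
            d = find_locked(new->id);
            if (d) {
                    pthread_mutex_unlock(&cache_lock);
                    free(new);          /* discard the loser */
                    return d;
            }
            new->next = cache;
            cache = new;
            pthread_mutex_unlock(&cache_lock);
            return new;
    }

    int main(void)
    {
            struct node *a = malloc(sizeof(*a)), *b = malloc(sizeof(*b));

            if (!a || !b)
                    return 1;
            a->id = b->id = 7;          /* two racing GETDEVICEINFO results */
            struct node *first = add_node(a);
            struct node *second = add_node(b);  /* b is freed, a returned */
            printf("same node: %s\n", first == second ? "yes" : "no");
            return 0;
    }
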
+
+void
+pnfs_put_deviceid_cache(struct nfs_client *clp)
+{
+        struct pnfs_deviceid_cache *local = clp->cl_devid_cache;
+
+        dprintk("--> %s cl_devid_cache %p\n", __func__, clp->cl_devid_cache);
+        if (atomic_dec_and_lock(&local->dc_ref, &clp->cl_lock)) {
+                int i;
+                /* Verify cache is empty */
+                for (i = 0; i < NFS4_DEVICE_ID_HASH_SIZE; i++)
+                        BUG_ON(!hlist_empty(&local->dc_deviceids[i]));
+                clp->cl_devid_cache = NULL;
+                spin_unlock(&clp->cl_lock);
+                kfree(local);
+        }
+}
+EXPORT_SYMBOL_GPL(pnfs_put_deviceid_cache);