about | summary | refs | log | tree | commit | diff | stats
path: root/fs
diff options
context:
space:
mode:
author: Trond Myklebust <Trond.Myklebust@netapp.com> 2013-02-12 09:48:42 -0500
committer: Trond Myklebust <Trond.Myklebust@netapp.com> 2013-02-14 13:22:50 -0500
commit: fd9a8d7160937f94aad36ac80d7255b4988740ac (patch)
tree: e437737b8f918134b2ab26bfb74883b0fc47092d /fs
parent: c8da19b9866ea84e9ad1c369393ea95d54ee7845 (diff)
NFSv4.1: Fix bulk recall and destroy of layouts
The current code in pnfs_destroy_all_layouts() assumes that removing the layout from the server->layouts list is sufficient to make it invisible to other processes. This ignores the fact that most users access the layout through the nfs_inode->layout... There is further breakage due to lack of reference counting of the layouts, meaning that the whole thing Oopses at the drop of a hat.

The code in initiate_bulk_draining() is almost correct, and can be used as a model for pnfs_destroy_all_layouts(), so move that code to pnfs.c, and refactor the code to allow us to choose between a single filesystem bulk recall, and a recall of all layouts.

Also note that initiate_bulk_draining() currently calls iput() while holding locks. Fix that too.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Cc: stable@vger.kernel.org
Diffstat (limited to 'fs')
-rw-r--r-- fs/nfs/callback_proc.c 61
-rw-r--r-- fs/nfs/pnfs.c 150
-rw-r--r-- fs/nfs/pnfs.h 7
3 files changed, 144 insertions, 74 deletions
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index 264d1aa935f2..2960512792c2 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -183,60 +183,15 @@ static u32 initiate_file_draining(struct nfs_client *clp,
183static u32 initiate_bulk_draining(struct nfs_client *clp, 183static u32 initiate_bulk_draining(struct nfs_client *clp,
184 struct cb_layoutrecallargs *args) 184 struct cb_layoutrecallargs *args)
185{ 185{
186 struct nfs_server *server; 186 int stat;
187 struct pnfs_layout_hdr *lo;
188 struct inode *ino;
189 u32 rv = NFS4ERR_NOMATCHING_LAYOUT;
190 struct pnfs_layout_hdr *tmp;
191 LIST_HEAD(recall_list);
192 LIST_HEAD(free_me_list);
193 struct pnfs_layout_range range = {
194 .iomode = IOMODE_ANY,
195 .offset = 0,
196 .length = NFS4_MAX_UINT64,
197 };
198
199 spin_lock(&clp->cl_lock);
200 rcu_read_lock();
201 list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
202 if ((args->cbl_recall_type == RETURN_FSID) &&
203 memcmp(&server->fsid, &args->cbl_fsid,
204 sizeof(struct nfs_fsid)))
205 continue;
206 187
207 list_for_each_entry(lo, &server->layouts, plh_layouts) { 188 if (args->cbl_recall_type == RETURN_FSID)
208 ino = igrab(lo->plh_inode); 189 stat = pnfs_destroy_layouts_byfsid(clp, &args->cbl_fsid, true);
209 if (!ino) 190 else
210 continue; 191 stat = pnfs_destroy_layouts_byclid(clp, true);
211 spin_lock(&ino->i_lock); 192 if (stat != 0)
212 /* Is this layout in the process of being freed? */ 193 return NFS4ERR_DELAY;
213 if (NFS_I(ino)->layout != lo) { 194 return NFS4ERR_NOMATCHING_LAYOUT;
214 spin_unlock(&ino->i_lock);
215 iput(ino);
216 continue;
217 }
218 pnfs_get_layout_hdr(lo);
219 spin_unlock(&ino->i_lock);
220 list_add(&lo->plh_bulk_recall, &recall_list);
221 }
222 }
223 rcu_read_unlock();
224 spin_unlock(&clp->cl_lock);
225
226 list_for_each_entry_safe(lo, tmp,
227 &recall_list, plh_bulk_recall) {
228 ino = lo->plh_inode;
229 spin_lock(&ino->i_lock);
230 set_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags);
231 if (pnfs_mark_matching_lsegs_invalid(lo, &free_me_list, &range))
232 rv = NFS4ERR_DELAY;
233 list_del_init(&lo->plh_bulk_recall);
234 spin_unlock(&ino->i_lock);
235 pnfs_free_lseg_list(&free_me_list);
236 pnfs_put_layout_hdr(lo);
237 iput(ino);
238 }
239 return rv;
240} 195}
241 196
242static u32 do_callback_layoutrecall(struct nfs_client *clp, 197static u32 do_callback_layoutrecall(struct nfs_client *clp,
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index d00260b08103..6be70f622b62 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -505,37 +505,147 @@ pnfs_destroy_layout(struct nfs_inode *nfsi)
505} 505}
506EXPORT_SYMBOL_GPL(pnfs_destroy_layout); 506EXPORT_SYMBOL_GPL(pnfs_destroy_layout);
507 507
508/* 508static bool
509 * Called by the state manger to remove all layouts established under an 509pnfs_layout_add_bulk_destroy_list(struct inode *inode,
510 * expired lease. 510 struct list_head *layout_list)
511 */
512void
513pnfs_destroy_all_layouts(struct nfs_client *clp)
514{ 511{
515 struct nfs_server *server;
516 struct pnfs_layout_hdr *lo; 512 struct pnfs_layout_hdr *lo;
517 LIST_HEAD(tmp_list); 513 bool ret = false;
518 514
519 nfs4_deviceid_mark_client_invalid(clp); 515 spin_lock(&inode->i_lock);
520 nfs4_deviceid_purge_client(clp); 516 lo = NFS_I(inode)->layout;
517 if (lo != NULL && list_empty(&lo->plh_bulk_destroy)) {
518 pnfs_get_layout_hdr(lo);
519 list_add(&lo->plh_bulk_destroy, layout_list);
520 ret = true;
521 }
522 spin_unlock(&inode->i_lock);
523 return ret;
524}
525
526/* Caller must hold rcu_read_lock and clp->cl_lock */
527static int
528pnfs_layout_bulk_destroy_byserver_locked(struct nfs_client *clp,
529 struct nfs_server *server,
530 struct list_head *layout_list)
531{
532 struct pnfs_layout_hdr *lo, *next;
533 struct inode *inode;
534
535 list_for_each_entry_safe(lo, next, &server->layouts, plh_layouts) {
536 inode = igrab(lo->plh_inode);
537 if (inode == NULL)
538 continue;
539 list_del_init(&lo->plh_layouts);
540 if (pnfs_layout_add_bulk_destroy_list(inode, layout_list))
541 continue;
542 rcu_read_unlock();
543 spin_unlock(&clp->cl_lock);
544 iput(inode);
545 spin_lock(&clp->cl_lock);
546 rcu_read_lock();
547 return -EAGAIN;
548 }
549 return 0;
550}
551
552static int
553pnfs_layout_free_bulk_destroy_list(struct list_head *layout_list,
554 bool is_bulk_recall)
555{
556 struct pnfs_layout_hdr *lo;
557 struct inode *inode;
558 struct pnfs_layout_range range = {
559 .iomode = IOMODE_ANY,
560 .offset = 0,
561 .length = NFS4_MAX_UINT64,
562 };
563 LIST_HEAD(lseg_list);
564 int ret = 0;
565
566 while (!list_empty(layout_list)) {
567 lo = list_entry(layout_list->next, struct pnfs_layout_hdr,
568 plh_bulk_destroy);
569 dprintk("%s freeing layout for inode %lu\n", __func__,
570 lo->plh_inode->i_ino);
571 inode = lo->plh_inode;
572 spin_lock(&inode->i_lock);
573 list_del_init(&lo->plh_bulk_destroy);
574 lo->plh_block_lgets++; /* permanently block new LAYOUTGETs */
575 if (is_bulk_recall)
576 set_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags);
577 if (pnfs_mark_matching_lsegs_invalid(lo, &lseg_list, &range))
578 ret = -EAGAIN;
579 spin_unlock(&inode->i_lock);
580 pnfs_free_lseg_list(&lseg_list);
581 pnfs_put_layout_hdr(lo);
582 iput(inode);
583 }
584 return ret;
585}
586
587int
588pnfs_destroy_layouts_byfsid(struct nfs_client *clp,
589 struct nfs_fsid *fsid,
590 bool is_recall)
591{
592 struct nfs_server *server;
593 LIST_HEAD(layout_list);
521 594
522 spin_lock(&clp->cl_lock); 595 spin_lock(&clp->cl_lock);
523 rcu_read_lock(); 596 rcu_read_lock();
597restart:
524 list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) { 598 list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
525 if (!list_empty(&server->layouts)) 599 if (memcmp(&server->fsid, fsid, sizeof(*fsid)) != 0)
526 list_splice_init(&server->layouts, &tmp_list); 600 continue;
601 if (pnfs_layout_bulk_destroy_byserver_locked(clp,
602 server,
603 &layout_list) != 0)
604 goto restart;
527 } 605 }
528 rcu_read_unlock(); 606 rcu_read_unlock();
529 spin_unlock(&clp->cl_lock); 607 spin_unlock(&clp->cl_lock);
530 608
531 while (!list_empty(&tmp_list)) { 609 if (list_empty(&layout_list))
532 lo = list_entry(tmp_list.next, struct pnfs_layout_hdr, 610 return 0;
533 plh_layouts); 611 return pnfs_layout_free_bulk_destroy_list(&layout_list, is_recall);
534 dprintk("%s freeing layout for inode %lu\n", __func__, 612}
535 lo->plh_inode->i_ino); 613
536 list_del_init(&lo->plh_layouts); 614int
537 pnfs_destroy_layout(NFS_I(lo->plh_inode)); 615pnfs_destroy_layouts_byclid(struct nfs_client *clp,
616 bool is_recall)
617{
618 struct nfs_server *server;
619 LIST_HEAD(layout_list);
620
621 spin_lock(&clp->cl_lock);
622 rcu_read_lock();
623restart:
624 list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
625 if (pnfs_layout_bulk_destroy_byserver_locked(clp,
626 server,
627 &layout_list) != 0)
628 goto restart;
538 } 629 }
630 rcu_read_unlock();
631 spin_unlock(&clp->cl_lock);
632
633 if (list_empty(&layout_list))
634 return 0;
635 return pnfs_layout_free_bulk_destroy_list(&layout_list, is_recall);
636}
637
638/*
639 * Called by the state manger to remove all layouts established under an
640 * expired lease.
641 */
642void
643pnfs_destroy_all_layouts(struct nfs_client *clp)
644{
645 nfs4_deviceid_mark_client_invalid(clp);
646 nfs4_deviceid_purge_client(clp);
647
648 pnfs_destroy_layouts_byclid(clp, false);
539} 649}
540 650
541/* 651/*
@@ -888,7 +998,7 @@ alloc_init_layout_hdr(struct inode *ino,
888 atomic_set(&lo->plh_refcount, 1); 998 atomic_set(&lo->plh_refcount, 1);
889 INIT_LIST_HEAD(&lo->plh_layouts); 999 INIT_LIST_HEAD(&lo->plh_layouts);
890 INIT_LIST_HEAD(&lo->plh_segs); 1000 INIT_LIST_HEAD(&lo->plh_segs);
891 INIT_LIST_HEAD(&lo->plh_bulk_recall); 1001 INIT_LIST_HEAD(&lo->plh_bulk_destroy);
892 lo->plh_inode = ino; 1002 lo->plh_inode = ino;
893 lo->plh_lc_cred = get_rpccred(ctx->state->owner->so_cred); 1003 lo->plh_lc_cred = get_rpccred(ctx->state->owner->so_cred);
894 return lo; 1004 return lo;
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index dbf7bba52da0..97cb358bb882 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -132,7 +132,7 @@ struct pnfs_layoutdriver_type {
132struct pnfs_layout_hdr { 132struct pnfs_layout_hdr {
133 atomic_t plh_refcount; 133 atomic_t plh_refcount;
134 struct list_head plh_layouts; /* other client layouts */ 134 struct list_head plh_layouts; /* other client layouts */
135 struct list_head plh_bulk_recall; /* clnt list of bulk recalls */ 135 struct list_head plh_bulk_destroy;
136 struct list_head plh_segs; /* layout segments list */ 136 struct list_head plh_segs; /* layout segments list */
137 nfs4_stateid plh_stateid; 137 nfs4_stateid plh_stateid;
138 atomic_t plh_outstanding; /* number of RPCs out */ 138 atomic_t plh_outstanding; /* number of RPCs out */
@@ -196,6 +196,11 @@ struct pnfs_layout_segment *pnfs_layout_process(struct nfs4_layoutget *lgp);
196void pnfs_free_lseg_list(struct list_head *tmp_list); 196void pnfs_free_lseg_list(struct list_head *tmp_list);
197void pnfs_destroy_layout(struct nfs_inode *); 197void pnfs_destroy_layout(struct nfs_inode *);
198void pnfs_destroy_all_layouts(struct nfs_client *); 198void pnfs_destroy_all_layouts(struct nfs_client *);
199int pnfs_destroy_layouts_byfsid(struct nfs_client *clp,
200 struct nfs_fsid *fsid,
201 bool is_recall);
202int pnfs_destroy_layouts_byclid(struct nfs_client *clp,
203 bool is_recall);
199void pnfs_put_layout_hdr(struct pnfs_layout_hdr *lo); 204void pnfs_put_layout_hdr(struct pnfs_layout_hdr *lo);
200void pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo, 205void pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo,
201 const nfs4_stateid *new, 206 const nfs4_stateid *new,