diff options
| author | Trond Myklebust <Trond.Myklebust@netapp.com> | 2013-02-12 09:48:42 -0500 |
|---|---|---|
| committer | Trond Myklebust <Trond.Myklebust@netapp.com> | 2013-02-14 13:22:50 -0500 |
| commit | fd9a8d7160937f94aad36ac80d7255b4988740ac (patch) | |
| tree | e437737b8f918134b2ab26bfb74883b0fc47092d | |
| parent | c8da19b9866ea84e9ad1c369393ea95d54ee7845 (diff) | |
NFSv4.1: Fix bulk recall and destroy of layouts
The current code in pnfs_destroy_all_layouts() assumes that removing
the layout from the server->layouts list is sufficient to make it
invisible to other processes. This ignores the fact that most
users reach the layout through the nfs_inode->layout pointer rather
than through that list.
There is further breakage due to lack of reference counting of the
layouts, meaning that the whole thing Oopses at the drop of a hat.
The code in initiate_bulk_draining() is almost correct, and can be
used as a model for pnfs_destroy_all_layouts(), so move that
code to pnfs.c, and refactor the code to allow us to choose between
a single-filesystem bulk recall and a recall of all layouts.
Also note that initiate_bulk_draining() currently calls iput() while
holding clp->cl_lock and the RCU read lock. Fix that too, by dropping
both locks around the iput() call.
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Cc: stable@vger.kernel.org
| -rw-r--r-- | fs/nfs/callback_proc.c | 61 | ||||
| -rw-r--r-- | fs/nfs/pnfs.c | 150 | ||||
| -rw-r--r-- | fs/nfs/pnfs.h | 7 |
3 files changed, 144 insertions, 74 deletions
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index 264d1aa935f2..2960512792c2 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c | |||
| @@ -183,60 +183,15 @@ static u32 initiate_file_draining(struct nfs_client *clp, | |||
| 183 | static u32 initiate_bulk_draining(struct nfs_client *clp, | 183 | static u32 initiate_bulk_draining(struct nfs_client *clp, |
| 184 | struct cb_layoutrecallargs *args) | 184 | struct cb_layoutrecallargs *args) |
| 185 | { | 185 | { |
| 186 | struct nfs_server *server; | 186 | int stat; |
| 187 | struct pnfs_layout_hdr *lo; | ||
| 188 | struct inode *ino; | ||
| 189 | u32 rv = NFS4ERR_NOMATCHING_LAYOUT; | ||
| 190 | struct pnfs_layout_hdr *tmp; | ||
| 191 | LIST_HEAD(recall_list); | ||
| 192 | LIST_HEAD(free_me_list); | ||
| 193 | struct pnfs_layout_range range = { | ||
| 194 | .iomode = IOMODE_ANY, | ||
| 195 | .offset = 0, | ||
| 196 | .length = NFS4_MAX_UINT64, | ||
| 197 | }; | ||
| 198 | |||
| 199 | spin_lock(&clp->cl_lock); | ||
| 200 | rcu_read_lock(); | ||
| 201 | list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) { | ||
| 202 | if ((args->cbl_recall_type == RETURN_FSID) && | ||
| 203 | memcmp(&server->fsid, &args->cbl_fsid, | ||
| 204 | sizeof(struct nfs_fsid))) | ||
| 205 | continue; | ||
| 206 | 187 | ||
| 207 | list_for_each_entry(lo, &server->layouts, plh_layouts) { | 188 | if (args->cbl_recall_type == RETURN_FSID) |
| 208 | ino = igrab(lo->plh_inode); | 189 | stat = pnfs_destroy_layouts_byfsid(clp, &args->cbl_fsid, true); |
| 209 | if (!ino) | 190 | else |
| 210 | continue; | 191 | stat = pnfs_destroy_layouts_byclid(clp, true); |
| 211 | spin_lock(&ino->i_lock); | 192 | if (stat != 0) |
| 212 | /* Is this layout in the process of being freed? */ | 193 | return NFS4ERR_DELAY; |
| 213 | if (NFS_I(ino)->layout != lo) { | 194 | return NFS4ERR_NOMATCHING_LAYOUT; |
| 214 | spin_unlock(&ino->i_lock); | ||
| 215 | iput(ino); | ||
| 216 | continue; | ||
| 217 | } | ||
| 218 | pnfs_get_layout_hdr(lo); | ||
| 219 | spin_unlock(&ino->i_lock); | ||
| 220 | list_add(&lo->plh_bulk_recall, &recall_list); | ||
| 221 | } | ||
| 222 | } | ||
| 223 | rcu_read_unlock(); | ||
| 224 | spin_unlock(&clp->cl_lock); | ||
| 225 | |||
| 226 | list_for_each_entry_safe(lo, tmp, | ||
| 227 | &recall_list, plh_bulk_recall) { | ||
| 228 | ino = lo->plh_inode; | ||
| 229 | spin_lock(&ino->i_lock); | ||
| 230 | set_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags); | ||
| 231 | if (pnfs_mark_matching_lsegs_invalid(lo, &free_me_list, &range)) | ||
| 232 | rv = NFS4ERR_DELAY; | ||
| 233 | list_del_init(&lo->plh_bulk_recall); | ||
| 234 | spin_unlock(&ino->i_lock); | ||
| 235 | pnfs_free_lseg_list(&free_me_list); | ||
| 236 | pnfs_put_layout_hdr(lo); | ||
| 237 | iput(ino); | ||
| 238 | } | ||
| 239 | return rv; | ||
| 240 | } | 195 | } |
| 241 | 196 | ||
| 242 | static u32 do_callback_layoutrecall(struct nfs_client *clp, | 197 | static u32 do_callback_layoutrecall(struct nfs_client *clp, |
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index d00260b08103..6be70f622b62 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c | |||
| @@ -505,37 +505,147 @@ pnfs_destroy_layout(struct nfs_inode *nfsi) | |||
| 505 | } | 505 | } |
| 506 | EXPORT_SYMBOL_GPL(pnfs_destroy_layout); | 506 | EXPORT_SYMBOL_GPL(pnfs_destroy_layout); |
| 507 | 507 | ||
| 508 | /* | 508 | static bool |
| 509 | * Called by the state manger to remove all layouts established under an | 509 | pnfs_layout_add_bulk_destroy_list(struct inode *inode, |
| 510 | * expired lease. | 510 | struct list_head *layout_list) |
| 511 | */ | ||
| 512 | void | ||
| 513 | pnfs_destroy_all_layouts(struct nfs_client *clp) | ||
| 514 | { | 511 | { |
| 515 | struct nfs_server *server; | ||
| 516 | struct pnfs_layout_hdr *lo; | 512 | struct pnfs_layout_hdr *lo; |
| 517 | LIST_HEAD(tmp_list); | 513 | bool ret = false; |
| 518 | 514 | ||
| 519 | nfs4_deviceid_mark_client_invalid(clp); | 515 | spin_lock(&inode->i_lock); |
| 520 | nfs4_deviceid_purge_client(clp); | 516 | lo = NFS_I(inode)->layout; |
| 517 | if (lo != NULL && list_empty(&lo->plh_bulk_destroy)) { | ||
| 518 | pnfs_get_layout_hdr(lo); | ||
| 519 | list_add(&lo->plh_bulk_destroy, layout_list); | ||
| 520 | ret = true; | ||
| 521 | } | ||
| 522 | spin_unlock(&inode->i_lock); | ||
| 523 | return ret; | ||
| 524 | } | ||
| 525 | |||
| 526 | /* Caller must hold rcu_read_lock and clp->cl_lock */ | ||
| 527 | static int | ||
| 528 | pnfs_layout_bulk_destroy_byserver_locked(struct nfs_client *clp, | ||
| 529 | struct nfs_server *server, | ||
| 530 | struct list_head *layout_list) | ||
| 531 | { | ||
| 532 | struct pnfs_layout_hdr *lo, *next; | ||
| 533 | struct inode *inode; | ||
| 534 | |||
| 535 | list_for_each_entry_safe(lo, next, &server->layouts, plh_layouts) { | ||
| 536 | inode = igrab(lo->plh_inode); | ||
| 537 | if (inode == NULL) | ||
| 538 | continue; | ||
| 539 | list_del_init(&lo->plh_layouts); | ||
| 540 | if (pnfs_layout_add_bulk_destroy_list(inode, layout_list)) | ||
| 541 | continue; | ||
| 542 | rcu_read_unlock(); | ||
| 543 | spin_unlock(&clp->cl_lock); | ||
| 544 | iput(inode); | ||
| 545 | spin_lock(&clp->cl_lock); | ||
| 546 | rcu_read_lock(); | ||
| 547 | return -EAGAIN; | ||
| 548 | } | ||
| 549 | return 0; | ||
| 550 | } | ||
| 551 | |||
| 552 | static int | ||
| 553 | pnfs_layout_free_bulk_destroy_list(struct list_head *layout_list, | ||
| 554 | bool is_bulk_recall) | ||
| 555 | { | ||
| 556 | struct pnfs_layout_hdr *lo; | ||
| 557 | struct inode *inode; | ||
| 558 | struct pnfs_layout_range range = { | ||
| 559 | .iomode = IOMODE_ANY, | ||
| 560 | .offset = 0, | ||
| 561 | .length = NFS4_MAX_UINT64, | ||
| 562 | }; | ||
| 563 | LIST_HEAD(lseg_list); | ||
| 564 | int ret = 0; | ||
| 565 | |||
| 566 | while (!list_empty(layout_list)) { | ||
| 567 | lo = list_entry(layout_list->next, struct pnfs_layout_hdr, | ||
| 568 | plh_bulk_destroy); | ||
| 569 | dprintk("%s freeing layout for inode %lu\n", __func__, | ||
| 570 | lo->plh_inode->i_ino); | ||
| 571 | inode = lo->plh_inode; | ||
| 572 | spin_lock(&inode->i_lock); | ||
| 573 | list_del_init(&lo->plh_bulk_destroy); | ||
| 574 | lo->plh_block_lgets++; /* permanently block new LAYOUTGETs */ | ||
| 575 | if (is_bulk_recall) | ||
| 576 | set_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags); | ||
| 577 | if (pnfs_mark_matching_lsegs_invalid(lo, &lseg_list, &range)) | ||
| 578 | ret = -EAGAIN; | ||
| 579 | spin_unlock(&inode->i_lock); | ||
| 580 | pnfs_free_lseg_list(&lseg_list); | ||
| 581 | pnfs_put_layout_hdr(lo); | ||
| 582 | iput(inode); | ||
| 583 | } | ||
| 584 | return ret; | ||
| 585 | } | ||
| 586 | |||
| 587 | int | ||
| 588 | pnfs_destroy_layouts_byfsid(struct nfs_client *clp, | ||
| 589 | struct nfs_fsid *fsid, | ||
| 590 | bool is_recall) | ||
| 591 | { | ||
| 592 | struct nfs_server *server; | ||
| 593 | LIST_HEAD(layout_list); | ||
| 521 | 594 | ||
| 522 | spin_lock(&clp->cl_lock); | 595 | spin_lock(&clp->cl_lock); |
| 523 | rcu_read_lock(); | 596 | rcu_read_lock(); |
| 597 | restart: | ||
| 524 | list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) { | 598 | list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) { |
| 525 | if (!list_empty(&server->layouts)) | 599 | if (memcmp(&server->fsid, fsid, sizeof(*fsid)) != 0) |
| 526 | list_splice_init(&server->layouts, &tmp_list); | 600 | continue; |
| 601 | if (pnfs_layout_bulk_destroy_byserver_locked(clp, | ||
| 602 | server, | ||
| 603 | &layout_list) != 0) | ||
| 604 | goto restart; | ||
| 527 | } | 605 | } |
| 528 | rcu_read_unlock(); | 606 | rcu_read_unlock(); |
| 529 | spin_unlock(&clp->cl_lock); | 607 | spin_unlock(&clp->cl_lock); |
| 530 | 608 | ||
| 531 | while (!list_empty(&tmp_list)) { | 609 | if (list_empty(&layout_list)) |
| 532 | lo = list_entry(tmp_list.next, struct pnfs_layout_hdr, | 610 | return 0; |
| 533 | plh_layouts); | 611 | return pnfs_layout_free_bulk_destroy_list(&layout_list, is_recall); |
| 534 | dprintk("%s freeing layout for inode %lu\n", __func__, | 612 | } |
| 535 | lo->plh_inode->i_ino); | 613 | |
| 536 | list_del_init(&lo->plh_layouts); | 614 | int |
| 537 | pnfs_destroy_layout(NFS_I(lo->plh_inode)); | 615 | pnfs_destroy_layouts_byclid(struct nfs_client *clp, |
| 616 | bool is_recall) | ||
| 617 | { | ||
| 618 | struct nfs_server *server; | ||
| 619 | LIST_HEAD(layout_list); | ||
| 620 | |||
| 621 | spin_lock(&clp->cl_lock); | ||
| 622 | rcu_read_lock(); | ||
| 623 | restart: | ||
| 624 | list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) { | ||
| 625 | if (pnfs_layout_bulk_destroy_byserver_locked(clp, | ||
| 626 | server, | ||
| 627 | &layout_list) != 0) | ||
| 628 | goto restart; | ||
| 538 | } | 629 | } |
| 630 | rcu_read_unlock(); | ||
| 631 | spin_unlock(&clp->cl_lock); | ||
| 632 | |||
| 633 | if (list_empty(&layout_list)) | ||
| 634 | return 0; | ||
| 635 | return pnfs_layout_free_bulk_destroy_list(&layout_list, is_recall); | ||
| 636 | } | ||
| 637 | |||
| 638 | /* | ||
| 639 | * Called by the state manger to remove all layouts established under an | ||
| 640 | * expired lease. | ||
| 641 | */ | ||
| 642 | void | ||
| 643 | pnfs_destroy_all_layouts(struct nfs_client *clp) | ||
| 644 | { | ||
| 645 | nfs4_deviceid_mark_client_invalid(clp); | ||
| 646 | nfs4_deviceid_purge_client(clp); | ||
| 647 | |||
| 648 | pnfs_destroy_layouts_byclid(clp, false); | ||
| 539 | } | 649 | } |
| 540 | 650 | ||
| 541 | /* | 651 | /* |
| @@ -888,7 +998,7 @@ alloc_init_layout_hdr(struct inode *ino, | |||
| 888 | atomic_set(&lo->plh_refcount, 1); | 998 | atomic_set(&lo->plh_refcount, 1); |
| 889 | INIT_LIST_HEAD(&lo->plh_layouts); | 999 | INIT_LIST_HEAD(&lo->plh_layouts); |
| 890 | INIT_LIST_HEAD(&lo->plh_segs); | 1000 | INIT_LIST_HEAD(&lo->plh_segs); |
| 891 | INIT_LIST_HEAD(&lo->plh_bulk_recall); | 1001 | INIT_LIST_HEAD(&lo->plh_bulk_destroy); |
| 892 | lo->plh_inode = ino; | 1002 | lo->plh_inode = ino; |
| 893 | lo->plh_lc_cred = get_rpccred(ctx->state->owner->so_cred); | 1003 | lo->plh_lc_cred = get_rpccred(ctx->state->owner->so_cred); |
| 894 | return lo; | 1004 | return lo; |
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index dbf7bba52da0..97cb358bb882 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h | |||
| @@ -132,7 +132,7 @@ struct pnfs_layoutdriver_type { | |||
| 132 | struct pnfs_layout_hdr { | 132 | struct pnfs_layout_hdr { |
| 133 | atomic_t plh_refcount; | 133 | atomic_t plh_refcount; |
| 134 | struct list_head plh_layouts; /* other client layouts */ | 134 | struct list_head plh_layouts; /* other client layouts */ |
| 135 | struct list_head plh_bulk_recall; /* clnt list of bulk recalls */ | 135 | struct list_head plh_bulk_destroy; |
| 136 | struct list_head plh_segs; /* layout segments list */ | 136 | struct list_head plh_segs; /* layout segments list */ |
| 137 | nfs4_stateid plh_stateid; | 137 | nfs4_stateid plh_stateid; |
| 138 | atomic_t plh_outstanding; /* number of RPCs out */ | 138 | atomic_t plh_outstanding; /* number of RPCs out */ |
| @@ -196,6 +196,11 @@ struct pnfs_layout_segment *pnfs_layout_process(struct nfs4_layoutget *lgp); | |||
| 196 | void pnfs_free_lseg_list(struct list_head *tmp_list); | 196 | void pnfs_free_lseg_list(struct list_head *tmp_list); |
| 197 | void pnfs_destroy_layout(struct nfs_inode *); | 197 | void pnfs_destroy_layout(struct nfs_inode *); |
| 198 | void pnfs_destroy_all_layouts(struct nfs_client *); | 198 | void pnfs_destroy_all_layouts(struct nfs_client *); |
| 199 | int pnfs_destroy_layouts_byfsid(struct nfs_client *clp, | ||
| 200 | struct nfs_fsid *fsid, | ||
| 201 | bool is_recall); | ||
| 202 | int pnfs_destroy_layouts_byclid(struct nfs_client *clp, | ||
| 203 | bool is_recall); | ||
| 199 | void pnfs_put_layout_hdr(struct pnfs_layout_hdr *lo); | 204 | void pnfs_put_layout_hdr(struct pnfs_layout_hdr *lo); |
| 200 | void pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo, | 205 | void pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo, |
| 201 | const nfs4_stateid *new, | 206 | const nfs4_stateid *new, |
