aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorFred Isaman <iisaman@netapp.com>2011-02-03 13:28:52 -0500
committerTrond Myklebust <Trond.Myklebust@netapp.com>2011-03-11 15:38:40 -0500
commitf49f9baac8f63de9cbc17a0a84e04060496e8e76 (patch)
treee3e544247ec5e85c47915d34737f278772d15c73
parent9f52c2525e09854ed6aa4cbd83915a56226d86c1 (diff)
pnfs: fix pnfs lock inversion of i_lock and cl_lock
The pnfs code was using throughout the lock order i_lock, cl_lock. This conflicts with the nfs delegation code. Rework the pnfs code to avoid taking both locks simultaneously. Currently the code takes the double lock to add/remove the layout to an nfs_client list, while atomically checking that the list of lsegs is empty. To avoid this, we rely on existing serializations. When a layout is initialized with an lseg count equal to zero, LAYOUTGET's openstateid serialization is in effect, making it safe to assume it stays zero unless we change it. And once a layout's lseg count drops to zero, it is marked DESTROYED and so will stay at zero. Signed-off-by: Fred Isaman <iisaman@netapp.com> Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
-rw-r--r--fs/nfs/callback_proc.c2
-rw-r--r--fs/nfs/pnfs.c42
2 files changed, 26 insertions, 18 deletions
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index 89587573fe50..2f41dccea18e 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -188,10 +188,10 @@ static u32 initiate_bulk_draining(struct nfs_client *clp,
188 rv = NFS4ERR_DELAY; 188 rv = NFS4ERR_DELAY;
189 list_del_init(&lo->plh_bulk_recall); 189 list_del_init(&lo->plh_bulk_recall);
190 spin_unlock(&ino->i_lock); 190 spin_unlock(&ino->i_lock);
191 pnfs_free_lseg_list(&free_me_list);
191 put_layout_hdr(lo); 192 put_layout_hdr(lo);
192 iput(ino); 193 iput(ino);
193 } 194 }
194 pnfs_free_lseg_list(&free_me_list);
195 return rv; 195 return rv;
196} 196}
197 197
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index c17edfbbaebf..0f5b66f90d17 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -247,13 +247,6 @@ put_lseg_locked(struct pnfs_layout_segment *lseg,
247 BUG_ON(test_bit(NFS_LSEG_VALID, &lseg->pls_flags)); 247 BUG_ON(test_bit(NFS_LSEG_VALID, &lseg->pls_flags));
248 list_del(&lseg->pls_list); 248 list_del(&lseg->pls_list);
249 if (list_empty(&lseg->pls_layout->plh_segs)) { 249 if (list_empty(&lseg->pls_layout->plh_segs)) {
250 struct nfs_client *clp;
251
252 clp = NFS_SERVER(ino)->nfs_client;
253 spin_lock(&clp->cl_lock);
254 /* List does not take a reference, so no need for put here */
255 list_del_init(&lseg->pls_layout->plh_layouts);
256 spin_unlock(&clp->cl_lock);
257 set_bit(NFS_LAYOUT_DESTROYED, &lseg->pls_layout->plh_flags); 250 set_bit(NFS_LAYOUT_DESTROYED, &lseg->pls_layout->plh_flags);
258 /* Matched by initial refcount set in alloc_init_layout_hdr */ 251 /* Matched by initial refcount set in alloc_init_layout_hdr */
259 put_layout_hdr_locked(lseg->pls_layout); 252 put_layout_hdr_locked(lseg->pls_layout);
@@ -319,11 +312,27 @@ mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo,
319 return invalid - removed; 312 return invalid - removed;
320} 313}
321 314
315/* note free_me must contain lsegs from a single layout_hdr */
322void 316void
323pnfs_free_lseg_list(struct list_head *free_me) 317pnfs_free_lseg_list(struct list_head *free_me)
324{ 318{
325 struct pnfs_layout_segment *lseg, *tmp; 319 struct pnfs_layout_segment *lseg, *tmp;
320 struct pnfs_layout_hdr *lo;
321
322 if (list_empty(free_me))
323 return;
326 324
325 lo = list_first_entry(free_me, struct pnfs_layout_segment,
326 pls_list)->pls_layout;
327
328 if (test_bit(NFS_LAYOUT_DESTROYED, &lo->plh_flags)) {
329 struct nfs_client *clp;
330
331 clp = NFS_SERVER(lo->plh_inode)->nfs_client;
332 spin_lock(&clp->cl_lock);
333 list_del_init(&lo->plh_layouts);
334 spin_unlock(&clp->cl_lock);
335 }
327 list_for_each_entry_safe(lseg, tmp, free_me, pls_list) { 336 list_for_each_entry_safe(lseg, tmp, free_me, pls_list) {
328 list_del(&lseg->pls_list); 337 list_del(&lseg->pls_list);
329 free_lseg(lseg); 338 free_lseg(lseg);
@@ -705,6 +714,7 @@ pnfs_update_layout(struct inode *ino,
705 struct nfs_client *clp = NFS_SERVER(ino)->nfs_client; 714 struct nfs_client *clp = NFS_SERVER(ino)->nfs_client;
706 struct pnfs_layout_hdr *lo; 715 struct pnfs_layout_hdr *lo;
707 struct pnfs_layout_segment *lseg = NULL; 716 struct pnfs_layout_segment *lseg = NULL;
717 bool first = false;
708 718
709 if (!pnfs_enabled_sb(NFS_SERVER(ino))) 719 if (!pnfs_enabled_sb(NFS_SERVER(ino)))
710 return NULL; 720 return NULL;
@@ -735,7 +745,10 @@ pnfs_update_layout(struct inode *ino,
735 atomic_inc(&lo->plh_outstanding); 745 atomic_inc(&lo->plh_outstanding);
736 746
737 get_layout_hdr(lo); 747 get_layout_hdr(lo);
738 if (list_empty(&lo->plh_segs)) { 748 if (list_empty(&lo->plh_segs))
749 first = true;
750 spin_unlock(&ino->i_lock);
751 if (first) {
739 /* The lo must be on the clp list if there is any 752 /* The lo must be on the clp list if there is any
740 * chance of a CB_LAYOUTRECALL(FILE) coming in. 753 * chance of a CB_LAYOUTRECALL(FILE) coming in.
741 */ 754 */
@@ -744,17 +757,12 @@ pnfs_update_layout(struct inode *ino,
744 list_add_tail(&lo->plh_layouts, &clp->cl_layouts); 757 list_add_tail(&lo->plh_layouts, &clp->cl_layouts);
745 spin_unlock(&clp->cl_lock); 758 spin_unlock(&clp->cl_lock);
746 } 759 }
747 spin_unlock(&ino->i_lock);
748 760
749 lseg = send_layoutget(lo, ctx, iomode); 761 lseg = send_layoutget(lo, ctx, iomode);
750 if (!lseg) { 762 if (!lseg && first) {
751 spin_lock(&ino->i_lock); 763 spin_lock(&clp->cl_lock);
752 if (list_empty(&lo->plh_segs)) { 764 list_del_init(&lo->plh_layouts);
753 spin_lock(&clp->cl_lock); 765 spin_unlock(&clp->cl_lock);
754 list_del_init(&lo->plh_layouts);
755 spin_unlock(&clp->cl_lock);
756 }
757 spin_unlock(&ino->i_lock);
758 } 766 }
759 atomic_dec(&lo->plh_outstanding); 767 atomic_dec(&lo->plh_outstanding);
760 put_layout_hdr(lo); 768 put_layout_hdr(lo);