From 52fabd73199cd00932f92c9f548bdf66a5bbc23d Mon Sep 17 00:00:00 2001 From: Fred Isaman Date: Thu, 6 Jan 2011 11:36:18 +0000 Subject: pnfs: fix incorrect comment in destroy_lseg Comment references get_layout_hdr_locked, which never existed in submitted code. Signed-off-by: Fred Isaman Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/nfs/pnfs.c') diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index db773428f95f..6e9daffa5a37 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -225,7 +225,7 @@ destroy_lseg(struct kref *kref) dprintk("--> %s\n", __func__); NFS_SERVER(ino)->pnfs_curr_ld->free_lseg(lseg); - /* Matched by get_layout_hdr_locked in pnfs_insert_layout */ + /* Matched by get_layout_hdr in pnfs_insert_layout */ put_layout_hdr(ino); } -- cgit v1.2.2 From 566052c53b5146e23a99ab95fb5c11f8a295a084 Mon Sep 17 00:00:00 2001 From: Fred Isaman Date: Thu, 6 Jan 2011 11:36:20 +0000 Subject: pnfs: add prefix to struct pnfs_layout_segment fields While we are renaming all the fields, change lo->state to lo->plh_flags. Signed-off-by: Fred Isaman Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.c | 66 +++++++++++++++++++++++++++++------------------------------ 1 file changed, 33 insertions(+), 33 deletions(-) (limited to 'fs/nfs/pnfs.c') diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 6e9daffa5a37..c3ca5fe1f3bd 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -210,9 +210,9 @@ put_layout_hdr(struct inode *inode) static void init_lseg(struct pnfs_layout_hdr *lo, struct pnfs_layout_segment *lseg) { - INIT_LIST_HEAD(&lseg->fi_list); - kref_init(&lseg->kref); - lseg->layout = lo; + INIT_LIST_HEAD(&lseg->pls_list); + kref_init(&lseg->pls_refcount); + lseg->pls_layout = lo; } /* Called without i_lock held, as the free_lseg call may sleep */ @@ -220,8 +220,8 @@ static void destroy_lseg(struct kref *kref) { struct pnfs_layout_segment *lseg = - container_of(kref, struct pnfs_layout_segment, kref); - struct inode *ino = lseg->layout->inode; + container_of(kref, struct pnfs_layout_segment, pls_refcount); + struct inode *ino = lseg->pls_layout->inode; dprintk("--> %s\n", __func__); NFS_SERVER(ino)->pnfs_curr_ld->free_lseg(lseg); @@ -236,8 +236,8 @@ put_lseg(struct pnfs_layout_segment *lseg) return; dprintk("%s: lseg %p ref %d\n", __func__, lseg, - atomic_read(&lseg->kref.refcount)); - kref_put(&lseg->kref, destroy_lseg); + atomic_read(&lseg->pls_refcount.refcount)); + kref_put(&lseg->pls_refcount, destroy_lseg); } static void @@ -249,9 +249,9 @@ pnfs_clear_lseg_list(struct pnfs_layout_hdr *lo, struct list_head *tmp_list) dprintk("%s:Begin lo %p\n", __func__, lo); assert_spin_locked(&lo->inode->i_lock); - list_for_each_entry_safe(lseg, next, &lo->segs, fi_list) { + list_for_each_entry_safe(lseg, next, &lo->segs, pls_list) { dprintk("%s: freeing lseg %p\n", __func__, lseg); - list_move(&lseg->fi_list, tmp_list); + list_move(&lseg->pls_list, tmp_list); } clp = NFS_SERVER(lo->inode)->nfs_client; spin_lock(&clp->cl_lock); @@ -259,7 +259,7 @@ pnfs_clear_lseg_list(struct pnfs_layout_hdr *lo, struct list_head *tmp_list) list_del_init(&lo->layouts); spin_unlock(&clp->cl_lock); write_seqlock(&lo->seqlock); - clear_bit(NFS_LAYOUT_STATEID_SET, &lo->state); + clear_bit(NFS_LAYOUT_STATEID_SET, &lo->plh_flags); write_sequnlock(&lo->seqlock); dprintk("%s:Return\n", __func__); @@ -272,9 +272,9 @@ pnfs_free_lseg_list(struct list_head *tmp_list) while (!list_empty(tmp_list)) { lseg = list_entry(tmp_list->next, struct pnfs_layout_segment, - fi_list); + pls_list); dprintk("%s calling put_lseg on %p\n", __func__, lseg); - list_del(&lseg->fi_list); + list_del(&lseg->pls_list); put_lseg(lseg); } } @@ -331,7 +331,7 @@ pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo, bool overwrite = false; write_seqlock(&lo->seqlock); - if (!test_bit(NFS_LAYOUT_STATEID_SET, &lo->state) || + if (!test_bit(NFS_LAYOUT_STATEID_SET, &lo->plh_flags) || memcmp(old->stateid.other, new->stateid.other, sizeof(new->stateid.other))) overwrite = true; else { @@ -360,7 +360,7 @@ pnfs_layout_from_open_stateid(struct pnfs_layout_hdr *lo, memcpy(lo->stateid.data, state->stateid.data, sizeof(state->stateid.data)); } while (read_seqretry(&state->seqlock, seq)); - set_bit(NFS_LAYOUT_STATEID_SET, &lo->state); + set_bit(NFS_LAYOUT_STATEID_SET, &lo->plh_flags); write_sequnlock(&lo->seqlock); dprintk("<-- %s\n", __func__); } @@ -374,7 +374,7 @@ pnfs_get_layout_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo, dprintk("--> %s\n", __func__); do { seq = read_seqbegin(&lo->seqlock); - if (!test_bit(NFS_LAYOUT_STATEID_SET, &lo->state)) { + if (!test_bit(NFS_LAYOUT_STATEID_SET, &lo->plh_flags)) { /* This will trigger retry of the read */ pnfs_layout_from_open_stateid(lo, open_state); } else @@ -424,7 +424,7 @@ send_layoutget(struct pnfs_layout_hdr *lo, nfs4_proc_layoutget(lgp); if (!lseg) { /* remember that LAYOUTGET failed and suspend trying */ - set_bit(lo_fail_bit(iomode), &lo->state); + set_bit(lo_fail_bit(iomode), &lo->plh_flags); } return lseg; } @@ -459,26 +459,26 @@ pnfs_insert_layout(struct pnfs_layout_hdr *lo, list_add_tail(&lo->layouts, &clp->cl_layouts); spin_unlock(&clp->cl_lock); } - list_for_each_entry(lp, &lo->segs, fi_list) { - if (cmp_layout(lp->range.iomode, lseg->range.iomode) > 0) + list_for_each_entry(lp, &lo->segs, pls_list) { + if (cmp_layout(lp->pls_range.iomode, lseg->pls_range.iomode) > 0) continue; - list_add_tail(&lseg->fi_list, &lp->fi_list); + list_add_tail(&lseg->pls_list, &lp->pls_list); dprintk("%s: inserted lseg %p " "iomode %d offset %llu length %llu before " "lp %p iomode %d offset %llu length %llu\n", - __func__, lseg, lseg->range.iomode, - lseg->range.offset, lseg->range.length, - lp, lp->range.iomode, lp->range.offset, - lp->range.length); + __func__, lseg, lseg->pls_range.iomode, + lseg->pls_range.offset, lseg->pls_range.length, + lp, lp->pls_range.iomode, lp->pls_range.offset, + lp->pls_range.length); found = 1; break; } if (!found) { - list_add_tail(&lseg->fi_list, &lo->segs); + list_add_tail(&lseg->pls_list, &lo->segs); dprintk("%s: inserted lseg %p " "iomode %d offset %llu length %llu at tail\n", - __func__, lseg, lseg->range.iomode, - lseg->range.offset, lseg->range.length); + __func__, lseg, lseg->pls_range.iomode, + lseg->pls_range.offset, lseg->pls_range.length); } get_layout_hdr_locked(lo); @@ -538,7 +538,7 @@ pnfs_find_alloc_layout(struct inode *ino) static int is_matching_lseg(struct pnfs_layout_segment *lseg, u32 iomode) { - return (iomode != IOMODE_RW || lseg->range.iomode == IOMODE_RW); + return (iomode != IOMODE_RW || lseg->pls_range.iomode == IOMODE_RW); } /* @@ -552,17 +552,17 @@ pnfs_has_layout(struct pnfs_layout_hdr *lo, u32 iomode) dprintk("%s:Begin\n", __func__); assert_spin_locked(&lo->inode->i_lock); - list_for_each_entry(lseg, &lo->segs, fi_list) { + list_for_each_entry(lseg, &lo->segs, pls_list) { if (is_matching_lseg(lseg, iomode)) { ret = lseg; break; } - if (cmp_layout(iomode, lseg->range.iomode) > 0) + if (cmp_layout(iomode, lseg->pls_range.iomode) > 0) break; } dprintk("%s:Return lseg %p ref %d\n", - __func__, ret, ret ? atomic_read(&ret->kref.refcount) : 0); + __func__, ret, ret ? atomic_read(&ret->pls_refcount.refcount) : 0); return ret; } @@ -597,7 +597,7 @@ pnfs_update_layout(struct inode *ino, } /* if LAYOUTGET already failed once we don't try again */ - if (test_bit(lo_fail_bit(iomode), &nfsi->layout->state)) + if (test_bit(lo_fail_bit(iomode), &nfsi->layout->plh_flags)) goto out_unlock; get_layout_hdr_locked(lo); /* Matched in nfs4_layoutget_release */ @@ -606,7 +606,7 @@ pnfs_update_layout(struct inode *ino, lseg = send_layoutget(lo, ctx, iomode); out: dprintk("%s end, state 0x%lx lseg %p\n", __func__, - nfsi->layout->state, lseg); + nfsi->layout->plh_flags, lseg); return lseg; out_unlock: spin_unlock(&ino->i_lock); @@ -636,7 +636,7 @@ pnfs_layout_process(struct nfs4_layoutget *lgp) spin_lock(&ino->i_lock); init_lseg(lo, lseg); - lseg->range = res->range; + lseg->pls_range = res->range; *lgp->lsegpp = lseg; pnfs_insert_layout(lo, lseg); -- cgit v1.2.2 From b7edfaa1983362842351e425adeb8e297b4c11fb Mon Sep 17 00:00:00 2001 From: Fred Isaman Date: Thu, 6 Jan 2011 11:36:21 +0000 Subject: pnfs: add prefix to struct pnfs_layout_hdr fields Signed-off-by: Fred Isaman Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.c | 94 +++++++++++++++++++++++++++++------------------------------ 1 file changed, 47 insertions(+), 47 deletions(-) (limited to 'fs/nfs/pnfs.c') diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index c3ca5fe1f3bd..6736f9e4f2e1 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -180,21 +180,21 @@ EXPORT_SYMBOL_GPL(pnfs_unregister_layoutdriver); static void get_layout_hdr_locked(struct pnfs_layout_hdr *lo) { - assert_spin_locked(&lo->inode->i_lock); - lo->refcount++; + assert_spin_locked(&lo->plh_inode->i_lock); + lo->plh_refcount++; } static void put_layout_hdr_locked(struct pnfs_layout_hdr *lo) { - assert_spin_locked(&lo->inode->i_lock); - BUG_ON(lo->refcount == 0); + assert_spin_locked(&lo->plh_inode->i_lock); + BUG_ON(lo->plh_refcount == 0); - lo->refcount--; - if (!lo->refcount) { + lo->plh_refcount--; + if (!lo->plh_refcount) { dprintk("%s: freeing layout cache %p\n", __func__, lo); - BUG_ON(!list_empty(&lo->layouts)); - NFS_I(lo->inode)->layout = NULL; + BUG_ON(!list_empty(&lo->plh_layouts)); + NFS_I(lo->plh_inode)->layout = NULL; kfree(lo); } } @@ -221,7 +221,7 @@ destroy_lseg(struct kref *kref) { struct pnfs_layout_segment *lseg = container_of(kref, struct pnfs_layout_segment, pls_refcount); - struct inode *ino = lseg->pls_layout->inode; + struct inode *ino = lseg->pls_layout->plh_inode; dprintk("--> %s\n", __func__); NFS_SERVER(ino)->pnfs_curr_ld->free_lseg(lseg); @@ -248,19 +248,19 @@ pnfs_clear_lseg_list(struct pnfs_layout_hdr *lo, struct list_head *tmp_list) dprintk("%s:Begin lo %p\n", __func__, lo); - assert_spin_locked(&lo->inode->i_lock); - list_for_each_entry_safe(lseg, next, &lo->segs, pls_list) { + assert_spin_locked(&lo->plh_inode->i_lock); + list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list) { dprintk("%s: freeing lseg %p\n", __func__, lseg); list_move(&lseg->pls_list, tmp_list); } - clp = NFS_SERVER(lo->inode)->nfs_client; + clp = NFS_SERVER(lo->plh_inode)->nfs_client; spin_lock(&clp->cl_lock); /* List does not take a reference, so no need for put here */ - list_del_init(&lo->layouts); + list_del_init(&lo->plh_layouts); spin_unlock(&clp->cl_lock); - write_seqlock(&lo->seqlock); + write_seqlock(&lo->plh_seqlock); clear_bit(NFS_LAYOUT_STATEID_SET, &lo->plh_flags); - write_sequnlock(&lo->seqlock); + write_sequnlock(&lo->plh_seqlock); dprintk("%s:Return\n", __func__); } @@ -312,25 +312,25 @@ pnfs_destroy_all_layouts(struct nfs_client *clp) while (!list_empty(&tmp_list)) { lo = list_entry(tmp_list.next, struct pnfs_layout_hdr, - layouts); + plh_layouts); dprintk("%s freeing layout for inode %lu\n", __func__, - lo->inode->i_ino); - pnfs_destroy_layout(NFS_I(lo->inode)); + lo->plh_inode->i_ino); + pnfs_destroy_layout(NFS_I(lo->plh_inode)); } } -/* update lo->stateid with new if is more recent +/* update lo->plh_stateid with new if is more recent * - * lo->stateid could be the open stateid, in which case we just use what given. + * lo->plh_stateid could be the open stateid, in which case we just use what given. */ static void pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo, const nfs4_stateid *new) { - nfs4_stateid *old = &lo->stateid; + nfs4_stateid *old = &lo->plh_stateid; bool overwrite = false; - write_seqlock(&lo->seqlock); + write_seqlock(&lo->plh_seqlock); if (!test_bit(NFS_LAYOUT_STATEID_SET, &lo->plh_flags) || memcmp(old->stateid.other, new->stateid.other, sizeof(new->stateid.other))) overwrite = true; @@ -344,7 +344,7 @@ pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo, } if (overwrite) memcpy(&old->stateid, &new->stateid, sizeof(new->stateid)); - write_sequnlock(&lo->seqlock); + write_sequnlock(&lo->plh_seqlock); } static void @@ -354,14 +354,14 @@ pnfs_layout_from_open_stateid(struct pnfs_layout_hdr *lo, int seq; dprintk("--> %s\n", __func__); - write_seqlock(&lo->seqlock); + write_seqlock(&lo->plh_seqlock); do { seq = read_seqbegin(&state->seqlock); - memcpy(lo->stateid.data, state->stateid.data, + memcpy(lo->plh_stateid.data, state->stateid.data, sizeof(state->stateid.data)); } while (read_seqretry(&state->seqlock, seq)); set_bit(NFS_LAYOUT_STATEID_SET, &lo->plh_flags); - write_sequnlock(&lo->seqlock); + write_sequnlock(&lo->plh_seqlock); dprintk("<-- %s\n", __func__); } @@ -373,14 +373,14 @@ pnfs_get_layout_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo, dprintk("--> %s\n", __func__); do { - seq = read_seqbegin(&lo->seqlock); + seq = read_seqbegin(&lo->plh_seqlock); if (!test_bit(NFS_LAYOUT_STATEID_SET, &lo->plh_flags)) { /* This will trigger retry of the read */ pnfs_layout_from_open_stateid(lo, open_state); } else - memcpy(dst->data, lo->stateid.data, - sizeof(lo->stateid.data)); - } while (read_seqretry(&lo->seqlock, seq)); + memcpy(dst->data, lo->plh_stateid.data, + sizeof(lo->plh_stateid.data)); + } while (read_seqretry(&lo->plh_seqlock, seq)); dprintk("<-- %s\n", __func__); } @@ -395,7 +395,7 @@ send_layoutget(struct pnfs_layout_hdr *lo, struct nfs_open_context *ctx, u32 iomode) { - struct inode *ino = lo->inode; + struct inode *ino = lo->plh_inode; struct nfs_server *server = NFS_SERVER(ino); struct nfs4_layoutget *lgp; struct pnfs_layout_segment *lseg = NULL; @@ -405,7 +405,7 @@ send_layoutget(struct pnfs_layout_hdr *lo, BUG_ON(ctx == NULL); lgp = kzalloc(sizeof(*lgp), GFP_KERNEL); if (lgp == NULL) { - put_layout_hdr(lo->inode); + put_layout_hdr(lo->plh_inode); return NULL; } lgp->args.minlength = NFS4_MAX_UINT64; @@ -450,16 +450,16 @@ pnfs_insert_layout(struct pnfs_layout_hdr *lo, dprintk("%s:Begin\n", __func__); - assert_spin_locked(&lo->inode->i_lock); - if (list_empty(&lo->segs)) { - struct nfs_client *clp = NFS_SERVER(lo->inode)->nfs_client; + assert_spin_locked(&lo->plh_inode->i_lock); + if (list_empty(&lo->plh_segs)) { + struct nfs_client *clp = NFS_SERVER(lo->plh_inode)->nfs_client; spin_lock(&clp->cl_lock); - BUG_ON(!list_empty(&lo->layouts)); - list_add_tail(&lo->layouts, &clp->cl_layouts); + BUG_ON(!list_empty(&lo->plh_layouts)); + list_add_tail(&lo->plh_layouts, &clp->cl_layouts); spin_unlock(&clp->cl_lock); } - list_for_each_entry(lp, &lo->segs, pls_list) { + list_for_each_entry(lp, &lo->plh_segs, pls_list) { if (cmp_layout(lp->pls_range.iomode, lseg->pls_range.iomode) > 0) continue; list_add_tail(&lseg->pls_list, &lp->pls_list); @@ -474,7 +474,7 @@ pnfs_insert_layout(struct pnfs_layout_hdr *lo, break; } if (!found) { - list_add_tail(&lseg->pls_list, &lo->segs); + list_add_tail(&lseg->pls_list, &lo->plh_segs); dprintk("%s: inserted lseg %p " "iomode %d offset %llu length %llu at tail\n", __func__, lseg, lseg->pls_range.iomode, @@ -493,11 +493,11 @@ alloc_init_layout_hdr(struct inode *ino) lo = kzalloc(sizeof(struct pnfs_layout_hdr), GFP_KERNEL); if (!lo) return NULL; - lo->refcount = 1; - INIT_LIST_HEAD(&lo->layouts); - INIT_LIST_HEAD(&lo->segs); - seqlock_init(&lo->seqlock); - lo->inode = ino; + lo->plh_refcount = 1; + INIT_LIST_HEAD(&lo->plh_layouts); + INIT_LIST_HEAD(&lo->plh_segs); + seqlock_init(&lo->plh_seqlock); + lo->plh_inode = ino; return lo; } @@ -551,8 +551,8 @@ pnfs_has_layout(struct pnfs_layout_hdr *lo, u32 iomode) dprintk("%s:Begin\n", __func__); - assert_spin_locked(&lo->inode->i_lock); - list_for_each_entry(lseg, &lo->segs, pls_list) { + assert_spin_locked(&lo->plh_inode->i_lock); + list_for_each_entry(lseg, &lo->plh_segs, pls_list) { if (is_matching_lseg(lseg, iomode)) { ret = lseg; break; @@ -619,7 +619,7 @@ pnfs_layout_process(struct nfs4_layoutget *lgp) struct pnfs_layout_hdr *lo = NFS_I(lgp->args.inode)->layout; struct nfs4_layoutget_res *res = &lgp->res; struct pnfs_layout_segment *lseg; - struct inode *ino = lo->inode; + struct inode *ino = lo->plh_inode; int status = 0; /* Inject layout blob into I/O device driver */ -- cgit v1.2.2 From fd6002e9b8a93220d5f53b93d9624caf73cdc8a2 Mon Sep 17 00:00:00 2001 From: Fred Isaman Date: Thu, 6 Jan 2011 11:36:22 +0000 Subject: pnfs: change layout state seqlock to a spinlock This prepares for future changes, where the layout state needs to change atomically with several other variables. In particular, it will need to know if lo->segs is empty, as we test that instead of manipulating the NFS_LAYOUT_STATEID_SET bit. Moreover, the layoutstateid is not really a read-mostly structure, as it is written almost as often as it is read. The behavior of pnfs_get_layout_stateid is also slightly changed, so that it no longer changes the stateid. Its name is changed to +pnfs_choose_layoutget_stateid. Signed-off-by: Fred Isaman Signed-off-by: Benny Halevy Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.c | 79 +++++++++++++++++------------------------------------------ 1 file changed, 23 insertions(+), 56 deletions(-) (limited to 'fs/nfs/pnfs.c') diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 6736f9e4f2e1..08313f536b45 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -258,9 +258,6 @@ pnfs_clear_lseg_list(struct pnfs_layout_hdr *lo, struct list_head *tmp_list) /* List does not take a reference, so no need for put here */ list_del_init(&lo->plh_layouts); spin_unlock(&clp->cl_lock); - write_seqlock(&lo->plh_seqlock); - clear_bit(NFS_LAYOUT_STATEID_SET, &lo->plh_flags); - write_sequnlock(&lo->plh_seqlock); dprintk("%s:Return\n", __func__); } @@ -319,69 +316,40 @@ pnfs_destroy_all_layouts(struct nfs_client *clp) } } -/* update lo->plh_stateid with new if is more recent - * - * lo->plh_stateid could be the open stateid, in which case we just use what given. - */ +/* update lo->plh_stateid with new if is more recent */ static void pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo, const nfs4_stateid *new) { - nfs4_stateid *old = &lo->plh_stateid; - bool overwrite = false; - - write_seqlock(&lo->plh_seqlock); - if (!test_bit(NFS_LAYOUT_STATEID_SET, &lo->plh_flags) || - memcmp(old->stateid.other, new->stateid.other, sizeof(new->stateid.other))) - overwrite = true; - else { - u32 oldseq, newseq; - - oldseq = be32_to_cpu(old->stateid.seqid); - newseq = be32_to_cpu(new->stateid.seqid); - if ((int)(newseq - oldseq) > 0) - overwrite = true; - } - if (overwrite) - memcpy(&old->stateid, &new->stateid, sizeof(new->stateid)); - write_sequnlock(&lo->plh_seqlock); -} - -static void -pnfs_layout_from_open_stateid(struct pnfs_layout_hdr *lo, - struct nfs4_state *state) -{ - int seq; + u32 oldseq, newseq; - dprintk("--> %s\n", __func__); - write_seqlock(&lo->plh_seqlock); - do { - seq = read_seqbegin(&state->seqlock); - memcpy(lo->plh_stateid.data, state->stateid.data, - sizeof(state->stateid.data)); - } while (read_seqretry(&state->seqlock, seq)); - set_bit(NFS_LAYOUT_STATEID_SET, &lo->plh_flags); - write_sequnlock(&lo->plh_seqlock); - dprintk("<-- %s\n", __func__); + oldseq = be32_to_cpu(lo->plh_stateid.stateid.seqid); + newseq = be32_to_cpu(new->stateid.seqid); + if ((int)(newseq - oldseq) > 0) + memcpy(&lo->plh_stateid, &new->stateid, sizeof(new->stateid)); } -void -pnfs_get_layout_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo, - struct nfs4_state *open_state) +int +pnfs_choose_layoutget_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo, + struct nfs4_state *open_state) { - int seq; + int status = 0; dprintk("--> %s\n", __func__); - do { - seq = read_seqbegin(&lo->plh_seqlock); - if (!test_bit(NFS_LAYOUT_STATEID_SET, &lo->plh_flags)) { - /* This will trigger retry of the read */ - pnfs_layout_from_open_stateid(lo, open_state); - } else - memcpy(dst->data, lo->plh_stateid.data, - sizeof(lo->plh_stateid.data)); - } while (read_seqretry(&lo->plh_seqlock, seq)); + spin_lock(&lo->plh_inode->i_lock); + if (list_empty(&lo->plh_segs)) { + int seq; + + do { + seq = read_seqbegin(&open_state->seqlock); + memcpy(dst->data, open_state->stateid.data, + sizeof(open_state->stateid.data)); + } while (read_seqretry(&open_state->seqlock, seq)); + } else + memcpy(dst->data, lo->plh_stateid.data, sizeof(lo->plh_stateid.data)); + spin_unlock(&lo->plh_inode->i_lock); dprintk("<-- %s\n", __func__); + return status; } /* @@ -496,7 +464,6 @@ alloc_init_layout_hdr(struct inode *ino) lo->plh_refcount = 1; INIT_LIST_HEAD(&lo->plh_layouts); INIT_LIST_HEAD(&lo->plh_segs); - seqlock_init(&lo->plh_seqlock); lo->plh_inode = ino; return lo; } -- cgit v1.2.2 From 4541d16c024ce40a0781e03c185ecdfe34aec46f Mon Sep 17 00:00:00 2001 From: Fred Isaman Date: Thu, 6 Jan 2011 11:36:23 +0000 Subject: pnfs: change how lsegs are removed from layout list This is to prepare the way for sensible io draining. Instead of just removing the lseg from the list, we instead clear the VALID flag (preventing new io from grabbing references to the lseg) and remove the reference holding it in the list. Thus the lseg will be removed once any io in progress completes and any references still held are dropped. Signed-off-by: Fred Isaman Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.c | 130 +++++++++++++++++++++++++++++++++++++++------------------- 1 file changed, 88 insertions(+), 42 deletions(-) (limited to 'fs/nfs/pnfs.c') diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 08313f536b45..212cbc22c59d 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -211,68 +211,109 @@ static void init_lseg(struct pnfs_layout_hdr *lo, struct pnfs_layout_segment *lseg) { INIT_LIST_HEAD(&lseg->pls_list); - kref_init(&lseg->pls_refcount); + atomic_set(&lseg->pls_refcount, 1); + smp_mb(); + set_bit(NFS_LSEG_VALID, &lseg->pls_flags); lseg->pls_layout = lo; } -/* Called without i_lock held, as the free_lseg call may sleep */ -static void -destroy_lseg(struct kref *kref) +static void free_lseg(struct pnfs_layout_segment *lseg) { - struct pnfs_layout_segment *lseg = - container_of(kref, struct pnfs_layout_segment, pls_refcount); struct inode *ino = lseg->pls_layout->plh_inode; - dprintk("--> %s\n", __func__); NFS_SERVER(ino)->pnfs_curr_ld->free_lseg(lseg); /* Matched by get_layout_hdr in pnfs_insert_layout */ put_layout_hdr(ino); } -static void -put_lseg(struct pnfs_layout_segment *lseg) +/* The use of tmp_list is necessary because pnfs_curr_ld->free_lseg + * could sleep, so must be called outside of the lock. + * Returns 1 if object was removed, otherwise return 0. + */ +static int +put_lseg_locked(struct pnfs_layout_segment *lseg, + struct list_head *tmp_list) { - if (!lseg) - return; + dprintk("%s: lseg %p ref %d valid %d\n", __func__, lseg, + atomic_read(&lseg->pls_refcount), + test_bit(NFS_LSEG_VALID, &lseg->pls_flags)); + if (atomic_dec_and_test(&lseg->pls_refcount)) { + struct inode *ino = lseg->pls_layout->plh_inode; - dprintk("%s: lseg %p ref %d\n", __func__, lseg, - atomic_read(&lseg->pls_refcount.refcount)); - kref_put(&lseg->pls_refcount, destroy_lseg); + BUG_ON(test_bit(NFS_LSEG_VALID, &lseg->pls_flags)); + list_del(&lseg->pls_list); + if (list_empty(&lseg->pls_layout->plh_segs)) { + struct nfs_client *clp; + + clp = NFS_SERVER(ino)->nfs_client; + spin_lock(&clp->cl_lock); + /* List does not take a reference, so no need for put here */ + list_del_init(&lseg->pls_layout->plh_layouts); + spin_unlock(&clp->cl_lock); + } + list_add(&lseg->pls_list, tmp_list); + return 1; + } + return 0; } -static void -pnfs_clear_lseg_list(struct pnfs_layout_hdr *lo, struct list_head *tmp_list) +static bool +should_free_lseg(u32 lseg_iomode, u32 recall_iomode) { - struct pnfs_layout_segment *lseg, *next; - struct nfs_client *clp; + return (recall_iomode == IOMODE_ANY || + lseg_iomode == recall_iomode); +} - dprintk("%s:Begin lo %p\n", __func__, lo); +/* Returns 1 if lseg is removed from list, 0 otherwise */ +static int mark_lseg_invalid(struct pnfs_layout_segment *lseg, + struct list_head *tmp_list) +{ + int rv = 0; - assert_spin_locked(&lo->plh_inode->i_lock); - list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list) { - dprintk("%s: freeing lseg %p\n", __func__, lseg); - list_move(&lseg->pls_list, tmp_list); + if (test_and_clear_bit(NFS_LSEG_VALID, &lseg->pls_flags)) { + /* Remove the reference keeping the lseg in the + * list. It will now be removed when all + * outstanding io is finished. + */ + rv = put_lseg_locked(lseg, tmp_list); } - clp = NFS_SERVER(lo->plh_inode)->nfs_client; - spin_lock(&clp->cl_lock); - /* List does not take a reference, so no need for put here */ - list_del_init(&lo->plh_layouts); - spin_unlock(&clp->cl_lock); + return rv; +} - dprintk("%s:Return\n", __func__); +/* Returns count of number of matching invalid lsegs remaining in list + * after call. + */ +static int +mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo, + struct list_head *tmp_list, + u32 iomode) +{ + struct pnfs_layout_segment *lseg, *next; + int invalid = 0, removed = 0; + + dprintk("%s:Begin lo %p\n", __func__, lo); + + list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list) + if (should_free_lseg(lseg->pls_range.iomode, iomode)) { + dprintk("%s: freeing lseg %p iomode %d " + "offset %llu length %llu\n", __func__, + lseg, lseg->pls_range.iomode, lseg->pls_range.offset, + lseg->pls_range.length); + invalid++; + removed += mark_lseg_invalid(lseg, tmp_list); + } + dprintk("%s:Return %i\n", __func__, invalid - removed); + return invalid - removed; } static void -pnfs_free_lseg_list(struct list_head *tmp_list) +pnfs_free_lseg_list(struct list_head *free_me) { - struct pnfs_layout_segment *lseg; + struct pnfs_layout_segment *lseg, *tmp; - while (!list_empty(tmp_list)) { - lseg = list_entry(tmp_list->next, struct pnfs_layout_segment, - pls_list); - dprintk("%s calling put_lseg on %p\n", __func__, lseg); + list_for_each_entry_safe(lseg, tmp, free_me, pls_list) { list_del(&lseg->pls_list); - put_lseg(lseg); + free_lseg(lseg); } } @@ -285,7 +326,8 @@ pnfs_destroy_layout(struct nfs_inode *nfsi) spin_lock(&nfsi->vfs_inode.i_lock); lo = nfsi->layout; if (lo) { - pnfs_clear_lseg_list(lo, &tmp_list); + set_bit(NFS_LAYOUT_DESTROYED, &nfsi->layout->plh_flags); + mark_matching_lsegs_invalid(lo, &tmp_list, IOMODE_ANY); /* Matched by refcount set to 1 in alloc_init_layout_hdr */ put_layout_hdr_locked(lo); } @@ -477,9 +519,12 @@ pnfs_find_alloc_layout(struct inode *ino) dprintk("%s Begin ino=%p layout=%p\n", __func__, ino, nfsi->layout); assert_spin_locked(&ino->i_lock); - if (nfsi->layout) - return nfsi->layout; - + if (nfsi->layout) { + if (test_bit(NFS_LAYOUT_DESTROYED, &nfsi->layout->plh_flags)) + return NULL; + else + return nfsi->layout; + } spin_unlock(&ino->i_lock); new = alloc_init_layout_hdr(ino); spin_lock(&ino->i_lock); @@ -520,7 +565,8 @@ pnfs_has_layout(struct pnfs_layout_hdr *lo, u32 iomode) assert_spin_locked(&lo->plh_inode->i_lock); list_for_each_entry(lseg, &lo->plh_segs, pls_list) { - if (is_matching_lseg(lseg, iomode)) { + if (test_bit(NFS_LSEG_VALID, &lseg->pls_flags) && + is_matching_lseg(lseg, iomode)) { ret = lseg; break; } @@ -529,7 +575,7 @@ pnfs_has_layout(struct pnfs_layout_hdr *lo, u32 iomode) } dprintk("%s:Return lseg %p ref %d\n", - __func__, ret, ret ? atomic_read(&ret->pls_refcount.refcount) : 0); + __func__, ret, ret ? atomic_read(&ret->pls_refcount) : 0); return ret; } -- cgit v1.2.2 From cf7d63f1f9895713551df2e6d18b006f8af26e91 Mon Sep 17 00:00:00 2001 From: Fred Isaman Date: Thu, 6 Jan 2011 11:36:25 +0000 Subject: pnfs: serialize LAYOUTGET(openstateid) We shouldn't send a LAYOUTGET(openstateid) unless all outstanding RPCs using the previous stateid are completed. This requires choosing the stateid to encode earlier, so we can abort if one is not available (we want to use the open stateid, but a LAYOUTGET is already out using it), and adding a count of the number of outstanding rpc calls using layout state (which for now consist solely of LAYOUTGETs). Signed-off-by: Fred Isaman Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.c | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) (limited to 'fs/nfs/pnfs.c') diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 212cbc22c59d..59ed68bf79fa 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -371,6 +371,14 @@ pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo, memcpy(&lo->plh_stateid, &new->stateid, sizeof(new->stateid)); } +/* lget is set to 1 if called from inside send_layoutget call chain */ +static bool +pnfs_layoutgets_blocked(struct pnfs_layout_hdr *lo, int lget) +{ + return (list_empty(&lo->plh_segs) && + (atomic_read(&lo->plh_outstanding) > lget)); +} + int pnfs_choose_layoutget_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo, struct nfs4_state *open_state) @@ -379,7 +387,9 @@ pnfs_choose_layoutget_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo, dprintk("--> %s\n", __func__); spin_lock(&lo->plh_inode->i_lock); - if (list_empty(&lo->plh_segs)) { + if (pnfs_layoutgets_blocked(lo, 1)) { + status = -EAGAIN; + } else if (list_empty(&lo->plh_segs)) { int seq; do { @@ -414,10 +424,8 @@ send_layoutget(struct pnfs_layout_hdr *lo, BUG_ON(ctx == NULL); lgp = kzalloc(sizeof(*lgp), GFP_KERNEL); - if (lgp == NULL) { - put_layout_hdr(lo->plh_inode); + if (lgp == NULL) return NULL; - } lgp->args.minlength = NFS4_MAX_UINT64; lgp->args.maxcount = PNFS_LAYOUT_MAXSIZE; lgp->args.range.iomode = iomode; @@ -613,10 +621,16 @@ pnfs_update_layout(struct inode *ino, if (test_bit(lo_fail_bit(iomode), &nfsi->layout->plh_flags)) goto out_unlock; - get_layout_hdr_locked(lo); /* Matched in nfs4_layoutget_release */ + if (pnfs_layoutgets_blocked(lo, 0)) + goto out_unlock; + atomic_inc(&lo->plh_outstanding); + + get_layout_hdr_locked(lo); spin_unlock(&ino->i_lock); lseg = send_layoutget(lo, ctx, iomode); + atomic_dec(&lo->plh_outstanding); + put_layout_hdr(ino); out: dprintk("%s end, state 0x%lx lseg %p\n", __func__, nfsi->layout->plh_flags, lseg); -- cgit v1.2.2 From 2130ff663633e8a57921779ebfe62fc39d5585ec Mon Sep 17 00:00:00 2001 From: Fred Isaman Date: Thu, 6 Jan 2011 11:36:26 +0000 Subject: pnfs: add layout to client list before sending rpc Since this list will be used to search for layouts to recall, this is necessary to avoid a race where the recall comes in, sees there is nothing in the client list, and prepares to return NOMATCHING, while the LAYOUTGET gets processed before the recall updates the stateid. Signed-off-by: Fred Isaman Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.c | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) (limited to 'fs/nfs/pnfs.c') diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 59ed68bf79fa..c00b673261f9 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -469,14 +469,6 @@ pnfs_insert_layout(struct pnfs_layout_hdr *lo, dprintk("%s:Begin\n", __func__); assert_spin_locked(&lo->plh_inode->i_lock); - if (list_empty(&lo->plh_segs)) { - struct nfs_client *clp = NFS_SERVER(lo->plh_inode)->nfs_client; - - spin_lock(&clp->cl_lock); - BUG_ON(!list_empty(&lo->plh_layouts)); - list_add_tail(&lo->plh_layouts, &clp->cl_layouts); - spin_unlock(&clp->cl_lock); - } list_for_each_entry(lp, &lo->plh_segs, pls_list) { if (cmp_layout(lp->pls_range.iomode, lseg->pls_range.iomode) > 0) continue; @@ -597,6 +589,7 @@ pnfs_update_layout(struct inode *ino, enum pnfs_iomode iomode) { struct nfs_inode *nfsi = NFS_I(ino); + struct nfs_client *clp = NFS_SERVER(ino)->nfs_client; struct pnfs_layout_hdr *lo; struct pnfs_layout_segment *lseg = NULL; @@ -626,9 +619,27 @@ pnfs_update_layout(struct inode *ino, atomic_inc(&lo->plh_outstanding); get_layout_hdr_locked(lo); + if (list_empty(&lo->plh_segs)) { + /* The lo must be on the clp list if there is any + * chance of a CB_LAYOUTRECALL(FILE) coming in. + */ + spin_lock(&clp->cl_lock); + BUG_ON(!list_empty(&lo->plh_layouts)); + list_add_tail(&lo->plh_layouts, &clp->cl_layouts); + spin_unlock(&clp->cl_lock); + } spin_unlock(&ino->i_lock); lseg = send_layoutget(lo, ctx, iomode); + if (!lseg) { + spin_lock(&ino->i_lock); + if (list_empty(&lo->plh_segs)) { + spin_lock(&clp->cl_lock); + list_del_init(&lo->plh_layouts); + spin_unlock(&clp->cl_lock); + } + spin_unlock(&ino->i_lock); + } atomic_dec(&lo->plh_outstanding); put_layout_hdr(ino); out: -- cgit v1.2.2 From fc1794c5b04f5322bad05385cd91b52ec85aab72 Mon Sep 17 00:00:00 2001 From: Fred Isaman Date: Thu, 6 Jan 2011 11:36:27 +0000 Subject: pnfs: check that partial LAYOUTGET return is ignored Either a bad server reply, or our ignoring of multiple array segments in a reply, can cause a reply to not meet our requirements. Ensure that we ignore such replies. Signed-off-by: Fred Isaman Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.c | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'fs/nfs/pnfs.c') diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index c00b673261f9..cd9906415a14 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -660,6 +660,17 @@ pnfs_layout_process(struct nfs4_layoutget *lgp) struct inode *ino = lo->plh_inode; int status = 0; + /* Verify we got what we asked for. + * Note that because the xdr parsing only accepts a single + * element array, this can fail even if the server is behaving + * correctly. + */ + if (lgp->args.range.iomode > res->range.iomode || + res->range.offset != 0 || + res->range.length != NFS4_MAX_UINT64) { + status = -EINVAL; + goto out; + } /* Inject layout blob into I/O device driver */ lseg = NFS_SERVER(ino)->pnfs_curr_ld->alloc_lseg(lo, res); if (!lseg || IS_ERR(lseg)) { -- cgit v1.2.2 From cc6e5340b0981feac5a00a992bab6154cb4b1fa1 Mon Sep 17 00:00:00 2001 From: Fred Isaman Date: Thu, 6 Jan 2011 11:36:28 +0000 Subject: pnfs: change lo refcounting to atomic_t This will be required to allow us to grab reference outside of i_lock. While we are at it, make put_layout_hdr take the same argument as all the related functions. Signed-off-by: Fred Isaman Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.c | 48 ++++++++++++++++++++++++++---------------------- 1 file changed, 26 insertions(+), 22 deletions(-) (limited to 'fs/nfs/pnfs.c') diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index cd9906415a14..32b66468e5db 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -177,34 +177,38 @@ EXPORT_SYMBOL_GPL(pnfs_unregister_layoutdriver); * pNFS client layout cache */ +/* Need to hold i_lock if caller does not already hold reference */ static void -get_layout_hdr_locked(struct pnfs_layout_hdr *lo) +get_layout_hdr(struct pnfs_layout_hdr *lo) { - assert_spin_locked(&lo->plh_inode->i_lock); - lo->plh_refcount++; + atomic_inc(&lo->plh_refcount); } static void -put_layout_hdr_locked(struct pnfs_layout_hdr *lo) +destroy_layout_hdr(struct pnfs_layout_hdr *lo) { - assert_spin_locked(&lo->plh_inode->i_lock); - BUG_ON(lo->plh_refcount == 0); + dprintk("%s: freeing layout cache %p\n", __func__, lo); + BUG_ON(!list_empty(&lo->plh_layouts)); + NFS_I(lo->plh_inode)->layout = NULL; + kfree(lo); +} - lo->plh_refcount--; - if (!lo->plh_refcount) { - dprintk("%s: freeing layout cache %p\n", __func__, lo); - BUG_ON(!list_empty(&lo->plh_layouts)); - NFS_I(lo->plh_inode)->layout = NULL; - kfree(lo); - } +static void +put_layout_hdr_locked(struct pnfs_layout_hdr *lo) +{ + if (atomic_dec_and_test(&lo->plh_refcount)) + destroy_layout_hdr(lo); } void -put_layout_hdr(struct inode *inode) +put_layout_hdr(struct pnfs_layout_hdr *lo) { - spin_lock(&inode->i_lock); - put_layout_hdr_locked(NFS_I(inode)->layout); - spin_unlock(&inode->i_lock); + struct inode *inode = lo->plh_inode; + + if (atomic_dec_and_lock(&lo->plh_refcount, &inode->i_lock)) { + destroy_layout_hdr(lo); + spin_unlock(&inode->i_lock); + } } static void @@ -223,7 +227,7 @@ static void free_lseg(struct pnfs_layout_segment *lseg) NFS_SERVER(ino)->pnfs_curr_ld->free_lseg(lseg); /* Matched by get_layout_hdr in pnfs_insert_layout */ - put_layout_hdr(ino); + put_layout_hdr(NFS_I(ino)->layout); } /* The use of tmp_list is necessary because pnfs_curr_ld->free_lseg @@ -490,7 +494,7 @@ pnfs_insert_layout(struct pnfs_layout_hdr *lo, __func__, lseg, lseg->pls_range.iomode, lseg->pls_range.offset, lseg->pls_range.length); } - get_layout_hdr_locked(lo); + get_layout_hdr(lo); dprintk("%s:Return\n", __func__); } @@ -503,7 +507,7 @@ alloc_init_layout_hdr(struct inode *ino) lo = kzalloc(sizeof(struct pnfs_layout_hdr), GFP_KERNEL); if (!lo) return NULL; - lo->plh_refcount = 1; + atomic_set(&lo->plh_refcount, 1); INIT_LIST_HEAD(&lo->plh_layouts); INIT_LIST_HEAD(&lo->plh_segs); lo->plh_inode = ino; @@ -618,7 +622,7 @@ pnfs_update_layout(struct inode *ino, goto out_unlock; atomic_inc(&lo->plh_outstanding); - get_layout_hdr_locked(lo); + get_layout_hdr(lo); if (list_empty(&lo->plh_segs)) { /* The lo must be on the clp list if there is any * chance of a CB_LAYOUTRECALL(FILE) coming in. @@ -641,7 +645,7 @@ pnfs_update_layout(struct inode *ino, spin_unlock(&ino->i_lock); } atomic_dec(&lo->plh_outstanding); - put_layout_hdr(ino); + put_layout_hdr(lo); out: dprintk("%s end, state 0x%lx lseg %p\n", __func__, nfsi->layout->plh_flags, lseg); -- cgit v1.2.2 From 43f1b3da8b35d706d6c47880fc211d2519b4a587 Mon Sep 17 00:00:00 2001 From: Fred Isaman Date: Thu, 6 Jan 2011 11:36:30 +0000 Subject: pnfs: add CB_LAYOUTRECALL handling This is the heart of the wave 2 submission. Add the code to trigger drain and forget of any afected layouts. In addition, we set a "barrier", below which any LAYOUTGET reply is ignored. This is to compensate for the fact that we do not wait for outstanding LAYOUTGETs to complete as per section 12.5.5.2.1 of RFC 5661. Signed-off-by: Fred Isaman Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.c | 83 +++++++++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 64 insertions(+), 19 deletions(-) (limited to 'fs/nfs/pnfs.c') diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 32b66468e5db..bf4186b8f2fc 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -178,7 +178,7 @@ EXPORT_SYMBOL_GPL(pnfs_unregister_layoutdriver); */ /* Need to hold i_lock if caller does not already hold reference */ -static void +void get_layout_hdr(struct pnfs_layout_hdr *lo) { atomic_inc(&lo->plh_refcount); @@ -254,6 +254,7 @@ put_lseg_locked(struct pnfs_layout_segment *lseg, /* List does not take a reference, so no need for put here */ list_del_init(&lseg->pls_layout->plh_layouts); spin_unlock(&clp->cl_lock); + clear_bit(NFS_LAYOUT_BULK_RECALL, &lseg->pls_layout->plh_flags); } list_add(&lseg->pls_list, tmp_list); return 1; @@ -287,7 +288,7 @@ static int mark_lseg_invalid(struct pnfs_layout_segment *lseg, /* Returns count of number of matching invalid lsegs remaining in list * after call. */ -static int +int mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo, struct list_head *tmp_list, u32 iomode) @@ -310,7 +311,7 @@ mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo, return invalid - removed; } -static void +void pnfs_free_lseg_list(struct list_head *free_me) { struct pnfs_layout_segment *lseg, *tmp; @@ -363,23 +364,45 @@ pnfs_destroy_all_layouts(struct nfs_client *clp) } /* update lo->plh_stateid with new if is more recent */ -static void -pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo, - const nfs4_stateid *new) +void +pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo, const nfs4_stateid *new, + bool update_barrier) { u32 oldseq, newseq; oldseq = be32_to_cpu(lo->plh_stateid.stateid.seqid); newseq = be32_to_cpu(new->stateid.seqid); - if ((int)(newseq - oldseq) > 0) + if ((int)(newseq - oldseq) > 0) { memcpy(&lo->plh_stateid, &new->stateid, sizeof(new->stateid)); + if (update_barrier) { + u32 new_barrier = be32_to_cpu(new->stateid.seqid); + + if ((int)(new_barrier - lo->plh_barrier)) + lo->plh_barrier = new_barrier; + } else { + /* Because of wraparound, we want to keep the barrier + * "close" to the current seqids. It needs to be + * within 2**31 to count as "behind", so if it + * gets too near that limit, give us a litle leeway + * and bring it to within 2**30. + * NOTE - and yes, this is all unsigned arithmetic. + */ + if (unlikely((newseq - lo->plh_barrier) > (3 << 29))) + lo->plh_barrier = newseq - (1 << 30); + } + } } /* lget is set to 1 if called from inside send_layoutget call chain */ static bool -pnfs_layoutgets_blocked(struct pnfs_layout_hdr *lo, int lget) -{ - return (list_empty(&lo->plh_segs) && +pnfs_layoutgets_blocked(struct pnfs_layout_hdr *lo, nfs4_stateid *stateid, + int lget) +{ + if ((stateid) && + (int)(lo->plh_barrier - be32_to_cpu(stateid->stateid.seqid)) >= 0) + return true; + return test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags) || + (list_empty(&lo->plh_segs) && (atomic_read(&lo->plh_outstanding) > lget)); } @@ -391,7 +414,7 @@ pnfs_choose_layoutget_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo, dprintk("--> %s\n", __func__); spin_lock(&lo->plh_inode->i_lock); - if (pnfs_layoutgets_blocked(lo, 1)) { + if (pnfs_layoutgets_blocked(lo, NULL, 1)) { status = -EAGAIN; } else if (list_empty(&lo->plh_segs)) { int seq; @@ -510,6 +533,7 @@ alloc_init_layout_hdr(struct inode *ino) atomic_set(&lo->plh_refcount, 1); INIT_LIST_HEAD(&lo->plh_layouts); INIT_LIST_HEAD(&lo->plh_segs); + INIT_LIST_HEAD(&lo->plh_bulk_recall); lo->plh_inode = ino; return lo; } @@ -561,7 +585,7 @@ is_matching_lseg(struct pnfs_layout_segment *lseg, u32 iomode) * lookup range in layout */ static struct pnfs_layout_segment * -pnfs_has_layout(struct pnfs_layout_hdr *lo, u32 iomode) +pnfs_find_lseg(struct pnfs_layout_hdr *lo, u32 iomode) { struct pnfs_layout_segment *lseg, *ret = NULL; @@ -606,19 +630,22 @@ pnfs_update_layout(struct inode *ino, goto out_unlock; } - /* Check to see if the layout for the given range already exists */ - lseg = pnfs_has_layout(lo, iomode); - if (lseg) { - dprintk("%s: Using cached lseg %p for iomode %d)\n", - __func__, lseg, iomode); + /* Do we even need to bother with this? */ + if (test_bit(NFS4CLNT_LAYOUTRECALL, &clp->cl_state) || + test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags)) { + dprintk("%s matches recall, use MDS\n", __func__); goto out_unlock; } + /* Check to see if the layout for the given range already exists */ + lseg = pnfs_find_lseg(lo, iomode); + if (lseg) + goto out_unlock; /* if LAYOUTGET already failed once we don't try again */ if (test_bit(lo_fail_bit(iomode), &nfsi->layout->plh_flags)) goto out_unlock; - if (pnfs_layoutgets_blocked(lo, 0)) + if (pnfs_layoutgets_blocked(lo, NULL, 0)) goto out_unlock; atomic_inc(&lo->plh_outstanding); @@ -641,6 +668,7 @@ pnfs_update_layout(struct inode *ino, spin_lock(&clp->cl_lock); list_del_init(&lo->plh_layouts); spin_unlock(&clp->cl_lock); + clear_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags); } spin_unlock(&ino->i_lock); } @@ -662,6 +690,7 @@ pnfs_layout_process(struct nfs4_layoutget *lgp) struct nfs4_layoutget_res *res = &lgp->res; struct pnfs_layout_segment *lseg; struct inode *ino = lo->plh_inode; + struct nfs_client *clp = NFS_SERVER(ino)->nfs_client; int status = 0; /* Verify we got what we asked for. @@ -688,16 +717,32 @@ pnfs_layout_process(struct nfs4_layoutget *lgp) } spin_lock(&ino->i_lock); + if (test_bit(NFS4CLNT_LAYOUTRECALL, &clp->cl_state) || + test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags)) { + dprintk("%s forget reply due to recall\n", __func__); + goto out_forget_reply; + } + + if (pnfs_layoutgets_blocked(lo, &res->stateid, 1)) { + dprintk("%s forget reply due to state\n", __func__); + goto out_forget_reply; + } init_lseg(lo, lseg); lseg->pls_range = res->range; *lgp->lsegpp = lseg; pnfs_insert_layout(lo, lseg); /* Done processing layoutget. Set the layout stateid */ - pnfs_set_layout_stateid(lo, &res->stateid); + pnfs_set_layout_stateid(lo, &res->stateid, false); spin_unlock(&ino->i_lock); out: return status; + +out_forget_reply: + spin_unlock(&ino->i_lock); + lseg->pls_layout = lo; + NFS_SERVER(ino)->pnfs_curr_ld->free_lseg(lseg); + goto out; } /* -- cgit v1.2.2 From f7e8917a67980924651a9e244510e63ef05c7755 Mon Sep 17 00:00:00 2001 From: Fred Isaman Date: Thu, 6 Jan 2011 11:36:32 +0000 Subject: pnfs: layout roc code A layout can request return-on-close. How this interacts with the forgetful model of never sending LAYOUTRETURNS is a bit ambiguous. We forget any layouts marked roc, and wait for them to be completely forgotten before continuing with the close. In addition, to compensate for races with any inflight LAYOUTGETs, and the fact that we do not get any layout stateid back from the server, we set the barrier to the worst case scenario of current_seqid + number of outstanding LAYOUTGETS. Signed-off-by: Fred Isaman Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.c | 86 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 85 insertions(+), 1 deletion(-) (limited to 'fs/nfs/pnfs.c') diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index bf4186b8f2fc..bc4089769735 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -256,6 +256,7 @@ put_lseg_locked(struct pnfs_layout_segment *lseg, spin_unlock(&clp->cl_lock); clear_bit(NFS_LAYOUT_BULK_RECALL, &lseg->pls_layout->plh_flags); } + rpc_wake_up(&NFS_SERVER(ino)->roc_rpcwaitq); list_add(&lseg->pls_list, tmp_list); return 1; } @@ -401,7 +402,8 @@ pnfs_layoutgets_blocked(struct pnfs_layout_hdr *lo, nfs4_stateid *stateid, if ((stateid) && (int)(lo->plh_barrier - be32_to_cpu(stateid->stateid.seqid)) >= 0) return true; - return test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags) || + return lo->plh_block_lgets || + test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags) || (list_empty(&lo->plh_segs) && (atomic_read(&lo->plh_outstanding) > lget)); } @@ -474,6 +476,83 @@ send_layoutget(struct pnfs_layout_hdr *lo, return lseg; } +bool pnfs_roc(struct inode *ino) +{ + struct pnfs_layout_hdr *lo; + struct pnfs_layout_segment *lseg, *tmp; + LIST_HEAD(tmp_list); + bool found = false; + + spin_lock(&ino->i_lock); + lo = NFS_I(ino)->layout; + if (!lo || !test_and_clear_bit(NFS_LAYOUT_ROC, &lo->plh_flags) || + test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags)) + goto out_nolayout; + list_for_each_entry_safe(lseg, tmp, &lo->plh_segs, pls_list) + if (test_bit(NFS_LSEG_ROC, &lseg->pls_flags)) { + mark_lseg_invalid(lseg, &tmp_list); + found = true; + } + if (!found) + goto out_nolayout; + lo->plh_block_lgets++; + get_layout_hdr(lo); /* matched in pnfs_roc_release */ + spin_unlock(&ino->i_lock); + pnfs_free_lseg_list(&tmp_list); + return true; + +out_nolayout: + spin_unlock(&ino->i_lock); + return false; +} + +void pnfs_roc_release(struct inode *ino) +{ + struct pnfs_layout_hdr *lo; + + spin_lock(&ino->i_lock); + lo = NFS_I(ino)->layout; + lo->plh_block_lgets--; + put_layout_hdr_locked(lo); + spin_unlock(&ino->i_lock); +} + +void pnfs_roc_set_barrier(struct inode *ino, u32 barrier) +{ + struct pnfs_layout_hdr *lo; + + spin_lock(&ino->i_lock); + lo = NFS_I(ino)->layout; + if ((int)(barrier - lo->plh_barrier) > 0) + lo->plh_barrier = barrier; + spin_unlock(&ino->i_lock); +} + +bool pnfs_roc_drain(struct inode *ino, u32 *barrier) +{ + struct nfs_inode *nfsi = NFS_I(ino); + struct pnfs_layout_segment *lseg; + bool found = false; + + spin_lock(&ino->i_lock); + list_for_each_entry(lseg, &nfsi->layout->plh_segs, pls_list) + if (test_bit(NFS_LSEG_ROC, &lseg->pls_flags)) { + found = true; + break; + } + if (!found) { + struct pnfs_layout_hdr *lo = nfsi->layout; + u32 current_seqid = be32_to_cpu(lo->plh_stateid.stateid.seqid); + + /* Since close does not return a layout stateid for use as + * a barrier, we choose the worst-case barrier. + */ + *barrier = current_seqid + atomic_read(&lo->plh_outstanding); + } + spin_unlock(&ino->i_lock); + return found; +} + /* * Compare two layout segments for sorting into layout cache. * We want to preferentially return RW over RO layouts, so ensure those @@ -732,6 +811,11 @@ pnfs_layout_process(struct nfs4_layoutget *lgp) *lgp->lsegpp = lseg; pnfs_insert_layout(lo, lseg); + if (res->return_on_close) { + set_bit(NFS_LSEG_ROC, &lseg->pls_flags); + set_bit(NFS_LAYOUT_ROC, &lo->plh_flags); + } + /* Done processing layoutget. Set the layout stateid */ pnfs_set_layout_stateid(lo, &res->stateid, false); spin_unlock(&ino->i_lock); -- cgit v1.2.2