aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorFred Isaman <iisaman@citi.umich.edu>2011-07-30 20:52:52 -0400
committerTrond Myklebust <Trond.Myklebust@netapp.com>2011-07-31 12:18:17 -0400
commitb2be7811dd94816f3df76708c8eb7f55bf7289e2 (patch)
tree45bd90a9b478dc0a81485cfe9ec228141d4dcbf2 /fs
parent90ace12ac42f65d1f077c5ef5ec2efafdcac338f (diff)
pnfsblock: cleanup_layoutcommit
In the blocklayout driver, two things happen during layoutcommit/cleanup: 1. The modified extents are encoded. 2. On cleanup, the extents are put back on the layout's rw extents list, for reads. In the new system, where the actual XDR encoding is done in encode_layoutcommit() directly into the xdr buffer, these are the new commit stages: 1. On setup_layoutcommit, the range is adjusted as before and a structure is allocated for communication with bl_encode_layoutcommit && bl_cleanup_layoutcommit (the generic layer provides a void-star to hang it on). 2. bl_encode_layoutcommit is called to do the actual encoding directly into xdr. The commit-extent-list is not freed and is stored on the above structure. FIXME: The code is not yet converted to the new XDR cleanup. 3. On cleanup, the commit-extent-list is put back by a call to set_to_rw() as before, but without the XDR decoding of the list that was needed before, and the commit-extent-list is freed. Finally, the allocated structure is freed. [rm inode and pnfs_layout_hdr args from cleanup_layoutcommit()] Signed-off-by: Jim Rees <rees@umich.edu> [pnfsblock: introduce bl_committing list] Signed-off-by: Peng Tao <peng_tao@emc.com> [pnfsblock: SQUASHME: adjust to API change] Signed-off-by: Fred Isaman <iisaman@citi.umich.edu> [blocklayout: encode_layoutcommit implementation] Signed-off-by: Boaz Harrosh <bharrosh@panasas.com> [pnfsblock: fix bug setting up layoutcommit.] Signed-off-by: Tao Guo <guotao@nrchpc.ac.cn> [pnfsblock: cleanup_layoutcommit wants a status parameter] Signed-off-by: Boaz Harrosh <bharrosh@panasas.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com> Signed-off-by: Benny Halevy <bhalevy@tonian.com> Signed-off-by: Jim Rees <rees@umich.edu> Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Diffstat (limited to 'fs')
-rw-r--r--fs/nfs/blocklayout/blocklayout.c4
-rw-r--r--fs/nfs/blocklayout/blocklayout.h3
-rw-r--r--fs/nfs/blocklayout/extents.c210
3 files changed, 217 insertions, 0 deletions
diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index d096835cfd6b..6c1bafb8920b 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -162,6 +162,10 @@ bl_encode_layoutcommit(struct pnfs_layout_hdr *lo, struct xdr_stream *xdr,
162static void 162static void
163bl_cleanup_layoutcommit(struct nfs4_layoutcommit_data *lcdata) 163bl_cleanup_layoutcommit(struct nfs4_layoutcommit_data *lcdata)
164{ 164{
165 struct pnfs_layout_hdr *lo = NFS_I(lcdata->args.inode)->layout;
166
167 dprintk("%s enter\n", __func__);
168 clean_pnfs_block_layoutupdate(BLK_LO2EXT(lo), &lcdata->args, lcdata->res.status);
165} 169}
166 170
167static void free_blk_mountid(struct block_mount_id *mid) 171static void free_blk_mountid(struct block_mount_id *mid)
diff --git a/fs/nfs/blocklayout/blocklayout.h b/fs/nfs/blocklayout/blocklayout.h
index 3caaefce85a5..6a703b79c33d 100644
--- a/fs/nfs/blocklayout/blocklayout.h
+++ b/fs/nfs/blocklayout/blocklayout.h
@@ -196,6 +196,9 @@ int bl_is_sector_init(struct pnfs_inval_markings *marks, sector_t isect);
196int encode_pnfs_block_layoutupdate(struct pnfs_block_layout *bl, 196int encode_pnfs_block_layoutupdate(struct pnfs_block_layout *bl,
197 struct xdr_stream *xdr, 197 struct xdr_stream *xdr,
198 const struct nfs4_layoutcommit_args *arg); 198 const struct nfs4_layoutcommit_args *arg);
199void clean_pnfs_block_layoutupdate(struct pnfs_block_layout *bl,
200 const struct nfs4_layoutcommit_args *arg,
201 int status);
199int bl_add_merge_extent(struct pnfs_block_layout *bl, 202int bl_add_merge_extent(struct pnfs_block_layout *bl,
200 struct pnfs_block_extent *new); 203 struct pnfs_block_extent *new);
201 204
diff --git a/fs/nfs/blocklayout/extents.c b/fs/nfs/blocklayout/extents.c
index 84bf24087720..7521940dcca5 100644
--- a/fs/nfs/blocklayout/extents.c
+++ b/fs/nfs/blocklayout/extents.c
@@ -329,6 +329,73 @@ static void print_clist(struct list_head *list, unsigned int count)
329 } 329 }
330} 330}
331 331
332/* Note: In theory, we should do more checking that devid's match between
333 * old and new, but if they don't, the lists are too corrupt to salvage anyway.
334 */
335/* Note this is very similar to bl_add_merge_extent */
336static void add_to_commitlist(struct pnfs_block_layout *bl,
337 struct pnfs_block_short_extent *new)
338{
339 struct list_head *clist = &bl->bl_commit;
340 struct pnfs_block_short_extent *old, *save;
341 sector_t end = new->bse_f_offset + new->bse_length;
342
343 dprintk("%s enter\n", __func__);
344 print_short_extent(new);
345 print_clist(clist, bl->bl_count);
346 bl->bl_count++;
347 /* Scan for proper place to insert, extending new to the left
348 * as much as possible.
349 */
350 list_for_each_entry_safe(old, save, clist, bse_node) {
351 if (new->bse_f_offset < old->bse_f_offset)
352 break;
353 if (end <= old->bse_f_offset + old->bse_length) {
354 /* Range is already in list */
355 bl->bl_count--;
356 kfree(new);
357 return;
358 } else if (new->bse_f_offset <=
359 old->bse_f_offset + old->bse_length) {
360 /* new overlaps or abuts existing be */
361 if (new->bse_mdev == old->bse_mdev) {
362 /* extend new to fully replace old */
363 new->bse_length += new->bse_f_offset -
364 old->bse_f_offset;
365 new->bse_f_offset = old->bse_f_offset;
366 list_del(&old->bse_node);
367 bl->bl_count--;
368 kfree(old);
369 }
370 }
371 }
372 /* Note that if we never hit the above break, old will not point to a
373 * valid extent. However, in that case &old->bse_node==list.
374 */
375 list_add_tail(&new->bse_node, &old->bse_node);
376 /* Scan forward for overlaps. If we find any, extend new and
377 * remove the overlapped extent.
378 */
379 old = list_prepare_entry(new, clist, bse_node);
380 list_for_each_entry_safe_continue(old, save, clist, bse_node) {
381 if (end < old->bse_f_offset)
382 break;
383 /* new overlaps or abuts old */
384 if (new->bse_mdev == old->bse_mdev) {
385 if (end < old->bse_f_offset + old->bse_length) {
386 /* extend new to fully cover old */
387 end = old->bse_f_offset + old->bse_length;
388 new->bse_length = end - new->bse_f_offset;
389 }
390 list_del(&old->bse_node);
391 bl->bl_count--;
392 kfree(old);
393 }
394 }
395 dprintk("%s: after merging\n", __func__);
396 print_clist(clist, bl->bl_count);
397}
398
332static void print_bl_extent(struct pnfs_block_extent *be) 399static void print_bl_extent(struct pnfs_block_extent *be)
333{ 400{
334 dprintk("PRINT EXTENT extent %p\n", be); 401 dprintk("PRINT EXTENT extent %p\n", be);
@@ -539,6 +606,34 @@ bl_find_get_extent(struct pnfs_block_layout *bl, sector_t isect,
539 return ret; 606 return ret;
540} 607}
541 608
609/* Similar to bl_find_get_extent, but called with lock held, and ignores cow */
610static struct pnfs_block_extent *
611bl_find_get_extent_locked(struct pnfs_block_layout *bl, sector_t isect)
612{
613 struct pnfs_block_extent *be, *ret = NULL;
614 int i;
615
616 dprintk("%s enter with isect %llu\n", __func__, (u64)isect);
617 for (i = 0; i < EXTENT_LISTS; i++) {
618 if (ret)
619 break;
620 list_for_each_entry_reverse(be, &bl->bl_extents[i], be_node) {
621 if (isect >= be->be_f_offset + be->be_length)
622 break;
623 if (isect >= be->be_f_offset) {
624 /* We have found an extent */
625 dprintk("%s Get %p (%i)\n", __func__, be,
626 atomic_read(&be->be_refcnt.refcount));
627 kref_get(&be->be_refcnt);
628 ret = be;
629 break;
630 }
631 }
632 }
633 print_bl_extent(ret);
634 return ret;
635}
636
542int 637int
543encode_pnfs_block_layoutupdate(struct pnfs_block_layout *bl, 638encode_pnfs_block_layoutupdate(struct pnfs_block_layout *bl,
544 struct xdr_stream *xdr, 639 struct xdr_stream *xdr,
@@ -628,3 +723,118 @@ _front_merge(struct pnfs_block_extent *be, struct list_head *head,
628 kfree(storage); 723 kfree(storage);
629 return be; 724 return be;
630} 725}
726
727static u64
728set_to_rw(struct pnfs_block_layout *bl, u64 offset, u64 length)
729{
730 u64 rv = offset + length;
731 struct pnfs_block_extent *be, *e1, *e2, *e3, *new, *old;
732 struct pnfs_block_extent *children[3];
733 struct pnfs_block_extent *merge1 = NULL, *merge2 = NULL;
734 int i = 0, j;
735
736 dprintk("%s(%llu, %llu)\n", __func__, offset, length);
737 /* Create storage for up to three new extents e1, e2, e3 */
738 e1 = kmalloc(sizeof(*e1), GFP_ATOMIC);
739 e2 = kmalloc(sizeof(*e2), GFP_ATOMIC);
740 e3 = kmalloc(sizeof(*e3), GFP_ATOMIC);
741 /* BUG - we are ignoring any failure */
742 if (!e1 || !e2 || !e3)
743 goto out_nosplit;
744
745 spin_lock(&bl->bl_ext_lock);
746 be = bl_find_get_extent_locked(bl, offset);
747 rv = be->be_f_offset + be->be_length;
748 if (be->be_state != PNFS_BLOCK_INVALID_DATA) {
749 spin_unlock(&bl->bl_ext_lock);
750 goto out_nosplit;
751 }
752 /* Add e* to children, bumping e*'s krefs */
753 if (be->be_f_offset != offset) {
754 _prep_new_extent(e1, be, be->be_f_offset,
755 offset - be->be_f_offset,
756 PNFS_BLOCK_INVALID_DATA);
757 children[i++] = e1;
758 print_bl_extent(e1);
759 } else
760 merge1 = e1;
761 _prep_new_extent(e2, be, offset,
762 min(length, be->be_f_offset + be->be_length - offset),
763 PNFS_BLOCK_READWRITE_DATA);
764 children[i++] = e2;
765 print_bl_extent(e2);
766 if (offset + length < be->be_f_offset + be->be_length) {
767 _prep_new_extent(e3, be, e2->be_f_offset + e2->be_length,
768 be->be_f_offset + be->be_length -
769 offset - length,
770 PNFS_BLOCK_INVALID_DATA);
771 children[i++] = e3;
772 print_bl_extent(e3);
773 } else
774 merge2 = e3;
775
776 /* Remove be from list, and insert the e* */
777 /* We don't get refs on e*, since this list is the base reference
778 * set when init'ed.
779 */
780 if (i < 3)
781 children[i] = NULL;
782 new = children[0];
783 list_replace(&be->be_node, &new->be_node);
784 bl_put_extent(be);
785 new = _front_merge(new, &bl->bl_extents[RW_EXTENT], merge1);
786 for (j = 1; j < i; j++) {
787 old = new;
788 new = children[j];
789 list_add(&new->be_node, &old->be_node);
790 }
791 if (merge2) {
792 /* This is a HACK, should just create a _back_merge function */
793 new = list_entry(new->be_node.next,
794 struct pnfs_block_extent, be_node);
795 new = _front_merge(new, &bl->bl_extents[RW_EXTENT], merge2);
796 }
797 spin_unlock(&bl->bl_ext_lock);
798
799 /* Since we removed the base reference above, be is now scheduled for
800 * destruction.
801 */
802 bl_put_extent(be);
803 dprintk("%s returns %llu after split\n", __func__, rv);
804 return rv;
805
806 out_nosplit:
807 kfree(e1);
808 kfree(e2);
809 kfree(e3);
810 dprintk("%s returns %llu without splitting\n", __func__, rv);
811 return rv;
812}
813
814void
815clean_pnfs_block_layoutupdate(struct pnfs_block_layout *bl,
816 const struct nfs4_layoutcommit_args *arg,
817 int status)
818{
819 struct pnfs_block_short_extent *lce, *save;
820
821 dprintk("%s status %d\n", __func__, status);
822 list_for_each_entry_safe(lce, save, &bl->bl_committing, bse_node) {
823 if (likely(!status)) {
824 u64 offset = lce->bse_f_offset;
825 u64 end = offset + lce->bse_length;
826
827 do {
828 offset = set_to_rw(bl, offset, end - offset);
829 } while (offset < end);
830 list_del(&lce->bse_node);
831
832 kfree(lce);
833 } else {
834 list_del(&lce->bse_node);
835 spin_lock(&bl->bl_ext_lock);
836 add_to_commitlist(bl, lce);
837 spin_unlock(&bl->bl_ext_lock);
838 }
839 }
840}