diff options
author | Boaz Harrosh <bharrosh@panasas.com> | 2011-10-31 18:15:38 -0400 |
---|---|---|
committer | Trond Myklebust <Trond.Myklebust@netapp.com> | 2011-11-02 23:56:08 -0400 |
commit | eecfc6312a24e6d0d2883de0a9a6ccf8e993f472 (patch) | |
tree | 83b7b6d8895a007e698e59f065924527e8870d90 /fs/nfs | |
parent | af4f5b54bcf0379089d01518e818f37258708fb7 (diff) |
pnfs-obj: move to ore 02: move to ORE
In this patch we actually move to the ORE
(Object Raid Engine).
objio_state holds a pointer to an ore_io_state. Once
we have an ore_io_state at hand we can call the ore
for reading/writing. We register on the done path
to kick off the nfs io_done mechanism.
Again, for ease of reviewing, the old code is wrapped in "#if 0"
rather than removed, so that the diff command works better.
The old code will be removed in the next patch.
fs/exofs/Kconfig::ORE is modified to also be auto-included
if PNFS_OBJLAYOUT is set, since we now depend on ORE.
(See comments in fs/exofs/Kconfig)
Signed-off-by: Boaz Harrosh <bharrosh@panasas.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Diffstat (limited to 'fs/nfs')
-rw-r--r-- | fs/nfs/objlayout/objio_osd.c | 133 |
1 files changed, 59 insertions, 74 deletions
diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c index bd7ec26e2840..00b384934c32 100644 --- a/fs/nfs/objlayout/objio_osd.c +++ b/fs/nfs/objlayout/objio_osd.c | |||
@@ -44,12 +44,6 @@ | |||
44 | 44 | ||
45 | #define NFSDBG_FACILITY NFSDBG_PNFS_LD | 45 | #define NFSDBG_FACILITY NFSDBG_PNFS_LD |
46 | 46 | ||
47 | #define _LLU(x) ((unsigned long long)x) | ||
48 | |||
49 | enum { BIO_MAX_PAGES_KMALLOC = | ||
50 | (PAGE_SIZE - sizeof(struct bio)) / sizeof(struct bio_vec), | ||
51 | }; | ||
52 | |||
53 | struct objio_dev_ent { | 47 | struct objio_dev_ent { |
54 | struct nfs4_deviceid_node id_node; | 48 | struct nfs4_deviceid_node id_node; |
55 | struct ore_dev od; | 49 | struct ore_dev od; |
@@ -124,37 +118,13 @@ OBJIO_LSEG(struct pnfs_layout_segment *lseg) | |||
124 | return container_of(lseg, struct objio_segment, lseg); | 118 | return container_of(lseg, struct objio_segment, lseg); |
125 | } | 119 | } |
126 | 120 | ||
127 | struct objio_state; | ||
128 | typedef int (*objio_done_fn)(struct objio_state *ios); | ||
129 | |||
130 | struct objio_state { | 121 | struct objio_state { |
131 | /* Generic layer */ | 122 | /* Generic layer */ |
132 | struct objlayout_io_res oir; | 123 | struct objlayout_io_res oir; |
133 | 124 | ||
134 | struct page **pages; | ||
135 | unsigned pgbase; | ||
136 | unsigned nr_pages; | ||
137 | unsigned long count; | ||
138 | loff_t offset; | ||
139 | bool sync; | 125 | bool sync; |
140 | 126 | /*FIXME: Support for extra_bytes at ore_get_rw_state() */ | |
141 | struct ore_layout *layout; | 127 | struct ore_io_state *ios; |
142 | struct ore_components *oc; | ||
143 | |||
144 | struct kref kref; | ||
145 | objio_done_fn done; | ||
146 | void *private; | ||
147 | |||
148 | unsigned long length; | ||
149 | unsigned numdevs; /* Actually used devs in this IO */ | ||
150 | /* A per-device variable array of size numdevs */ | ||
151 | struct _objio_per_comp { | ||
152 | struct bio *bio; | ||
153 | struct osd_request *or; | ||
154 | unsigned long length; | ||
155 | u64 offset; | ||
156 | unsigned dev; | ||
157 | } per_dev[]; | ||
158 | }; | 128 | }; |
159 | 129 | ||
160 | /* Send and wait for a get_device_info of devices in the layout, | 130 | /* Send and wait for a get_device_info of devices in the layout, |
@@ -374,16 +344,16 @@ void objio_free_lseg(struct pnfs_layout_segment *lseg) | |||
374 | } | 344 | } |
375 | 345 | ||
376 | static int | 346 | static int |
377 | objio_alloc_io_state(struct pnfs_layout_hdr *pnfs_layout_type, | 347 | objio_alloc_io_state(struct pnfs_layout_hdr *pnfs_layout_type, bool is_reading, |
378 | struct pnfs_layout_segment *lseg, struct page **pages, unsigned pgbase, | 348 | struct pnfs_layout_segment *lseg, struct page **pages, unsigned pgbase, |
379 | loff_t offset, size_t count, void *rpcdata, gfp_t gfp_flags, | 349 | loff_t offset, size_t count, void *rpcdata, gfp_t gfp_flags, |
380 | struct objio_state **outp) | 350 | struct objio_state **outp) |
381 | { | 351 | { |
382 | struct objio_segment *objio_seg = OBJIO_LSEG(lseg); | 352 | struct objio_segment *objio_seg = OBJIO_LSEG(lseg); |
383 | struct objio_state *ios; | 353 | struct ore_io_state *ios; |
354 | int ret; | ||
384 | struct __alloc_objio_state { | 355 | struct __alloc_objio_state { |
385 | struct objio_state objios; | 356 | struct objio_state objios; |
386 | struct _objio_per_comp per_dev[objio_seg->oc.numdevs]; | ||
387 | struct pnfs_osd_ioerr ioerrs[objio_seg->oc.numdevs]; | 357 | struct pnfs_osd_ioerr ioerrs[objio_seg->oc.numdevs]; |
388 | } *aos; | 358 | } *aos; |
389 | 359 | ||
@@ -391,30 +361,33 @@ objio_alloc_io_state(struct pnfs_layout_hdr *pnfs_layout_type, | |||
391 | if (unlikely(!aos)) | 361 | if (unlikely(!aos)) |
392 | return -ENOMEM; | 362 | return -ENOMEM; |
393 | 363 | ||
394 | ios = &aos->objios; | ||
395 | |||
396 | ios->layout = &objio_seg->layout; | ||
397 | ios->oc = &objio_seg->oc; | ||
398 | objlayout_init_ioerrs(&aos->objios.oir, objio_seg->oc.numdevs, | 364 | objlayout_init_ioerrs(&aos->objios.oir, objio_seg->oc.numdevs, |
399 | aos->ioerrs, rpcdata, pnfs_layout_type); | 365 | aos->ioerrs, rpcdata, pnfs_layout_type); |
400 | 366 | ||
367 | ret = ore_get_rw_state(&objio_seg->layout, &objio_seg->oc, is_reading, | ||
368 | offset, count, &ios); | ||
369 | if (unlikely(ret)) { | ||
370 | kfree(aos); | ||
371 | return ret; | ||
372 | } | ||
373 | |||
401 | ios->pages = pages; | 374 | ios->pages = pages; |
402 | ios->pgbase = pgbase; | 375 | ios->pgbase = pgbase; |
403 | ios->nr_pages = (pgbase + count + PAGE_SIZE - 1) >> PAGE_SHIFT; | 376 | ios->private = aos; |
404 | ios->offset = offset; | ||
405 | ios->count = count; | ||
406 | ios->sync = 0; | ||
407 | BUG_ON(ios->nr_pages > (pgbase + count + PAGE_SIZE - 1) >> PAGE_SHIFT); | 377 | BUG_ON(ios->nr_pages > (pgbase + count + PAGE_SIZE - 1) >> PAGE_SHIFT); |
408 | 378 | ||
409 | *outp = ios; | 379 | aos->objios.sync = 0; |
380 | aos->objios.ios = ios; | ||
381 | *outp = &aos->objios; | ||
410 | return 0; | 382 | return 0; |
411 | } | 383 | } |
412 | 384 | ||
413 | void objio_free_result(struct objlayout_io_res *oir) | 385 | void objio_free_result(struct objlayout_io_res *oir) |
414 | { | 386 | { |
415 | struct objio_state *ios = container_of(oir, struct objio_state, oir); | 387 | struct objio_state *objios = container_of(oir, struct objio_state, oir); |
416 | 388 | ||
417 | kfree(ios); | 389 | ore_put_io_state(objios->ios); |
390 | kfree(objios); | ||
418 | } | 391 | } |
419 | 392 | ||
420 | enum pnfs_osd_errno osd_pri_2_pnfs_err(enum osd_err_priority oep) | 393 | enum pnfs_osd_errno osd_pri_2_pnfs_err(enum osd_err_priority oep) |
@@ -447,7 +420,7 @@ enum pnfs_osd_errno osd_pri_2_pnfs_err(enum osd_err_priority oep) | |||
447 | } | 420 | } |
448 | } | 421 | } |
449 | 422 | ||
450 | static void __on_dev_error(struct objio_state *ios, bool is_write, | 423 | static void __on_dev_error(struct ore_io_state *ios, |
451 | struct ore_dev *od, unsigned dev_index, enum osd_err_priority oep, | 424 | struct ore_dev *od, unsigned dev_index, enum osd_err_priority oep, |
452 | u64 dev_offset, u64 dev_len) | 425 | u64 dev_offset, u64 dev_len) |
453 | { | 426 | { |
@@ -465,9 +438,10 @@ static void __on_dev_error(struct objio_state *ios, bool is_write, | |||
465 | 438 | ||
466 | objlayout_io_set_result(&objios->oir, comp, | 439 | objlayout_io_set_result(&objios->oir, comp, |
467 | &pooid, osd_pri_2_pnfs_err(oep), | 440 | &pooid, osd_pri_2_pnfs_err(oep), |
468 | dev_offset, dev_len, is_write); | 441 | dev_offset, dev_len, !ios->reading); |
469 | } | 442 | } |
470 | 443 | ||
444 | #if 0 | ||
471 | static void _clear_bio(struct bio *bio) | 445 | static void _clear_bio(struct bio *bio) |
472 | { | 446 | { |
473 | struct bio_vec *bv; | 447 | struct bio_vec *bv; |
@@ -786,26 +760,28 @@ static int _io_exec(struct objio_state *ios) | |||
786 | 760 | ||
787 | return ret; | 761 | return ret; |
788 | } | 762 | } |
763 | #endif | ||
789 | 764 | ||
790 | /* | 765 | /* |
791 | * read | 766 | * read |
792 | */ | 767 | */ |
793 | static int _read_done(struct objio_state *ios) | 768 | static void _read_done(struct ore_io_state *ios, void *private) |
794 | { | 769 | { |
770 | struct objio_state *objios = private; | ||
795 | ssize_t status; | 771 | ssize_t status; |
796 | int ret = _io_check(ios, false); | 772 | int ret = ore_check_io(ios, &__on_dev_error); |
797 | 773 | ||
798 | _io_free(ios); | 774 | /* FIXME: _io_free(ios) can we dealocate the libosd resources; */ |
799 | 775 | ||
800 | if (likely(!ret)) | 776 | if (likely(!ret)) |
801 | status = ios->length; | 777 | status = ios->length; |
802 | else | 778 | else |
803 | status = ret; | 779 | status = ret; |
804 | 780 | ||
805 | objlayout_read_done(&ios->oir, status, ios->sync); | 781 | objlayout_read_done(&objios->oir, status, objios->sync); |
806 | return ret; | ||
807 | } | 782 | } |
808 | 783 | ||
784 | #if 0 | ||
809 | static int _read_mirrors(struct objio_state *ios, unsigned cur_comp) | 785 | static int _read_mirrors(struct objio_state *ios, unsigned cur_comp) |
810 | { | 786 | { |
811 | struct osd_request *or = NULL; | 787 | struct osd_request *or = NULL; |
@@ -860,49 +836,50 @@ err: | |||
860 | _io_free(ios); | 836 | _io_free(ios); |
861 | return ret; | 837 | return ret; |
862 | } | 838 | } |
839 | #endif | ||
863 | 840 | ||
864 | int objio_read_pagelist(struct nfs_read_data *rdata) | 841 | int objio_read_pagelist(struct nfs_read_data *rdata) |
865 | { | 842 | { |
866 | struct objio_state *ios; | 843 | struct objio_state *objios; |
867 | int ret; | 844 | int ret; |
868 | 845 | ||
869 | ret = objio_alloc_io_state(NFS_I(rdata->inode)->layout, | 846 | ret = objio_alloc_io_state(NFS_I(rdata->inode)->layout, true, |
870 | rdata->lseg, rdata->args.pages, rdata->args.pgbase, | 847 | rdata->lseg, rdata->args.pages, rdata->args.pgbase, |
871 | rdata->args.offset, rdata->args.count, rdata, | 848 | rdata->args.offset, rdata->args.count, rdata, |
872 | GFP_KERNEL, &ios); | 849 | GFP_KERNEL, &objios); |
873 | if (unlikely(ret)) | ||
874 | return ret; | ||
875 | |||
876 | ret = _io_rw_pagelist(ios, GFP_KERNEL); | ||
877 | if (unlikely(ret)) | 850 | if (unlikely(ret)) |
878 | return ret; | 851 | return ret; |
879 | 852 | ||
880 | return _read_exec(ios); | 853 | objios->ios->done = _read_done; |
854 | dprintk("%s: offset=0x%llx length=0x%x\n", __func__, | ||
855 | rdata->args.offset, rdata->args.count); | ||
856 | return ore_read(objios->ios); | ||
881 | } | 857 | } |
882 | 858 | ||
883 | /* | 859 | /* |
884 | * write | 860 | * write |
885 | */ | 861 | */ |
886 | static int _write_done(struct objio_state *ios) | 862 | static void _write_done(struct ore_io_state *ios, void *private) |
887 | { | 863 | { |
864 | struct objio_state *objios = private; | ||
888 | ssize_t status; | 865 | ssize_t status; |
889 | int ret = _io_check(ios, true); | 866 | int ret = ore_check_io(ios, &__on_dev_error); |
890 | 867 | ||
891 | _io_free(ios); | 868 | /* FIXME: _io_free(ios) can we dealocate the libosd resources; */ |
892 | 869 | ||
893 | if (likely(!ret)) { | 870 | if (likely(!ret)) { |
894 | /* FIXME: should be based on the OSD's persistence model | 871 | /* FIXME: should be based on the OSD's persistence model |
895 | * See OSD2r05 Section 4.13 Data persistence model */ | 872 | * See OSD2r05 Section 4.13 Data persistence model */ |
896 | ios->oir.committed = NFS_FILE_SYNC; | 873 | objios->oir.committed = NFS_FILE_SYNC; |
897 | status = ios->length; | 874 | status = ios->length; |
898 | } else { | 875 | } else { |
899 | status = ret; | 876 | status = ret; |
900 | } | 877 | } |
901 | 878 | ||
902 | objlayout_write_done(&ios->oir, status, ios->sync); | 879 | objlayout_write_done(&objios->oir, status, objios->sync); |
903 | return ret; | ||
904 | } | 880 | } |
905 | 881 | ||
882 | #if 0 | ||
906 | static int _write_mirrors(struct objio_state *ios, unsigned cur_comp) | 883 | static int _write_mirrors(struct objio_state *ios, unsigned cur_comp) |
907 | { | 884 | { |
908 | struct _objio_per_comp *master_dev = &ios->per_dev[cur_comp]; | 885 | struct _objio_per_comp *master_dev = &ios->per_dev[cur_comp]; |
@@ -984,27 +961,35 @@ err: | |||
984 | _io_free(ios); | 961 | _io_free(ios); |
985 | return ret; | 962 | return ret; |
986 | } | 963 | } |
964 | #endif | ||
987 | 965 | ||
988 | int objio_write_pagelist(struct nfs_write_data *wdata, int how) | 966 | int objio_write_pagelist(struct nfs_write_data *wdata, int how) |
989 | { | 967 | { |
990 | struct objio_state *ios; | 968 | struct objio_state *objios; |
991 | int ret; | 969 | int ret; |
992 | 970 | ||
993 | ret = objio_alloc_io_state(NFS_I(wdata->inode)->layout, | 971 | ret = objio_alloc_io_state(NFS_I(wdata->inode)->layout, false, |
994 | wdata->lseg, wdata->args.pages, wdata->args.pgbase, | 972 | wdata->lseg, wdata->args.pages, wdata->args.pgbase, |
995 | wdata->args.offset, wdata->args.count, wdata, GFP_NOFS, | 973 | wdata->args.offset, wdata->args.count, wdata, GFP_NOFS, |
996 | &ios); | 974 | &objios); |
997 | if (unlikely(ret)) | 975 | if (unlikely(ret)) |
998 | return ret; | 976 | return ret; |
999 | 977 | ||
1000 | ios->sync = 0 != (how & FLUSH_SYNC); | 978 | objios->sync = 0 != (how & FLUSH_SYNC); |
1001 | 979 | ||
1002 | /* TODO: ios->stable = stable; */ | 980 | if (!objios->sync) |
1003 | ret = _io_rw_pagelist(ios, GFP_NOFS); | 981 | objios->ios->done = _write_done; |
982 | |||
983 | dprintk("%s: offset=0x%llx length=0x%x\n", __func__, | ||
984 | wdata->args.offset, wdata->args.count); | ||
985 | ret = ore_write(objios->ios); | ||
1004 | if (unlikely(ret)) | 986 | if (unlikely(ret)) |
1005 | return ret; | 987 | return ret; |
1006 | 988 | ||
1007 | return _write_exec(ios); | 989 | if (objios->sync) |
990 | _write_done(objios->ios, objios); | ||
991 | |||
992 | return 0; | ||
1008 | } | 993 | } |
1009 | 994 | ||
1010 | static bool objio_pg_test(struct nfs_pageio_descriptor *pgio, | 995 | static bool objio_pg_test(struct nfs_pageio_descriptor *pgio, |