diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2011-11-04 15:27:43 -0400 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2011-11-04 15:27:43 -0400 |
| commit | 6736c047995c560b73f3860095c631456b0bbea8 (patch) | |
| tree | 1ef4d2ab9d7d2f2cd3fe26a3e9135fb312e81b85 | |
| parent | 16dfd1faed8c5235d9a7c190b91b6d97d6cd3272 (diff) | |
| parent | 6070295efc90d1093b2031c43380bd7d9673c802 (diff) | |
Merge branch 'nfs-for-3.2' of git://git.linux-nfs.org/projects/trondmy/linux-nfs
* 'nfs-for-3.2' of git://git.linux-nfs.org/projects/trondmy/linux-nfs: (25 commits)
nfs: set vs_hidden on nfs4_callback_version4 (try #2)
pnfs-obj: Support for RAID5 read-4-write interface.
pnfs-obj: move to ore 03: Remove old raid engine
pnfs-obj: move to ore 02: move to ORE
pnfs-obj: move to ore 01: ore_layout & ore_components
pnfs-obj: Rename objlayout_io_state => objlayout_io_res
pnfs-obj: Get rid of objlayout_{alloc,free}_io_state
pnfs-obj: Return PNFS_NOT_ATTEMPTED in case of read/write_pagelist
pnfs-obj: Remove redundant EOF from objlayout_io_state
nfs: Remove unused variable from write.c
nfs: Fix unused variable warning from file.c
NFS: Remove no-op less-than-zero checks on unsigned variables.
NFS: Clean up nfs4_xdr_dec_secinfo()
NFS: Fix documenting comment for nfs_create_request()
NFS4: fix cb_recallany decode error
nfs4: serialize layoutcommit
SUNRPC: remove rpcbind clients destruction on module cleanup
SUNRPC: remove rpcbind clients creation during service registering
NFSd: call svc rpcbind cleanup explicitly
SUNRPC: cleanup service destruction
...
| -rw-r--r-- | fs/exofs/Kconfig | 2 | ||||
| -rw-r--r-- | fs/nfs/callback_xdr.c | 12 | ||||
| -rw-r--r-- | fs/nfs/file.c | 9 | ||||
| -rw-r--r-- | fs/nfs/nfs4filelayout.c | 7 | ||||
| -rw-r--r-- | fs/nfs/nfs4proc.c | 6 | ||||
| -rw-r--r-- | fs/nfs/nfs4xdr.c | 2 | ||||
| -rw-r--r-- | fs/nfs/objlayout/objio_osd.c | 872 | ||||
| -rw-r--r-- | fs/nfs/objlayout/objlayout.c | 209 | ||||
| -rw-r--r-- | fs/nfs/objlayout/objlayout.h | 48 | ||||
| -rw-r--r-- | fs/nfs/pagelist.c | 2 | ||||
| -rw-r--r-- | fs/nfs/pnfs.c | 25 | ||||
| -rw-r--r-- | fs/nfs/write.c | 3 | ||||
| -rw-r--r-- | fs/nfsd/nfssvc.c | 2 | ||||
| -rw-r--r-- | include/linux/nfs_fs.h | 1 | ||||
| -rw-r--r-- | include/linux/sunrpc/clnt.h | 2 | ||||
| -rw-r--r-- | include/linux/sunrpc/svc.h | 1 | ||||
| -rw-r--r-- | net/sunrpc/auth_unix.c | 3 | ||||
| -rw-r--r-- | net/sunrpc/rpcb_clnt.c | 88 | ||||
| -rw-r--r-- | net/sunrpc/sunrpc_syms.c | 3 | ||||
| -rw-r--r-- | net/sunrpc/svc.c | 48 |
20 files changed, 457 insertions, 888 deletions
diff --git a/fs/exofs/Kconfig b/fs/exofs/Kconfig index fa9a286c8771..da42f32c49be 100644 --- a/fs/exofs/Kconfig +++ b/fs/exofs/Kconfig | |||
| @@ -5,7 +5,7 @@ | |||
| 5 | # selected by any of the users. | 5 | # selected by any of the users. |
| 6 | config ORE | 6 | config ORE |
| 7 | tristate | 7 | tristate |
| 8 | depends on EXOFS_FS | 8 | depends on EXOFS_FS || PNFS_OBJLAYOUT |
| 9 | select ASYNC_XOR | 9 | select ASYNC_XOR |
| 10 | default SCSI_OSD_ULD | 10 | default SCSI_OSD_ULD |
| 11 | 11 | ||
diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c index 918ad647afea..726e59a9e50f 100644 --- a/fs/nfs/callback_xdr.c +++ b/fs/nfs/callback_xdr.c | |||
| @@ -488,17 +488,18 @@ static __be32 decode_recallany_args(struct svc_rqst *rqstp, | |||
| 488 | struct xdr_stream *xdr, | 488 | struct xdr_stream *xdr, |
| 489 | struct cb_recallanyargs *args) | 489 | struct cb_recallanyargs *args) |
| 490 | { | 490 | { |
| 491 | __be32 *p; | 491 | uint32_t bitmap[2]; |
| 492 | __be32 *p, status; | ||
| 492 | 493 | ||
| 493 | args->craa_addr = svc_addr(rqstp); | 494 | args->craa_addr = svc_addr(rqstp); |
| 494 | p = read_buf(xdr, 4); | 495 | p = read_buf(xdr, 4); |
| 495 | if (unlikely(p == NULL)) | 496 | if (unlikely(p == NULL)) |
| 496 | return htonl(NFS4ERR_BADXDR); | 497 | return htonl(NFS4ERR_BADXDR); |
| 497 | args->craa_objs_to_keep = ntohl(*p++); | 498 | args->craa_objs_to_keep = ntohl(*p++); |
| 498 | p = read_buf(xdr, 4); | 499 | status = decode_bitmap(xdr, bitmap); |
| 499 | if (unlikely(p == NULL)) | 500 | if (unlikely(status)) |
| 500 | return htonl(NFS4ERR_BADXDR); | 501 | return status; |
| 501 | args->craa_type_mask = ntohl(*p); | 502 | args->craa_type_mask = bitmap[0]; |
| 502 | 503 | ||
| 503 | return 0; | 504 | return 0; |
| 504 | } | 505 | } |
| @@ -986,4 +987,5 @@ struct svc_version nfs4_callback_version4 = { | |||
| 986 | .vs_proc = nfs4_callback_procedures1, | 987 | .vs_proc = nfs4_callback_procedures1, |
| 987 | .vs_xdrsize = NFS4_CALLBACK_XDRSIZE, | 988 | .vs_xdrsize = NFS4_CALLBACK_XDRSIZE, |
| 988 | .vs_dispatch = NULL, | 989 | .vs_dispatch = NULL, |
| 990 | .vs_hidden = 1, | ||
| 989 | }; | 991 | }; |
diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 91c01f0a4c3b..0a1f8312b4dc 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c | |||
| @@ -137,11 +137,9 @@ nfs_file_open(struct inode *inode, struct file *filp) | |||
| 137 | static int | 137 | static int |
| 138 | nfs_file_release(struct inode *inode, struct file *filp) | 138 | nfs_file_release(struct inode *inode, struct file *filp) |
| 139 | { | 139 | { |
| 140 | struct dentry *dentry = filp->f_path.dentry; | ||
| 141 | |||
| 142 | dprintk("NFS: release(%s/%s)\n", | 140 | dprintk("NFS: release(%s/%s)\n", |
| 143 | dentry->d_parent->d_name.name, | 141 | filp->f_path.dentry->d_parent->d_name.name, |
| 144 | dentry->d_name.name); | 142 | filp->f_path.dentry->d_name.name); |
| 145 | 143 | ||
| 146 | nfs_inc_stats(inode, NFSIOS_VFSRELEASE); | 144 | nfs_inc_stats(inode, NFSIOS_VFSRELEASE); |
| 147 | return nfs_release(inode, filp); | 145 | return nfs_release(inode, filp); |
| @@ -228,14 +226,13 @@ nfs_file_read(struct kiocb *iocb, const struct iovec *iov, | |||
| 228 | struct dentry * dentry = iocb->ki_filp->f_path.dentry; | 226 | struct dentry * dentry = iocb->ki_filp->f_path.dentry; |
| 229 | struct inode * inode = dentry->d_inode; | 227 | struct inode * inode = dentry->d_inode; |
| 230 | ssize_t result; | 228 | ssize_t result; |
| 231 | size_t count = iov_length(iov, nr_segs); | ||
| 232 | 229 | ||
| 233 | if (iocb->ki_filp->f_flags & O_DIRECT) | 230 | if (iocb->ki_filp->f_flags & O_DIRECT) |
| 234 | return nfs_file_direct_read(iocb, iov, nr_segs, pos); | 231 | return nfs_file_direct_read(iocb, iov, nr_segs, pos); |
| 235 | 232 | ||
| 236 | dprintk("NFS: read(%s/%s, %lu@%lu)\n", | 233 | dprintk("NFS: read(%s/%s, %lu@%lu)\n", |
| 237 | dentry->d_parent->d_name.name, dentry->d_name.name, | 234 | dentry->d_parent->d_name.name, dentry->d_name.name, |
| 238 | (unsigned long) count, (unsigned long) pos); | 235 | (unsigned long) iov_length(iov, nr_segs), (unsigned long) pos); |
| 239 | 236 | ||
| 240 | result = nfs_revalidate_mapping(inode, iocb->ki_filp->f_mapping); | 237 | result = nfs_revalidate_mapping(inode, iocb->ki_filp->f_mapping); |
| 241 | if (!result) { | 238 | if (!result) { |
diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index 09119418402f..12185aadb349 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c | |||
| @@ -449,9 +449,8 @@ filelayout_check_layout(struct pnfs_layout_hdr *lo, | |||
| 449 | 449 | ||
| 450 | fl->dsaddr = dsaddr; | 450 | fl->dsaddr = dsaddr; |
| 451 | 451 | ||
| 452 | if (fl->first_stripe_index < 0 || | 452 | if (fl->first_stripe_index >= dsaddr->stripe_count) { |
| 453 | fl->first_stripe_index >= dsaddr->stripe_count) { | 453 | dprintk("%s Bad first_stripe_index %u\n", |
| 454 | dprintk("%s Bad first_stripe_index %d\n", | ||
| 455 | __func__, fl->first_stripe_index); | 454 | __func__, fl->first_stripe_index); |
| 456 | goto out_put; | 455 | goto out_put; |
| 457 | } | 456 | } |
| @@ -552,7 +551,7 @@ filelayout_decode_layout(struct pnfs_layout_hdr *flo, | |||
| 552 | 551 | ||
| 553 | /* Note that a zero value for num_fh is legal for STRIPE_SPARSE. | 552 | /* Note that a zero value for num_fh is legal for STRIPE_SPARSE. |
| 554 | * Futher checking is done in filelayout_check_layout */ | 553 | * Futher checking is done in filelayout_check_layout */ |
| 555 | if (fl->num_fh < 0 || fl->num_fh > | 554 | if (fl->num_fh > |
| 556 | max(NFS4_PNFS_MAX_STRIPE_CNT, NFS4_PNFS_MAX_MULTI_CNT)) | 555 | max(NFS4_PNFS_MAX_STRIPE_CNT, NFS4_PNFS_MAX_MULTI_CNT)) |
| 557 | goto out_err; | 556 | goto out_err; |
| 558 | 557 | ||
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index d2ae413c986a..b60fddf606f7 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c | |||
| @@ -5950,6 +5950,7 @@ static void nfs4_layoutcommit_release(void *calldata) | |||
| 5950 | { | 5950 | { |
| 5951 | struct nfs4_layoutcommit_data *data = calldata; | 5951 | struct nfs4_layoutcommit_data *data = calldata; |
| 5952 | struct pnfs_layout_segment *lseg, *tmp; | 5952 | struct pnfs_layout_segment *lseg, *tmp; |
| 5953 | unsigned long *bitlock = &NFS_I(data->args.inode)->flags; | ||
| 5953 | 5954 | ||
| 5954 | pnfs_cleanup_layoutcommit(data); | 5955 | pnfs_cleanup_layoutcommit(data); |
| 5955 | /* Matched by references in pnfs_set_layoutcommit */ | 5956 | /* Matched by references in pnfs_set_layoutcommit */ |
| @@ -5959,6 +5960,11 @@ static void nfs4_layoutcommit_release(void *calldata) | |||
| 5959 | &lseg->pls_flags)) | 5960 | &lseg->pls_flags)) |
| 5960 | put_lseg(lseg); | 5961 | put_lseg(lseg); |
| 5961 | } | 5962 | } |
| 5963 | |||
| 5964 | clear_bit_unlock(NFS_INO_LAYOUTCOMMITTING, bitlock); | ||
| 5965 | smp_mb__after_clear_bit(); | ||
| 5966 | wake_up_bit(bitlock, NFS_INO_LAYOUTCOMMITTING); | ||
| 5967 | |||
| 5962 | put_rpccred(data->cred); | 5968 | put_rpccred(data->cred); |
| 5963 | kfree(data); | 5969 | kfree(data); |
| 5964 | } | 5970 | } |
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 1dce12f41a4f..e6161b213ed1 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c | |||
| @@ -6602,8 +6602,6 @@ static int nfs4_xdr_dec_secinfo(struct rpc_rqst *rqstp, | |||
| 6602 | if (status) | 6602 | if (status) |
| 6603 | goto out; | 6603 | goto out; |
| 6604 | status = decode_secinfo(xdr, res); | 6604 | status = decode_secinfo(xdr, res); |
| 6605 | if (status) | ||
| 6606 | goto out; | ||
| 6607 | out: | 6605 | out: |
| 6608 | return status; | 6606 | return status; |
| 6609 | } | 6607 | } |
diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c index d0cda12fddc3..c807ab93140e 100644 --- a/fs/nfs/objlayout/objio_osd.c +++ b/fs/nfs/objlayout/objio_osd.c | |||
| @@ -38,21 +38,15 @@ | |||
| 38 | */ | 38 | */ |
| 39 | 39 | ||
| 40 | #include <linux/module.h> | 40 | #include <linux/module.h> |
| 41 | #include <scsi/osd_initiator.h> | 41 | #include <scsi/osd_ore.h> |
| 42 | 42 | ||
| 43 | #include "objlayout.h" | 43 | #include "objlayout.h" |
| 44 | 44 | ||
| 45 | #define NFSDBG_FACILITY NFSDBG_PNFS_LD | 45 | #define NFSDBG_FACILITY NFSDBG_PNFS_LD |
| 46 | 46 | ||
| 47 | #define _LLU(x) ((unsigned long long)x) | ||
| 48 | |||
| 49 | enum { BIO_MAX_PAGES_KMALLOC = | ||
| 50 | (PAGE_SIZE - sizeof(struct bio)) / sizeof(struct bio_vec), | ||
| 51 | }; | ||
| 52 | |||
| 53 | struct objio_dev_ent { | 47 | struct objio_dev_ent { |
| 54 | struct nfs4_deviceid_node id_node; | 48 | struct nfs4_deviceid_node id_node; |
| 55 | struct osd_dev *od; | 49 | struct ore_dev od; |
| 56 | }; | 50 | }; |
| 57 | 51 | ||
| 58 | static void | 52 | static void |
| @@ -60,8 +54,8 @@ objio_free_deviceid_node(struct nfs4_deviceid_node *d) | |||
| 60 | { | 54 | { |
| 61 | struct objio_dev_ent *de = container_of(d, struct objio_dev_ent, id_node); | 55 | struct objio_dev_ent *de = container_of(d, struct objio_dev_ent, id_node); |
| 62 | 56 | ||
| 63 | dprintk("%s: free od=%p\n", __func__, de->od); | 57 | dprintk("%s: free od=%p\n", __func__, de->od.od); |
| 64 | osduld_put_device(de->od); | 58 | osduld_put_device(de->od.od); |
| 65 | kfree(de); | 59 | kfree(de); |
| 66 | } | 60 | } |
| 67 | 61 | ||
| @@ -98,12 +92,12 @@ _dev_list_add(const struct nfs_server *nfss, | |||
| 98 | nfss->pnfs_curr_ld, | 92 | nfss->pnfs_curr_ld, |
| 99 | nfss->nfs_client, | 93 | nfss->nfs_client, |
| 100 | d_id); | 94 | d_id); |
| 101 | de->od = od; | 95 | de->od.od = od; |
| 102 | 96 | ||
| 103 | d = nfs4_insert_deviceid_node(&de->id_node); | 97 | d = nfs4_insert_deviceid_node(&de->id_node); |
| 104 | n = container_of(d, struct objio_dev_ent, id_node); | 98 | n = container_of(d, struct objio_dev_ent, id_node); |
| 105 | if (n != de) { | 99 | if (n != de) { |
| 106 | dprintk("%s: Race with other n->od=%p\n", __func__, n->od); | 100 | dprintk("%s: Race with other n->od=%p\n", __func__, n->od.od); |
| 107 | objio_free_deviceid_node(&de->id_node); | 101 | objio_free_deviceid_node(&de->id_node); |
| 108 | de = n; | 102 | de = n; |
| 109 | } | 103 | } |
| @@ -111,28 +105,11 @@ _dev_list_add(const struct nfs_server *nfss, | |||
| 111 | return de; | 105 | return de; |
| 112 | } | 106 | } |
| 113 | 107 | ||
| 114 | struct caps_buffers { | ||
| 115 | u8 caps_key[OSD_CRYPTO_KEYID_SIZE]; | ||
| 116 | u8 creds[OSD_CAP_LEN]; | ||
| 117 | }; | ||
| 118 | |||
| 119 | struct objio_segment { | 108 | struct objio_segment { |
| 120 | struct pnfs_layout_segment lseg; | 109 | struct pnfs_layout_segment lseg; |
| 121 | 110 | ||
| 122 | struct pnfs_osd_object_cred *comps; | 111 | struct ore_layout layout; |
| 123 | 112 | struct ore_components oc; | |
| 124 | unsigned mirrors_p1; | ||
| 125 | unsigned stripe_unit; | ||
| 126 | unsigned group_width; /* Data stripe_units without integrity comps */ | ||
| 127 | u64 group_depth; | ||
| 128 | unsigned group_count; | ||
| 129 | |||
| 130 | unsigned max_io_size; | ||
| 131 | |||
| 132 | unsigned comps_index; | ||
| 133 | unsigned num_comps; | ||
| 134 | /* variable length */ | ||
| 135 | struct objio_dev_ent *ods[]; | ||
| 136 | }; | 113 | }; |
| 137 | 114 | ||
| 138 | static inline struct objio_segment * | 115 | static inline struct objio_segment * |
| @@ -141,59 +118,44 @@ OBJIO_LSEG(struct pnfs_layout_segment *lseg) | |||
| 141 | return container_of(lseg, struct objio_segment, lseg); | 118 | return container_of(lseg, struct objio_segment, lseg); |
| 142 | } | 119 | } |
| 143 | 120 | ||
| 144 | struct objio_state; | ||
| 145 | typedef ssize_t (*objio_done_fn)(struct objio_state *ios); | ||
| 146 | |||
| 147 | struct objio_state { | 121 | struct objio_state { |
| 148 | /* Generic layer */ | 122 | /* Generic layer */ |
| 149 | struct objlayout_io_state ol_state; | 123 | struct objlayout_io_res oir; |
| 150 | 124 | ||
| 151 | struct objio_segment *layout; | 125 | bool sync; |
| 152 | 126 | /*FIXME: Support for extra_bytes at ore_get_rw_state() */ | |
| 153 | struct kref kref; | 127 | struct ore_io_state *ios; |
| 154 | objio_done_fn done; | ||
| 155 | void *private; | ||
| 156 | |||
| 157 | unsigned long length; | ||
| 158 | unsigned numdevs; /* Actually used devs in this IO */ | ||
| 159 | /* A per-device variable array of size numdevs */ | ||
| 160 | struct _objio_per_comp { | ||
| 161 | struct bio *bio; | ||
| 162 | struct osd_request *or; | ||
| 163 | unsigned long length; | ||
| 164 | u64 offset; | ||
| 165 | unsigned dev; | ||
| 166 | } per_dev[]; | ||
| 167 | }; | 128 | }; |
| 168 | 129 | ||
| 169 | /* Send and wait for a get_device_info of devices in the layout, | 130 | /* Send and wait for a get_device_info of devices in the layout, |
| 170 | then look them up with the osd_initiator library */ | 131 | then look them up with the osd_initiator library */ |
| 171 | static struct objio_dev_ent *_device_lookup(struct pnfs_layout_hdr *pnfslay, | 132 | static int objio_devices_lookup(struct pnfs_layout_hdr *pnfslay, |
| 172 | struct objio_segment *objio_seg, unsigned comp, | 133 | struct objio_segment *objio_seg, unsigned c, struct nfs4_deviceid *d_id, |
| 173 | gfp_t gfp_flags) | 134 | gfp_t gfp_flags) |
| 174 | { | 135 | { |
| 175 | struct pnfs_osd_deviceaddr *deviceaddr; | 136 | struct pnfs_osd_deviceaddr *deviceaddr; |
| 176 | struct nfs4_deviceid *d_id; | ||
| 177 | struct objio_dev_ent *ode; | 137 | struct objio_dev_ent *ode; |
| 178 | struct osd_dev *od; | 138 | struct osd_dev *od; |
| 179 | struct osd_dev_info odi; | 139 | struct osd_dev_info odi; |
| 180 | int err; | 140 | int err; |
| 181 | 141 | ||
| 182 | d_id = &objio_seg->comps[comp].oc_object_id.oid_device_id; | ||
| 183 | |||
| 184 | ode = _dev_list_find(NFS_SERVER(pnfslay->plh_inode), d_id); | 142 | ode = _dev_list_find(NFS_SERVER(pnfslay->plh_inode), d_id); |
| 185 | if (ode) | 143 | if (ode) { |
| 186 | return ode; | 144 | objio_seg->oc.ods[c] = &ode->od; /* must use container_of */ |
| 145 | return 0; | ||
| 146 | } | ||
| 187 | 147 | ||
| 188 | err = objlayout_get_deviceinfo(pnfslay, d_id, &deviceaddr, gfp_flags); | 148 | err = objlayout_get_deviceinfo(pnfslay, d_id, &deviceaddr, gfp_flags); |
| 189 | if (unlikely(err)) { | 149 | if (unlikely(err)) { |
| 190 | dprintk("%s: objlayout_get_deviceinfo dev(%llx:%llx) =>%d\n", | 150 | dprintk("%s: objlayout_get_deviceinfo dev(%llx:%llx) =>%d\n", |
| 191 | __func__, _DEVID_LO(d_id), _DEVID_HI(d_id), err); | 151 | __func__, _DEVID_LO(d_id), _DEVID_HI(d_id), err); |
| 192 | return ERR_PTR(err); | 152 | return err; |
| 193 | } | 153 | } |
| 194 | 154 | ||
| 195 | odi.systemid_len = deviceaddr->oda_systemid.len; | 155 | odi.systemid_len = deviceaddr->oda_systemid.len; |
| 196 | if (odi.systemid_len > sizeof(odi.systemid)) { | 156 | if (odi.systemid_len > sizeof(odi.systemid)) { |
| 157 | dprintk("%s: odi.systemid_len > sizeof(systemid=%zd)\n", | ||
| 158 | __func__, sizeof(odi.systemid)); | ||
| 197 | err = -EINVAL; | 159 | err = -EINVAL; |
| 198 | goto out; | 160 | goto out; |
| 199 | } else if (odi.systemid_len) | 161 | } else if (odi.systemid_len) |
| @@ -218,96 +180,53 @@ static struct objio_dev_ent *_device_lookup(struct pnfs_layout_hdr *pnfslay, | |||
| 218 | 180 | ||
| 219 | ode = _dev_list_add(NFS_SERVER(pnfslay->plh_inode), d_id, od, | 181 | ode = _dev_list_add(NFS_SERVER(pnfslay->plh_inode), d_id, od, |
| 220 | gfp_flags); | 182 | gfp_flags); |
| 221 | 183 | objio_seg->oc.ods[c] = &ode->od; /* must use container_of */ | |
| 184 | dprintk("Adding new dev_id(%llx:%llx)\n", | ||
| 185 | _DEVID_LO(d_id), _DEVID_HI(d_id)); | ||
| 222 | out: | 186 | out: |
| 223 | dprintk("%s: return=%d\n", __func__, err); | ||
| 224 | objlayout_put_deviceinfo(deviceaddr); | 187 | objlayout_put_deviceinfo(deviceaddr); |
| 225 | return err ? ERR_PTR(err) : ode; | 188 | return err; |
| 226 | } | 189 | } |
| 227 | 190 | ||
| 228 | static int objio_devices_lookup(struct pnfs_layout_hdr *pnfslay, | 191 | static void copy_single_comp(struct ore_components *oc, unsigned c, |
| 229 | struct objio_segment *objio_seg, | 192 | struct pnfs_osd_object_cred *src_comp) |
| 230 | gfp_t gfp_flags) | ||
| 231 | { | 193 | { |
| 232 | unsigned i; | 194 | struct ore_comp *ocomp = &oc->comps[c]; |
| 233 | int err; | ||
| 234 | 195 | ||
| 235 | /* lookup all devices */ | 196 | WARN_ON(src_comp->oc_cap_key.cred_len > 0); /* libosd is NO_SEC only */ |
| 236 | for (i = 0; i < objio_seg->num_comps; i++) { | 197 | WARN_ON(src_comp->oc_cap.cred_len > sizeof(ocomp->cred)); |
| 237 | struct objio_dev_ent *ode; | ||
| 238 | 198 | ||
| 239 | ode = _device_lookup(pnfslay, objio_seg, i, gfp_flags); | 199 | ocomp->obj.partition = src_comp->oc_object_id.oid_partition_id; |
| 240 | if (unlikely(IS_ERR(ode))) { | 200 | ocomp->obj.id = src_comp->oc_object_id.oid_object_id; |
| 241 | err = PTR_ERR(ode); | ||
| 242 | goto out; | ||
| 243 | } | ||
| 244 | objio_seg->ods[i] = ode; | ||
| 245 | } | ||
| 246 | err = 0; | ||
| 247 | 201 | ||
| 248 | out: | 202 | memcpy(ocomp->cred, src_comp->oc_cap.cred, sizeof(ocomp->cred)); |
| 249 | dprintk("%s: return=%d\n", __func__, err); | ||
| 250 | return err; | ||
| 251 | } | 203 | } |
| 252 | 204 | ||
| 253 | static int _verify_data_map(struct pnfs_osd_layout *layout) | 205 | int __alloc_objio_seg(unsigned numdevs, gfp_t gfp_flags, |
| 206 | struct objio_segment **pseg) | ||
| 254 | { | 207 | { |
| 255 | struct pnfs_osd_data_map *data_map = &layout->olo_map; | 208 | struct __alloc_objio_segment { |
| 256 | u64 stripe_length; | 209 | struct objio_segment olseg; |
| 257 | u32 group_width; | 210 | struct ore_dev *ods[numdevs]; |
| 258 | 211 | struct ore_comp comps[numdevs]; | |
| 259 | /* FIXME: Only raid0 for now. if not go through MDS */ | 212 | } *aolseg; |
| 260 | if (data_map->odm_raid_algorithm != PNFS_OSD_RAID_0) { | ||
| 261 | printk(KERN_ERR "Only RAID_0 for now\n"); | ||
| 262 | return -ENOTSUPP; | ||
| 263 | } | ||
| 264 | if (0 != (data_map->odm_num_comps % (data_map->odm_mirror_cnt + 1))) { | ||
| 265 | printk(KERN_ERR "Data Map wrong, num_comps=%u mirrors=%u\n", | ||
| 266 | data_map->odm_num_comps, data_map->odm_mirror_cnt); | ||
| 267 | return -EINVAL; | ||
| 268 | } | ||
| 269 | 213 | ||
| 270 | if (data_map->odm_group_width) | 214 | aolseg = kzalloc(sizeof(*aolseg), gfp_flags); |
| 271 | group_width = data_map->odm_group_width; | 215 | if (unlikely(!aolseg)) { |
| 272 | else | 216 | dprintk("%s: Faild allocation numdevs=%d size=%zd\n", __func__, |
| 273 | group_width = data_map->odm_num_comps / | 217 | numdevs, sizeof(*aolseg)); |
| 274 | (data_map->odm_mirror_cnt + 1); | 218 | return -ENOMEM; |
| 275 | |||
| 276 | stripe_length = (u64)data_map->odm_stripe_unit * group_width; | ||
| 277 | if (stripe_length >= (1ULL << 32)) { | ||
| 278 | printk(KERN_ERR "Total Stripe length(0x%llx)" | ||
| 279 | " >= 32bit is not supported\n", _LLU(stripe_length)); | ||
| 280 | return -ENOTSUPP; | ||
| 281 | } | 219 | } |
| 282 | 220 | ||
| 283 | if (0 != (data_map->odm_stripe_unit & ~PAGE_MASK)) { | 221 | aolseg->olseg.oc.numdevs = numdevs; |
| 284 | printk(KERN_ERR "Stripe Unit(0x%llx)" | 222 | aolseg->olseg.oc.single_comp = EC_MULTPLE_COMPS; |
| 285 | " must be Multples of PAGE_SIZE(0x%lx)\n", | 223 | aolseg->olseg.oc.comps = aolseg->comps; |
| 286 | _LLU(data_map->odm_stripe_unit), PAGE_SIZE); | 224 | aolseg->olseg.oc.ods = aolseg->ods; |
| 287 | return -ENOTSUPP; | ||
| 288 | } | ||
| 289 | 225 | ||
| 226 | *pseg = &aolseg->olseg; | ||
| 290 | return 0; | 227 | return 0; |
| 291 | } | 228 | } |
| 292 | 229 | ||
| 293 | static void copy_single_comp(struct pnfs_osd_object_cred *cur_comp, | ||
| 294 | struct pnfs_osd_object_cred *src_comp, | ||
| 295 | struct caps_buffers *caps_p) | ||
| 296 | { | ||
| 297 | WARN_ON(src_comp->oc_cap_key.cred_len > sizeof(caps_p->caps_key)); | ||
| 298 | WARN_ON(src_comp->oc_cap.cred_len > sizeof(caps_p->creds)); | ||
| 299 | |||
| 300 | *cur_comp = *src_comp; | ||
| 301 | |||
| 302 | memcpy(caps_p->caps_key, src_comp->oc_cap_key.cred, | ||
| 303 | sizeof(caps_p->caps_key)); | ||
| 304 | cur_comp->oc_cap_key.cred = caps_p->caps_key; | ||
| 305 | |||
| 306 | memcpy(caps_p->creds, src_comp->oc_cap.cred, | ||
| 307 | sizeof(caps_p->creds)); | ||
| 308 | cur_comp->oc_cap.cred = caps_p->creds; | ||
| 309 | } | ||
| 310 | |||
| 311 | int objio_alloc_lseg(struct pnfs_layout_segment **outp, | 230 | int objio_alloc_lseg(struct pnfs_layout_segment **outp, |
| 312 | struct pnfs_layout_hdr *pnfslay, | 231 | struct pnfs_layout_hdr *pnfslay, |
| 313 | struct pnfs_layout_range *range, | 232 | struct pnfs_layout_range *range, |
| @@ -317,59 +236,43 @@ int objio_alloc_lseg(struct pnfs_layout_segment **outp, | |||
| 317 | struct objio_segment *objio_seg; | 236 | struct objio_segment *objio_seg; |
| 318 | struct pnfs_osd_xdr_decode_layout_iter iter; | 237 | struct pnfs_osd_xdr_decode_layout_iter iter; |
| 319 | struct pnfs_osd_layout layout; | 238 | struct pnfs_osd_layout layout; |
| 320 | struct pnfs_osd_object_cred *cur_comp, src_comp; | 239 | struct pnfs_osd_object_cred src_comp; |
| 321 | struct caps_buffers *caps_p; | 240 | unsigned cur_comp; |
| 322 | int err; | 241 | int err; |
| 323 | 242 | ||
| 324 | err = pnfs_osd_xdr_decode_layout_map(&layout, &iter, xdr); | 243 | err = pnfs_osd_xdr_decode_layout_map(&layout, &iter, xdr); |
| 325 | if (unlikely(err)) | 244 | if (unlikely(err)) |
| 326 | return err; | 245 | return err; |
| 327 | 246 | ||
| 328 | err = _verify_data_map(&layout); | 247 | err = __alloc_objio_seg(layout.olo_num_comps, gfp_flags, &objio_seg); |
| 329 | if (unlikely(err)) | 248 | if (unlikely(err)) |
| 330 | return err; | 249 | return err; |
| 331 | 250 | ||
| 332 | objio_seg = kzalloc(sizeof(*objio_seg) + | 251 | objio_seg->layout.stripe_unit = layout.olo_map.odm_stripe_unit; |
| 333 | sizeof(objio_seg->ods[0]) * layout.olo_num_comps + | 252 | objio_seg->layout.group_width = layout.olo_map.odm_group_width; |
| 334 | sizeof(*objio_seg->comps) * layout.olo_num_comps + | 253 | objio_seg->layout.group_depth = layout.olo_map.odm_group_depth; |
| 335 | sizeof(struct caps_buffers) * layout.olo_num_comps, | 254 | objio_seg->layout.mirrors_p1 = layout.olo_map.odm_mirror_cnt + 1; |
| 336 | gfp_flags); | 255 | objio_seg->layout.raid_algorithm = layout.olo_map.odm_raid_algorithm; |
| 337 | if (!objio_seg) | ||
| 338 | return -ENOMEM; | ||
| 339 | 256 | ||
| 340 | objio_seg->comps = (void *)(objio_seg->ods + layout.olo_num_comps); | 257 | err = ore_verify_layout(layout.olo_map.odm_num_comps, |
| 341 | cur_comp = objio_seg->comps; | 258 | &objio_seg->layout); |
| 342 | caps_p = (void *)(cur_comp + layout.olo_num_comps); | ||
| 343 | while (pnfs_osd_xdr_decode_layout_comp(&src_comp, &iter, xdr, &err)) | ||
| 344 | copy_single_comp(cur_comp++, &src_comp, caps_p++); | ||
| 345 | if (unlikely(err)) | 259 | if (unlikely(err)) |
| 346 | goto err; | 260 | goto err; |
| 347 | 261 | ||
| 348 | objio_seg->num_comps = layout.olo_num_comps; | 262 | objio_seg->oc.first_dev = layout.olo_comps_index; |
| 349 | objio_seg->comps_index = layout.olo_comps_index; | 263 | cur_comp = 0; |
| 350 | err = objio_devices_lookup(pnfslay, objio_seg, gfp_flags); | 264 | while (pnfs_osd_xdr_decode_layout_comp(&src_comp, &iter, xdr, &err)) { |
| 351 | if (err) | 265 | copy_single_comp(&objio_seg->oc, cur_comp, &src_comp); |
| 352 | goto err; | 266 | err = objio_devices_lookup(pnfslay, objio_seg, cur_comp, |
| 353 | 267 | &src_comp.oc_object_id.oid_device_id, | |
| 354 | objio_seg->mirrors_p1 = layout.olo_map.odm_mirror_cnt + 1; | 268 | gfp_flags); |
| 355 | objio_seg->stripe_unit = layout.olo_map.odm_stripe_unit; | 269 | if (err) |
| 356 | if (layout.olo_map.odm_group_width) { | 270 | goto err; |
| 357 | objio_seg->group_width = layout.olo_map.odm_group_width; | 271 | ++cur_comp; |
| 358 | objio_seg->group_depth = layout.olo_map.odm_group_depth; | ||
| 359 | objio_seg->group_count = layout.olo_map.odm_num_comps / | ||
| 360 | objio_seg->mirrors_p1 / | ||
| 361 | objio_seg->group_width; | ||
| 362 | } else { | ||
| 363 | objio_seg->group_width = layout.olo_map.odm_num_comps / | ||
| 364 | objio_seg->mirrors_p1; | ||
| 365 | objio_seg->group_depth = -1; | ||
| 366 | objio_seg->group_count = 1; | ||
| 367 | } | 272 | } |
| 368 | 273 | /* pnfs_osd_xdr_decode_layout_comp returns false on error */ | |
| 369 | /* Cache this calculation it will hit for every page */ | 274 | if (unlikely(err)) |
| 370 | objio_seg->max_io_size = (BIO_MAX_PAGES_KMALLOC * PAGE_SIZE - | 275 | goto err; |
| 371 | objio_seg->stripe_unit) * | ||
| 372 | objio_seg->group_width; | ||
| 373 | 276 | ||
| 374 | *outp = &objio_seg->lseg; | 277 | *outp = &objio_seg->lseg; |
| 375 | return 0; | 278 | return 0; |
| @@ -386,43 +289,63 @@ void objio_free_lseg(struct pnfs_layout_segment *lseg) | |||
| 386 | int i; | 289 | int i; |
| 387 | struct objio_segment *objio_seg = OBJIO_LSEG(lseg); | 290 | struct objio_segment *objio_seg = OBJIO_LSEG(lseg); |
| 388 | 291 | ||
| 389 | for (i = 0; i < objio_seg->num_comps; i++) { | 292 | for (i = 0; i < objio_seg->oc.numdevs; i++) { |
| 390 | if (!objio_seg->ods[i]) | 293 | struct ore_dev *od = objio_seg->oc.ods[i]; |
| 294 | struct objio_dev_ent *ode; | ||
| 295 | |||
| 296 | if (!od) | ||
| 391 | break; | 297 | break; |
| 392 | nfs4_put_deviceid_node(&objio_seg->ods[i]->id_node); | 298 | ode = container_of(od, typeof(*ode), od); |
| 299 | nfs4_put_deviceid_node(&ode->id_node); | ||
| 393 | } | 300 | } |
| 394 | kfree(objio_seg); | 301 | kfree(objio_seg); |
| 395 | } | 302 | } |
| 396 | 303 | ||
| 397 | int objio_alloc_io_state(struct pnfs_layout_segment *lseg, | 304 | static int |
| 398 | struct objlayout_io_state **outp, | 305 | objio_alloc_io_state(struct pnfs_layout_hdr *pnfs_layout_type, bool is_reading, |
| 399 | gfp_t gfp_flags) | 306 | struct pnfs_layout_segment *lseg, struct page **pages, unsigned pgbase, |
| 307 | loff_t offset, size_t count, void *rpcdata, gfp_t gfp_flags, | ||
| 308 | struct objio_state **outp) | ||
| 400 | { | 309 | { |
| 401 | struct objio_segment *objio_seg = OBJIO_LSEG(lseg); | 310 | struct objio_segment *objio_seg = OBJIO_LSEG(lseg); |
| 402 | struct objio_state *ios; | 311 | struct ore_io_state *ios; |
| 403 | const unsigned first_size = sizeof(*ios) + | 312 | int ret; |
| 404 | objio_seg->num_comps * sizeof(ios->per_dev[0]); | 313 | struct __alloc_objio_state { |
| 405 | const unsigned sec_size = objio_seg->num_comps * | 314 | struct objio_state objios; |
| 406 | sizeof(ios->ol_state.ioerrs[0]); | 315 | struct pnfs_osd_ioerr ioerrs[objio_seg->oc.numdevs]; |
| 407 | 316 | } *aos; | |
| 408 | ios = kzalloc(first_size + sec_size, gfp_flags); | 317 | |
| 409 | if (unlikely(!ios)) | 318 | aos = kzalloc(sizeof(*aos), gfp_flags); |
| 319 | if (unlikely(!aos)) | ||
| 410 | return -ENOMEM; | 320 | return -ENOMEM; |
| 411 | 321 | ||
| 412 | ios->layout = objio_seg; | 322 | objlayout_init_ioerrs(&aos->objios.oir, objio_seg->oc.numdevs, |
| 413 | ios->ol_state.ioerrs = ((void *)ios) + first_size; | 323 | aos->ioerrs, rpcdata, pnfs_layout_type); |
| 414 | ios->ol_state.num_comps = objio_seg->num_comps; | ||
| 415 | 324 | ||
| 416 | *outp = &ios->ol_state; | 325 | ret = ore_get_rw_state(&objio_seg->layout, &objio_seg->oc, is_reading, |
| 326 | offset, count, &ios); | ||
| 327 | if (unlikely(ret)) { | ||
| 328 | kfree(aos); | ||
| 329 | return ret; | ||
| 330 | } | ||
| 331 | |||
| 332 | ios->pages = pages; | ||
| 333 | ios->pgbase = pgbase; | ||
| 334 | ios->private = aos; | ||
| 335 | BUG_ON(ios->nr_pages > (pgbase + count + PAGE_SIZE - 1) >> PAGE_SHIFT); | ||
| 336 | |||
| 337 | aos->objios.sync = 0; | ||
| 338 | aos->objios.ios = ios; | ||
| 339 | *outp = &aos->objios; | ||
| 417 | return 0; | 340 | return 0; |
| 418 | } | 341 | } |
| 419 | 342 | ||
| 420 | void objio_free_io_state(struct objlayout_io_state *ol_state) | 343 | void objio_free_result(struct objlayout_io_res *oir) |
| 421 | { | 344 | { |
| 422 | struct objio_state *ios = container_of(ol_state, struct objio_state, | 345 | struct objio_state *objios = container_of(oir, struct objio_state, oir); |
| 423 | ol_state); | ||
| 424 | 346 | ||
| 425 | kfree(ios); | 347 | ore_put_io_state(objios->ios); |
| 348 | kfree(objios); | ||
| 426 | } | 349 | } |
| 427 | 350 | ||
| 428 | enum pnfs_osd_errno osd_pri_2_pnfs_err(enum osd_err_priority oep) | 351 | enum pnfs_osd_errno osd_pri_2_pnfs_err(enum osd_err_priority oep) |
| @@ -455,539 +378,152 @@ enum pnfs_osd_errno osd_pri_2_pnfs_err(enum osd_err_priority oep) | |||
| 455 | } | 378 | } |
| 456 | } | 379 | } |
| 457 | 380 | ||
| 458 | static void _clear_bio(struct bio *bio) | 381 | static void __on_dev_error(struct ore_io_state *ios, |
| 382 | struct ore_dev *od, unsigned dev_index, enum osd_err_priority oep, | ||
| 383 | u64 dev_offset, u64 dev_len) | ||
| 459 | { | 384 | { |
| 460 | struct bio_vec *bv; | 385 | struct objio_state *objios = ios->private; |
| 461 | unsigned i; | 386 | struct pnfs_osd_objid pooid; |
| 462 | 387 | struct objio_dev_ent *ode = container_of(od, typeof(*ode), od); | |
| 463 | __bio_for_each_segment(bv, bio, i, 0) { | 388 | /* FIXME: what to do with more-then-one-group layouts. We need to |
| 464 | unsigned this_count = bv->bv_len; | 389 | * translate from ore_io_state index to oc->comps index |
| 465 | 390 | */ | |
| 466 | if (likely(PAGE_SIZE == this_count)) | 391 | unsigned comp = dev_index; |
| 467 | clear_highpage(bv->bv_page); | ||
| 468 | else | ||
| 469 | zero_user(bv->bv_page, bv->bv_offset, this_count); | ||
| 470 | } | ||
| 471 | } | ||
| 472 | |||
| 473 | static int _io_check(struct objio_state *ios, bool is_write) | ||
| 474 | { | ||
| 475 | enum osd_err_priority oep = OSD_ERR_PRI_NO_ERROR; | ||
| 476 | int lin_ret = 0; | ||
| 477 | int i; | ||
| 478 | |||
| 479 | for (i = 0; i < ios->numdevs; i++) { | ||
| 480 | struct osd_sense_info osi; | ||
| 481 | struct osd_request *or = ios->per_dev[i].or; | ||
| 482 | int ret; | ||
| 483 | |||
| 484 | if (!or) | ||
| 485 | continue; | ||
| 486 | 392 | ||
| 487 | ret = osd_req_decode_sense(or, &osi); | 393 | pooid.oid_device_id = ode->id_node.deviceid; |
| 488 | if (likely(!ret)) | 394 | pooid.oid_partition_id = ios->oc->comps[comp].obj.partition; |
| 489 | continue; | 395 | pooid.oid_object_id = ios->oc->comps[comp].obj.id; |
| 490 | 396 | ||
| 491 | if (OSD_ERR_PRI_CLEAR_PAGES == osi.osd_err_pri) { | 397 | objlayout_io_set_result(&objios->oir, comp, |
| 492 | /* start read offset passed endof file */ | 398 | &pooid, osd_pri_2_pnfs_err(oep), |
| 493 | BUG_ON(is_write); | 399 | dev_offset, dev_len, !ios->reading); |
| 494 | _clear_bio(ios->per_dev[i].bio); | ||
| 495 | dprintk("%s: start read offset passed end of file " | ||
| 496 | "offset=0x%llx, length=0x%lx\n", __func__, | ||
| 497 | _LLU(ios->per_dev[i].offset), | ||
| 498 | ios->per_dev[i].length); | ||
| 499 | |||
| 500 | continue; /* we recovered */ | ||
| 501 | } | ||
| 502 | objlayout_io_set_result(&ios->ol_state, i, | ||
| 503 | &ios->layout->comps[i].oc_object_id, | ||
| 504 | osd_pri_2_pnfs_err(osi.osd_err_pri), | ||
| 505 | ios->per_dev[i].offset, | ||
| 506 | ios->per_dev[i].length, | ||
| 507 | is_write); | ||
| 508 | |||
| 509 | if (osi.osd_err_pri >= oep) { | ||
| 510 | oep = osi.osd_err_pri; | ||
| 511 | lin_ret = ret; | ||
| 512 | } | ||
| 513 | } | ||
| 514 | |||
| 515 | return lin_ret; | ||
| 516 | } | ||
| 517 | |||
| 518 | /* | ||
| 519 | * Common IO state helpers. | ||
| 520 | */ | ||
| 521 | static void _io_free(struct objio_state *ios) | ||
| 522 | { | ||
| 523 | unsigned i; | ||
| 524 | |||
| 525 | for (i = 0; i < ios->numdevs; i++) { | ||
| 526 | struct _objio_per_comp *per_dev = &ios->per_dev[i]; | ||
| 527 | |||
| 528 | if (per_dev->or) { | ||
| 529 | osd_end_request(per_dev->or); | ||
| 530 | per_dev->or = NULL; | ||
| 531 | } | ||
| 532 | |||
| 533 | if (per_dev->bio) { | ||
| 534 | bio_put(per_dev->bio); | ||
| 535 | per_dev->bio = NULL; | ||
| 536 | } | ||
| 537 | } | ||
| 538 | } | ||
| 539 | |||
| 540 | struct osd_dev *_io_od(struct objio_state *ios, unsigned dev) | ||
| 541 | { | ||
| 542 | unsigned min_dev = ios->layout->comps_index; | ||
| 543 | unsigned max_dev = min_dev + ios->layout->num_comps; | ||
| 544 | |||
| 545 | BUG_ON(dev < min_dev || max_dev <= dev); | ||
| 546 | return ios->layout->ods[dev - min_dev]->od; | ||
| 547 | } | ||
| 548 | |||
| 549 | struct _striping_info { | ||
| 550 | u64 obj_offset; | ||
| 551 | u64 group_length; | ||
| 552 | unsigned dev; | ||
| 553 | unsigned unit_off; | ||
| 554 | }; | ||
| 555 | |||
| 556 | static void _calc_stripe_info(struct objio_state *ios, u64 file_offset, | ||
| 557 | struct _striping_info *si) | ||
| 558 | { | ||
| 559 | u32 stripe_unit = ios->layout->stripe_unit; | ||
| 560 | u32 group_width = ios->layout->group_width; | ||
| 561 | u64 group_depth = ios->layout->group_depth; | ||
| 562 | u32 U = stripe_unit * group_width; | ||
| 563 | |||
| 564 | u64 T = U * group_depth; | ||
| 565 | u64 S = T * ios->layout->group_count; | ||
| 566 | u64 M = div64_u64(file_offset, S); | ||
| 567 | |||
| 568 | /* | ||
| 569 | G = (L - (M * S)) / T | ||
| 570 | H = (L - (M * S)) % T | ||
| 571 | */ | ||
| 572 | u64 LmodU = file_offset - M * S; | ||
| 573 | u32 G = div64_u64(LmodU, T); | ||
| 574 | u64 H = LmodU - G * T; | ||
| 575 | |||
| 576 | u32 N = div_u64(H, U); | ||
| 577 | |||
| 578 | div_u64_rem(file_offset, stripe_unit, &si->unit_off); | ||
| 579 | si->obj_offset = si->unit_off + (N * stripe_unit) + | ||
| 580 | (M * group_depth * stripe_unit); | ||
| 581 | |||
| 582 | /* "H - (N * U)" is just "H % U" so it's bound to u32 */ | ||
| 583 | si->dev = (u32)(H - (N * U)) / stripe_unit + G * group_width; | ||
| 584 | si->dev *= ios->layout->mirrors_p1; | ||
| 585 | |||
| 586 | si->group_length = T - H; | ||
| 587 | } | ||
| 588 | |||
| 589 | static int _add_stripe_unit(struct objio_state *ios, unsigned *cur_pg, | ||
| 590 | unsigned pgbase, struct _objio_per_comp *per_dev, int len, | ||
| 591 | gfp_t gfp_flags) | ||
| 592 | { | ||
| 593 | unsigned pg = *cur_pg; | ||
| 594 | int cur_len = len; | ||
| 595 | struct request_queue *q = | ||
| 596 | osd_request_queue(_io_od(ios, per_dev->dev)); | ||
| 597 | |||
| 598 | if (per_dev->bio == NULL) { | ||
| 599 | unsigned pages_in_stripe = ios->layout->group_width * | ||
| 600 | (ios->layout->stripe_unit / PAGE_SIZE); | ||
| 601 | unsigned bio_size = (ios->ol_state.nr_pages + pages_in_stripe) / | ||
| 602 | ios->layout->group_width; | ||
| 603 | |||
| 604 | if (BIO_MAX_PAGES_KMALLOC < bio_size) | ||
| 605 | bio_size = BIO_MAX_PAGES_KMALLOC; | ||
| 606 | |||
| 607 | per_dev->bio = bio_kmalloc(gfp_flags, bio_size); | ||
| 608 | if (unlikely(!per_dev->bio)) { | ||
| 609 | dprintk("Faild to allocate BIO size=%u\n", bio_size); | ||
| 610 | return -ENOMEM; | ||
| 611 | } | ||
| 612 | } | ||
| 613 | |||
| 614 | while (cur_len > 0) { | ||
| 615 | unsigned pglen = min_t(unsigned, PAGE_SIZE - pgbase, cur_len); | ||
| 616 | unsigned added_len; | ||
| 617 | |||
| 618 | BUG_ON(ios->ol_state.nr_pages <= pg); | ||
| 619 | cur_len -= pglen; | ||
| 620 | |||
| 621 | added_len = bio_add_pc_page(q, per_dev->bio, | ||
| 622 | ios->ol_state.pages[pg], pglen, pgbase); | ||
| 623 | if (unlikely(pglen != added_len)) | ||
| 624 | return -ENOMEM; | ||
| 625 | pgbase = 0; | ||
| 626 | ++pg; | ||
| 627 | } | ||
| 628 | BUG_ON(cur_len); | ||
| 629 | |||
| 630 | per_dev->length += len; | ||
| 631 | *cur_pg = pg; | ||
| 632 | return 0; | ||
| 633 | } | ||
| 634 | |||
| 635 | static int _prepare_one_group(struct objio_state *ios, u64 length, | ||
| 636 | struct _striping_info *si, unsigned *last_pg, | ||
| 637 | gfp_t gfp_flags) | ||
| 638 | { | ||
| 639 | unsigned stripe_unit = ios->layout->stripe_unit; | ||
| 640 | unsigned mirrors_p1 = ios->layout->mirrors_p1; | ||
| 641 | unsigned devs_in_group = ios->layout->group_width * mirrors_p1; | ||
| 642 | unsigned dev = si->dev; | ||
| 643 | unsigned first_dev = dev - (dev % devs_in_group); | ||
| 644 | unsigned max_comp = ios->numdevs ? ios->numdevs - mirrors_p1 : 0; | ||
| 645 | unsigned cur_pg = *last_pg; | ||
| 646 | int ret = 0; | ||
| 647 | |||
| 648 | while (length) { | ||
| 649 | struct _objio_per_comp *per_dev = &ios->per_dev[dev - first_dev]; | ||
| 650 | unsigned cur_len, page_off = 0; | ||
| 651 | |||
| 652 | if (!per_dev->length) { | ||
| 653 | per_dev->dev = dev; | ||
| 654 | if (dev < si->dev) { | ||
| 655 | per_dev->offset = si->obj_offset + stripe_unit - | ||
| 656 | si->unit_off; | ||
| 657 | cur_len = stripe_unit; | ||
| 658 | } else if (dev == si->dev) { | ||
| 659 | per_dev->offset = si->obj_offset; | ||
| 660 | cur_len = stripe_unit - si->unit_off; | ||
| 661 | page_off = si->unit_off & ~PAGE_MASK; | ||
| 662 | BUG_ON(page_off && | ||
| 663 | (page_off != ios->ol_state.pgbase)); | ||
| 664 | } else { /* dev > si->dev */ | ||
| 665 | per_dev->offset = si->obj_offset - si->unit_off; | ||
| 666 | cur_len = stripe_unit; | ||
| 667 | } | ||
| 668 | |||
| 669 | if (max_comp < dev - first_dev) | ||
| 670 | max_comp = dev - first_dev; | ||
| 671 | } else { | ||
| 672 | cur_len = stripe_unit; | ||
| 673 | } | ||
| 674 | if (cur_len >= length) | ||
| 675 | cur_len = length; | ||
| 676 | |||
| 677 | ret = _add_stripe_unit(ios, &cur_pg, page_off , per_dev, | ||
| 678 | cur_len, gfp_flags); | ||
| 679 | if (unlikely(ret)) | ||
| 680 | goto out; | ||
| 681 | |||
| 682 | dev += mirrors_p1; | ||
| 683 | dev = (dev % devs_in_group) + first_dev; | ||
| 684 | |||
| 685 | length -= cur_len; | ||
| 686 | ios->length += cur_len; | ||
| 687 | } | ||
| 688 | out: | ||
| 689 | ios->numdevs = max_comp + mirrors_p1; | ||
| 690 | *last_pg = cur_pg; | ||
| 691 | return ret; | ||
| 692 | } | ||
| 693 | |||
| 694 | static int _io_rw_pagelist(struct objio_state *ios, gfp_t gfp_flags) | ||
| 695 | { | ||
| 696 | u64 length = ios->ol_state.count; | ||
| 697 | u64 offset = ios->ol_state.offset; | ||
| 698 | struct _striping_info si; | ||
| 699 | unsigned last_pg = 0; | ||
| 700 | int ret = 0; | ||
| 701 | |||
| 702 | while (length) { | ||
| 703 | _calc_stripe_info(ios, offset, &si); | ||
| 704 | |||
| 705 | if (length < si.group_length) | ||
| 706 | si.group_length = length; | ||
| 707 | |||
| 708 | ret = _prepare_one_group(ios, si.group_length, &si, &last_pg, gfp_flags); | ||
| 709 | if (unlikely(ret)) | ||
| 710 | goto out; | ||
| 711 | |||
| 712 | offset += si.group_length; | ||
| 713 | length -= si.group_length; | ||
| 714 | } | ||
| 715 | |||
| 716 | out: | ||
| 717 | if (!ios->length) | ||
| 718 | return ret; | ||
| 719 | |||
| 720 | return 0; | ||
| 721 | } | ||
| 722 | |||
| 723 | static ssize_t _sync_done(struct objio_state *ios) | ||
| 724 | { | ||
| 725 | struct completion *waiting = ios->private; | ||
| 726 | |||
| 727 | complete(waiting); | ||
| 728 | return 0; | ||
| 729 | } | ||
| 730 | |||
| 731 | static void _last_io(struct kref *kref) | ||
| 732 | { | ||
| 733 | struct objio_state *ios = container_of(kref, struct objio_state, kref); | ||
| 734 | |||
| 735 | ios->done(ios); | ||
| 736 | } | ||
| 737 | |||
| 738 | static void _done_io(struct osd_request *or, void *p) | ||
| 739 | { | ||
| 740 | struct objio_state *ios = p; | ||
| 741 | |||
| 742 | kref_put(&ios->kref, _last_io); | ||
| 743 | } | ||
| 744 | |||
| 745 | static ssize_t _io_exec(struct objio_state *ios) | ||
| 746 | { | ||
| 747 | DECLARE_COMPLETION_ONSTACK(wait); | ||
| 748 | ssize_t status = 0; /* sync status */ | ||
| 749 | unsigned i; | ||
| 750 | objio_done_fn saved_done_fn = ios->done; | ||
| 751 | bool sync = ios->ol_state.sync; | ||
| 752 | |||
| 753 | if (sync) { | ||
| 754 | ios->done = _sync_done; | ||
| 755 | ios->private = &wait; | ||
| 756 | } | ||
| 757 | |||
| 758 | kref_init(&ios->kref); | ||
| 759 | |||
| 760 | for (i = 0; i < ios->numdevs; i++) { | ||
| 761 | struct osd_request *or = ios->per_dev[i].or; | ||
| 762 | |||
| 763 | if (!or) | ||
| 764 | continue; | ||
| 765 | |||
| 766 | kref_get(&ios->kref); | ||
| 767 | osd_execute_request_async(or, _done_io, ios); | ||
| 768 | } | ||
| 769 | |||
| 770 | kref_put(&ios->kref, _last_io); | ||
| 771 | |||
| 772 | if (sync) { | ||
| 773 | wait_for_completion(&wait); | ||
| 774 | status = saved_done_fn(ios); | ||
| 775 | } | ||
| 776 | |||
| 777 | return status; | ||
| 778 | } | 400 | } |
| 779 | 401 | ||
| 780 | /* | 402 | /* |
| 781 | * read | 403 | * read |
| 782 | */ | 404 | */ |
| 783 | static ssize_t _read_done(struct objio_state *ios) | 405 | static void _read_done(struct ore_io_state *ios, void *private) |
| 784 | { | 406 | { |
| 407 | struct objio_state *objios = private; | ||
| 785 | ssize_t status; | 408 | ssize_t status; |
| 786 | int ret = _io_check(ios, false); | 409 | int ret = ore_check_io(ios, &__on_dev_error); |
| 787 | 410 | ||
| 788 | _io_free(ios); | 411 | /* FIXME: _io_free(ios) can we dealocate the libosd resources; */ |
| 789 | 412 | ||
| 790 | if (likely(!ret)) | 413 | if (likely(!ret)) |
| 791 | status = ios->length; | 414 | status = ios->length; |
| 792 | else | 415 | else |
| 793 | status = ret; | 416 | status = ret; |
| 794 | 417 | ||
| 795 | objlayout_read_done(&ios->ol_state, status, ios->ol_state.sync); | 418 | objlayout_read_done(&objios->oir, status, objios->sync); |
| 796 | return status; | ||
| 797 | } | 419 | } |
| 798 | 420 | ||
| 799 | static int _read_mirrors(struct objio_state *ios, unsigned cur_comp) | 421 | int objio_read_pagelist(struct nfs_read_data *rdata) |
| 800 | { | 422 | { |
| 801 | struct osd_request *or = NULL; | 423 | struct objio_state *objios; |
| 802 | struct _objio_per_comp *per_dev = &ios->per_dev[cur_comp]; | ||
| 803 | unsigned dev = per_dev->dev; | ||
| 804 | struct pnfs_osd_object_cred *cred = | ||
| 805 | &ios->layout->comps[cur_comp]; | ||
| 806 | struct osd_obj_id obj = { | ||
| 807 | .partition = cred->oc_object_id.oid_partition_id, | ||
| 808 | .id = cred->oc_object_id.oid_object_id, | ||
| 809 | }; | ||
| 810 | int ret; | 424 | int ret; |
| 811 | 425 | ||
| 812 | or = osd_start_request(_io_od(ios, dev), GFP_KERNEL); | 426 | ret = objio_alloc_io_state(NFS_I(rdata->inode)->layout, true, |
| 813 | if (unlikely(!or)) { | 427 | rdata->lseg, rdata->args.pages, rdata->args.pgbase, |
| 814 | ret = -ENOMEM; | 428 | rdata->args.offset, rdata->args.count, rdata, |
| 815 | goto err; | 429 | GFP_KERNEL, &objios); |
| 816 | } | ||
| 817 | per_dev->or = or; | ||
| 818 | |||
| 819 | osd_req_read(or, &obj, per_dev->offset, per_dev->bio, per_dev->length); | ||
| 820 | |||
| 821 | ret = osd_finalize_request(or, 0, cred->oc_cap.cred, NULL); | ||
| 822 | if (ret) { | ||
| 823 | dprintk("%s: Faild to osd_finalize_request() => %d\n", | ||
| 824 | __func__, ret); | ||
| 825 | goto err; | ||
| 826 | } | ||
| 827 | |||
| 828 | dprintk("%s:[%d] dev=%d obj=0x%llx start=0x%llx length=0x%lx\n", | ||
| 829 | __func__, cur_comp, dev, obj.id, _LLU(per_dev->offset), | ||
| 830 | per_dev->length); | ||
| 831 | |||
| 832 | err: | ||
| 833 | return ret; | ||
| 834 | } | ||
| 835 | |||
| 836 | static ssize_t _read_exec(struct objio_state *ios) | ||
| 837 | { | ||
| 838 | unsigned i; | ||
| 839 | int ret; | ||
| 840 | |||
| 841 | for (i = 0; i < ios->numdevs; i += ios->layout->mirrors_p1) { | ||
| 842 | if (!ios->per_dev[i].length) | ||
| 843 | continue; | ||
| 844 | ret = _read_mirrors(ios, i); | ||
| 845 | if (unlikely(ret)) | ||
| 846 | goto err; | ||
| 847 | } | ||
| 848 | |||
| 849 | ios->done = _read_done; | ||
| 850 | return _io_exec(ios); /* In sync mode exec returns the io status */ | ||
| 851 | |||
| 852 | err: | ||
| 853 | _io_free(ios); | ||
| 854 | return ret; | ||
| 855 | } | ||
| 856 | |||
| 857 | ssize_t objio_read_pagelist(struct objlayout_io_state *ol_state) | ||
| 858 | { | ||
| 859 | struct objio_state *ios = container_of(ol_state, struct objio_state, | ||
| 860 | ol_state); | ||
| 861 | int ret; | ||
| 862 | |||
| 863 | ret = _io_rw_pagelist(ios, GFP_KERNEL); | ||
| 864 | if (unlikely(ret)) | 430 | if (unlikely(ret)) |
| 865 | return ret; | 431 | return ret; |
| 866 | 432 | ||
| 867 | return _read_exec(ios); | 433 | objios->ios->done = _read_done; |
| 434 | dprintk("%s: offset=0x%llx length=0x%x\n", __func__, | ||
| 435 | rdata->args.offset, rdata->args.count); | ||
| 436 | return ore_read(objios->ios); | ||
| 868 | } | 437 | } |
| 869 | 438 | ||
| 870 | /* | 439 | /* |
| 871 | * write | 440 | * write |
| 872 | */ | 441 | */ |
| 873 | static ssize_t _write_done(struct objio_state *ios) | 442 | static void _write_done(struct ore_io_state *ios, void *private) |
| 874 | { | 443 | { |
| 444 | struct objio_state *objios = private; | ||
| 875 | ssize_t status; | 445 | ssize_t status; |
| 876 | int ret = _io_check(ios, true); | 446 | int ret = ore_check_io(ios, &__on_dev_error); |
| 877 | 447 | ||
| 878 | _io_free(ios); | 448 | /* FIXME: _io_free(ios) can we dealocate the libosd resources; */ |
| 879 | 449 | ||
| 880 | if (likely(!ret)) { | 450 | if (likely(!ret)) { |
| 881 | /* FIXME: should be based on the OSD's persistence model | 451 | /* FIXME: should be based on the OSD's persistence model |
| 882 | * See OSD2r05 Section 4.13 Data persistence model */ | 452 | * See OSD2r05 Section 4.13 Data persistence model */ |
| 883 | ios->ol_state.committed = NFS_FILE_SYNC; | 453 | objios->oir.committed = NFS_FILE_SYNC; |
| 884 | status = ios->length; | 454 | status = ios->length; |
| 885 | } else { | 455 | } else { |
| 886 | status = ret; | 456 | status = ret; |
| 887 | } | 457 | } |
| 888 | 458 | ||
| 889 | objlayout_write_done(&ios->ol_state, status, ios->ol_state.sync); | 459 | objlayout_write_done(&objios->oir, status, objios->sync); |
| 890 | return status; | ||
| 891 | } | 460 | } |
| 892 | 461 | ||
| 893 | static int _write_mirrors(struct objio_state *ios, unsigned cur_comp) | 462 | static struct page *__r4w_get_page(void *priv, u64 offset, bool *uptodate) |
| 894 | { | 463 | { |
| 895 | struct _objio_per_comp *master_dev = &ios->per_dev[cur_comp]; | 464 | struct objio_state *objios = priv; |
| 896 | unsigned dev = ios->per_dev[cur_comp].dev; | 465 | struct nfs_write_data *wdata = objios->oir.rpcdata; |
| 897 | unsigned last_comp = cur_comp + ios->layout->mirrors_p1; | 466 | pgoff_t index = offset / PAGE_SIZE; |
| 898 | int ret; | 467 | struct page *page = find_get_page(wdata->inode->i_mapping, index); |
| 899 | |||
| 900 | for (; cur_comp < last_comp; ++cur_comp, ++dev) { | ||
| 901 | struct osd_request *or = NULL; | ||
| 902 | struct pnfs_osd_object_cred *cred = | ||
| 903 | &ios->layout->comps[cur_comp]; | ||
| 904 | struct osd_obj_id obj = { | ||
| 905 | .partition = cred->oc_object_id.oid_partition_id, | ||
| 906 | .id = cred->oc_object_id.oid_object_id, | ||
| 907 | }; | ||
| 908 | struct _objio_per_comp *per_dev = &ios->per_dev[cur_comp]; | ||
| 909 | struct bio *bio; | ||
| 910 | |||
| 911 | or = osd_start_request(_io_od(ios, dev), GFP_NOFS); | ||
| 912 | if (unlikely(!or)) { | ||
| 913 | ret = -ENOMEM; | ||
| 914 | goto err; | ||
| 915 | } | ||
| 916 | per_dev->or = or; | ||
| 917 | |||
| 918 | if (per_dev != master_dev) { | ||
| 919 | bio = bio_kmalloc(GFP_NOFS, | ||
| 920 | master_dev->bio->bi_max_vecs); | ||
| 921 | if (unlikely(!bio)) { | ||
| 922 | dprintk("Faild to allocate BIO size=%u\n", | ||
| 923 | master_dev->bio->bi_max_vecs); | ||
| 924 | ret = -ENOMEM; | ||
| 925 | goto err; | ||
| 926 | } | ||
| 927 | |||
| 928 | __bio_clone(bio, master_dev->bio); | ||
| 929 | bio->bi_bdev = NULL; | ||
| 930 | bio->bi_next = NULL; | ||
| 931 | per_dev->bio = bio; | ||
| 932 | per_dev->dev = dev; | ||
| 933 | per_dev->length = master_dev->length; | ||
| 934 | per_dev->offset = master_dev->offset; | ||
| 935 | } else { | ||
| 936 | bio = master_dev->bio; | ||
| 937 | bio->bi_rw |= REQ_WRITE; | ||
| 938 | } | ||
| 939 | |||
| 940 | osd_req_write(or, &obj, per_dev->offset, bio, per_dev->length); | ||
| 941 | 468 | ||
| 942 | ret = osd_finalize_request(or, 0, cred->oc_cap.cred, NULL); | 469 | if (!page) { |
| 943 | if (ret) { | 470 | page = find_or_create_page(wdata->inode->i_mapping, |
| 944 | dprintk("%s: Faild to osd_finalize_request() => %d\n", | 471 | index, GFP_NOFS); |
| 945 | __func__, ret); | 472 | if (unlikely(!page)) { |
| 946 | goto err; | 473 | dprintk("%s: grab_cache_page Failed index=0x%lx\n", |
| 474 | __func__, index); | ||
| 475 | return NULL; | ||
| 947 | } | 476 | } |
| 948 | 477 | unlock_page(page); | |
| 949 | dprintk("%s:[%d] dev=%d obj=0x%llx start=0x%llx length=0x%lx\n", | ||
| 950 | __func__, cur_comp, dev, obj.id, _LLU(per_dev->offset), | ||
| 951 | per_dev->length); | ||
| 952 | } | 478 | } |
| 479 | if (PageDirty(page) || PageWriteback(page)) | ||
| 480 | *uptodate = true; | ||
| 481 | else | ||
| 482 | *uptodate = PageUptodate(page); | ||
| 483 | dprintk("%s: index=0x%lx uptodate=%d\n", __func__, index, *uptodate); | ||
| 484 | return page; | ||
| 485 | } | ||
| 953 | 486 | ||
| 954 | err: | 487 | static void __r4w_put_page(void *priv, struct page *page) |
| 955 | return ret; | 488 | { |
| 489 | dprintk("%s: index=0x%lx\n", __func__, page->index); | ||
| 490 | page_cache_release(page); | ||
| 491 | return; | ||
| 956 | } | 492 | } |
| 957 | 493 | ||
| 958 | static ssize_t _write_exec(struct objio_state *ios) | 494 | static const struct _ore_r4w_op _r4w_op = { |
| 495 | .get_page = &__r4w_get_page, | ||
| 496 | .put_page = &__r4w_put_page, | ||
| 497 | }; | ||
| 498 | |||
| 499 | int objio_write_pagelist(struct nfs_write_data *wdata, int how) | ||
| 959 | { | 500 | { |
| 960 | unsigned i; | 501 | struct objio_state *objios; |
| 961 | int ret; | 502 | int ret; |
| 962 | 503 | ||
| 963 | for (i = 0; i < ios->numdevs; i += ios->layout->mirrors_p1) { | 504 | ret = objio_alloc_io_state(NFS_I(wdata->inode)->layout, false, |
| 964 | if (!ios->per_dev[i].length) | 505 | wdata->lseg, wdata->args.pages, wdata->args.pgbase, |
| 965 | continue; | 506 | wdata->args.offset, wdata->args.count, wdata, GFP_NOFS, |
| 966 | ret = _write_mirrors(ios, i); | 507 | &objios); |
| 967 | if (unlikely(ret)) | 508 | if (unlikely(ret)) |
| 968 | goto err; | 509 | return ret; |
| 969 | } | ||
| 970 | |||
| 971 | ios->done = _write_done; | ||
| 972 | return _io_exec(ios); /* In sync mode exec returns the io->status */ | ||
| 973 | 510 | ||
| 974 | err: | 511 | objios->sync = 0 != (how & FLUSH_SYNC); |
| 975 | _io_free(ios); | 512 | objios->ios->r4w = &_r4w_op; |
| 976 | return ret; | ||
| 977 | } | ||
| 978 | 513 | ||
| 979 | ssize_t objio_write_pagelist(struct objlayout_io_state *ol_state, bool stable) | 514 | if (!objios->sync) |
| 980 | { | 515 | objios->ios->done = _write_done; |
| 981 | struct objio_state *ios = container_of(ol_state, struct objio_state, | ||
| 982 | ol_state); | ||
| 983 | int ret; | ||
| 984 | 516 | ||
| 985 | /* TODO: ios->stable = stable; */ | 517 | dprintk("%s: offset=0x%llx length=0x%x\n", __func__, |
| 986 | ret = _io_rw_pagelist(ios, GFP_NOFS); | 518 | wdata->args.offset, wdata->args.count); |
| 519 | ret = ore_write(objios->ios); | ||
| 987 | if (unlikely(ret)) | 520 | if (unlikely(ret)) |
| 988 | return ret; | 521 | return ret; |
| 989 | 522 | ||
| 990 | return _write_exec(ios); | 523 | if (objios->sync) |
| 524 | _write_done(objios->ios, objios); | ||
| 525 | |||
| 526 | return 0; | ||
| 991 | } | 527 | } |
| 992 | 528 | ||
| 993 | static bool objio_pg_test(struct nfs_pageio_descriptor *pgio, | 529 | static bool objio_pg_test(struct nfs_pageio_descriptor *pgio, |
| @@ -997,7 +533,7 @@ static bool objio_pg_test(struct nfs_pageio_descriptor *pgio, | |||
| 997 | return false; | 533 | return false; |
| 998 | 534 | ||
| 999 | return pgio->pg_count + req->wb_bytes <= | 535 | return pgio->pg_count + req->wb_bytes <= |
| 1000 | OBJIO_LSEG(pgio->pg_lseg)->max_io_size; | 536 | OBJIO_LSEG(pgio->pg_lseg)->layout.max_io_length; |
| 1001 | } | 537 | } |
| 1002 | 538 | ||
| 1003 | static const struct nfs_pageio_ops objio_pg_read_ops = { | 539 | static const struct nfs_pageio_ops objio_pg_read_ops = { |
diff --git a/fs/nfs/objlayout/objlayout.c b/fs/nfs/objlayout/objlayout.c index 1d06f8e2adea..72074e3a04f9 100644 --- a/fs/nfs/objlayout/objlayout.c +++ b/fs/nfs/objlayout/objlayout.c | |||
| @@ -156,77 +156,39 @@ last_byte_offset(u64 start, u64 len) | |||
| 156 | return end > start ? end - 1 : NFS4_MAX_UINT64; | 156 | return end > start ? end - 1 : NFS4_MAX_UINT64; |
| 157 | } | 157 | } |
| 158 | 158 | ||
| 159 | static struct objlayout_io_state * | 159 | void _fix_verify_io_params(struct pnfs_layout_segment *lseg, |
| 160 | objlayout_alloc_io_state(struct pnfs_layout_hdr *pnfs_layout_type, | 160 | struct page ***p_pages, unsigned *p_pgbase, |
| 161 | struct page **pages, | 161 | u64 offset, unsigned long count) |
| 162 | unsigned pgbase, | ||
| 163 | loff_t offset, | ||
| 164 | size_t count, | ||
| 165 | struct pnfs_layout_segment *lseg, | ||
| 166 | void *rpcdata, | ||
| 167 | gfp_t gfp_flags) | ||
| 168 | { | 162 | { |
| 169 | struct objlayout_io_state *state; | ||
| 170 | u64 lseg_end_offset; | 163 | u64 lseg_end_offset; |
| 171 | 164 | ||
| 172 | dprintk("%s: allocating io_state\n", __func__); | ||
| 173 | if (objio_alloc_io_state(lseg, &state, gfp_flags)) | ||
| 174 | return NULL; | ||
| 175 | |||
| 176 | BUG_ON(offset < lseg->pls_range.offset); | 165 | BUG_ON(offset < lseg->pls_range.offset); |
| 177 | lseg_end_offset = end_offset(lseg->pls_range.offset, | 166 | lseg_end_offset = end_offset(lseg->pls_range.offset, |
| 178 | lseg->pls_range.length); | 167 | lseg->pls_range.length); |
| 179 | BUG_ON(offset >= lseg_end_offset); | 168 | BUG_ON(offset >= lseg_end_offset); |
| 180 | if (offset + count > lseg_end_offset) { | 169 | WARN_ON(offset + count > lseg_end_offset); |
| 181 | count = lseg->pls_range.length - | ||
| 182 | (offset - lseg->pls_range.offset); | ||
| 183 | dprintk("%s: truncated count %Zd\n", __func__, count); | ||
| 184 | } | ||
| 185 | 170 | ||
| 186 | if (pgbase > PAGE_SIZE) { | 171 | if (*p_pgbase > PAGE_SIZE) { |
| 187 | pages += pgbase >> PAGE_SHIFT; | 172 | dprintk("%s: pgbase(0x%x) > PAGE_SIZE\n", __func__, *p_pgbase); |
| 188 | pgbase &= ~PAGE_MASK; | 173 | *p_pages += *p_pgbase >> PAGE_SHIFT; |
| 174 | *p_pgbase &= ~PAGE_MASK; | ||
| 189 | } | 175 | } |
| 190 | |||
| 191 | INIT_LIST_HEAD(&state->err_list); | ||
| 192 | state->lseg = lseg; | ||
| 193 | state->rpcdata = rpcdata; | ||
| 194 | state->pages = pages; | ||
| 195 | state->pgbase = pgbase; | ||
| 196 | state->nr_pages = (pgbase + count + PAGE_SIZE - 1) >> PAGE_SHIFT; | ||
| 197 | state->offset = offset; | ||
| 198 | state->count = count; | ||
| 199 | state->sync = 0; | ||
| 200 | |||
| 201 | return state; | ||
| 202 | } | ||
| 203 | |||
| 204 | static void | ||
| 205 | objlayout_free_io_state(struct objlayout_io_state *state) | ||
| 206 | { | ||
| 207 | dprintk("%s: freeing io_state\n", __func__); | ||
| 208 | if (unlikely(!state)) | ||
| 209 | return; | ||
| 210 | |||
| 211 | objio_free_io_state(state); | ||
| 212 | } | 176 | } |
| 213 | 177 | ||
| 214 | /* | 178 | /* |
| 215 | * I/O done common code | 179 | * I/O done common code |
| 216 | */ | 180 | */ |
| 217 | static void | 181 | static void |
| 218 | objlayout_iodone(struct objlayout_io_state *state) | 182 | objlayout_iodone(struct objlayout_io_res *oir) |
| 219 | { | 183 | { |
| 220 | dprintk("%s: state %p status\n", __func__, state); | 184 | if (likely(oir->status >= 0)) { |
| 221 | 185 | objio_free_result(oir); | |
| 222 | if (likely(state->status >= 0)) { | ||
| 223 | objlayout_free_io_state(state); | ||
| 224 | } else { | 186 | } else { |
| 225 | struct objlayout *objlay = OBJLAYOUT(state->lseg->pls_layout); | 187 | struct objlayout *objlay = oir->objlay; |
| 226 | 188 | ||
| 227 | spin_lock(&objlay->lock); | 189 | spin_lock(&objlay->lock); |
| 228 | objlay->delta_space_valid = OBJ_DSU_INVALID; | 190 | objlay->delta_space_valid = OBJ_DSU_INVALID; |
| 229 | list_add(&objlay->err_list, &state->err_list); | 191 | list_add(&objlay->err_list, &oir->err_list); |
| 230 | spin_unlock(&objlay->lock); | 192 | spin_unlock(&objlay->lock); |
| 231 | } | 193 | } |
| 232 | } | 194 | } |
| @@ -238,13 +200,13 @@ objlayout_iodone(struct objlayout_io_state *state) | |||
| 238 | * the error for later reporting at layout-return. | 200 | * the error for later reporting at layout-return. |
| 239 | */ | 201 | */ |
| 240 | void | 202 | void |
| 241 | objlayout_io_set_result(struct objlayout_io_state *state, unsigned index, | 203 | objlayout_io_set_result(struct objlayout_io_res *oir, unsigned index, |
| 242 | struct pnfs_osd_objid *pooid, int osd_error, | 204 | struct pnfs_osd_objid *pooid, int osd_error, |
| 243 | u64 offset, u64 length, bool is_write) | 205 | u64 offset, u64 length, bool is_write) |
| 244 | { | 206 | { |
| 245 | struct pnfs_osd_ioerr *ioerr = &state->ioerrs[index]; | 207 | struct pnfs_osd_ioerr *ioerr = &oir->ioerrs[index]; |
| 246 | 208 | ||
| 247 | BUG_ON(index >= state->num_comps); | 209 | BUG_ON(index >= oir->num_comps); |
| 248 | if (osd_error) { | 210 | if (osd_error) { |
| 249 | ioerr->oer_component = *pooid; | 211 | ioerr->oer_component = *pooid; |
| 250 | ioerr->oer_comp_offset = offset; | 212 | ioerr->oer_comp_offset = offset; |
| @@ -285,21 +247,18 @@ static void _rpc_read_complete(struct work_struct *work) | |||
| 285 | } | 247 | } |
| 286 | 248 | ||
| 287 | void | 249 | void |
| 288 | objlayout_read_done(struct objlayout_io_state *state, ssize_t status, bool sync) | 250 | objlayout_read_done(struct objlayout_io_res *oir, ssize_t status, bool sync) |
| 289 | { | 251 | { |
| 290 | int eof = state->eof; | 252 | struct nfs_read_data *rdata = oir->rpcdata; |
| 291 | struct nfs_read_data *rdata; | ||
| 292 | 253 | ||
| 293 | state->status = status; | 254 | oir->status = rdata->task.tk_status = status; |
| 294 | dprintk("%s: Begin status=%zd eof=%d\n", __func__, status, eof); | 255 | if (status >= 0) |
| 295 | rdata = state->rpcdata; | ||
| 296 | rdata->task.tk_status = status; | ||
| 297 | if (status >= 0) { | ||
| 298 | rdata->res.count = status; | 256 | rdata->res.count = status; |
| 299 | rdata->res.eof = eof; | 257 | objlayout_iodone(oir); |
| 300 | } | 258 | /* must not use oir after this point */ |
| 301 | objlayout_iodone(state); | 259 | |
| 302 | /* must not use state after this point */ | 260 | dprintk("%s: Return status=%zd eof=%d sync=%d\n", __func__, |
| 261 | status, rdata->res.eof, sync); | ||
| 303 | 262 | ||
| 304 | if (sync) | 263 | if (sync) |
| 305 | pnfs_ld_read_done(rdata); | 264 | pnfs_ld_read_done(rdata); |
| @@ -317,40 +276,36 @@ objlayout_read_pagelist(struct nfs_read_data *rdata) | |||
| 317 | { | 276 | { |
| 318 | loff_t offset = rdata->args.offset; | 277 | loff_t offset = rdata->args.offset; |
| 319 | size_t count = rdata->args.count; | 278 | size_t count = rdata->args.count; |
| 320 | struct objlayout_io_state *state; | 279 | int err; |
| 321 | ssize_t status = 0; | ||
| 322 | loff_t eof; | 280 | loff_t eof; |
| 323 | 281 | ||
| 324 | dprintk("%s: Begin inode %p offset %llu count %d\n", | ||
| 325 | __func__, rdata->inode, offset, (int)count); | ||
| 326 | |||
| 327 | eof = i_size_read(rdata->inode); | 282 | eof = i_size_read(rdata->inode); |
| 328 | if (unlikely(offset + count > eof)) { | 283 | if (unlikely(offset + count > eof)) { |
| 329 | if (offset >= eof) { | 284 | if (offset >= eof) { |
| 330 | status = 0; | 285 | err = 0; |
| 331 | rdata->res.count = 0; | 286 | rdata->res.count = 0; |
| 332 | rdata->res.eof = 1; | 287 | rdata->res.eof = 1; |
| 288 | /*FIXME: do we need to call pnfs_ld_read_done() */ | ||
| 333 | goto out; | 289 | goto out; |
| 334 | } | 290 | } |
| 335 | count = eof - offset; | 291 | count = eof - offset; |
| 336 | } | 292 | } |
| 337 | 293 | ||
| 338 | state = objlayout_alloc_io_state(NFS_I(rdata->inode)->layout, | 294 | rdata->res.eof = (offset + count) >= eof; |
| 339 | rdata->args.pages, rdata->args.pgbase, | 295 | _fix_verify_io_params(rdata->lseg, &rdata->args.pages, |
| 340 | offset, count, | 296 | &rdata->args.pgbase, |
| 341 | rdata->lseg, rdata, | 297 | rdata->args.offset, rdata->args.count); |
| 342 | GFP_KERNEL); | ||
| 343 | if (unlikely(!state)) { | ||
| 344 | status = -ENOMEM; | ||
| 345 | goto out; | ||
| 346 | } | ||
| 347 | 298 | ||
| 348 | state->eof = state->offset + state->count >= eof; | 299 | dprintk("%s: inode(%lx) offset 0x%llx count 0x%Zx eof=%d\n", |
| 300 | __func__, rdata->inode->i_ino, offset, count, rdata->res.eof); | ||
| 349 | 301 | ||
| 350 | status = objio_read_pagelist(state); | 302 | err = objio_read_pagelist(rdata); |
| 351 | out: | 303 | out: |
| 352 | dprintk("%s: Return status %Zd\n", __func__, status); | 304 | if (unlikely(err)) { |
| 353 | rdata->pnfs_error = status; | 305 | rdata->pnfs_error = err; |
| 306 | dprintk("%s: Returned Error %d\n", __func__, err); | ||
| 307 | return PNFS_NOT_ATTEMPTED; | ||
| 308 | } | ||
| 354 | return PNFS_ATTEMPTED; | 309 | return PNFS_ATTEMPTED; |
| 355 | } | 310 | } |
| 356 | 311 | ||
| @@ -371,26 +326,20 @@ static void _rpc_write_complete(struct work_struct *work) | |||
| 371 | } | 326 | } |
| 372 | 327 | ||
| 373 | void | 328 | void |
| 374 | objlayout_write_done(struct objlayout_io_state *state, ssize_t status, | 329 | objlayout_write_done(struct objlayout_io_res *oir, ssize_t status, bool sync) |
| 375 | bool sync) | ||
| 376 | { | 330 | { |
| 377 | struct nfs_write_data *wdata; | 331 | struct nfs_write_data *wdata = oir->rpcdata; |
| 378 | 332 | ||
| 379 | dprintk("%s: Begin\n", __func__); | 333 | oir->status = wdata->task.tk_status = status; |
| 380 | wdata = state->rpcdata; | ||
| 381 | state->status = status; | ||
| 382 | wdata->task.tk_status = status; | ||
| 383 | if (status >= 0) { | 334 | if (status >= 0) { |
| 384 | wdata->res.count = status; | 335 | wdata->res.count = status; |
| 385 | wdata->verf.committed = state->committed; | 336 | wdata->verf.committed = oir->committed; |
| 386 | dprintk("%s: Return status %d committed %d\n", | 337 | } |
| 387 | __func__, wdata->task.tk_status, | 338 | objlayout_iodone(oir); |
| 388 | wdata->verf.committed); | 339 | /* must not use oir after this point */ |
| 389 | } else | 340 | |
| 390 | dprintk("%s: Return status %d\n", | 341 | dprintk("%s: Return status %zd committed %d sync=%d\n", __func__, |
| 391 | __func__, wdata->task.tk_status); | 342 | status, wdata->verf.committed, sync); |
| 392 | objlayout_iodone(state); | ||
| 393 | /* must not use state after this point */ | ||
| 394 | 343 | ||
| 395 | if (sync) | 344 | if (sync) |
| 396 | pnfs_ld_write_done(wdata); | 345 | pnfs_ld_write_done(wdata); |
| @@ -407,30 +356,18 @@ enum pnfs_try_status | |||
| 407 | objlayout_write_pagelist(struct nfs_write_data *wdata, | 356 | objlayout_write_pagelist(struct nfs_write_data *wdata, |
| 408 | int how) | 357 | int how) |
| 409 | { | 358 | { |
| 410 | struct objlayout_io_state *state; | 359 | int err; |
| 411 | ssize_t status; | ||
| 412 | |||
| 413 | dprintk("%s: Begin inode %p offset %llu count %u\n", | ||
| 414 | __func__, wdata->inode, wdata->args.offset, wdata->args.count); | ||
| 415 | |||
| 416 | state = objlayout_alloc_io_state(NFS_I(wdata->inode)->layout, | ||
| 417 | wdata->args.pages, | ||
| 418 | wdata->args.pgbase, | ||
| 419 | wdata->args.offset, | ||
| 420 | wdata->args.count, | ||
| 421 | wdata->lseg, wdata, | ||
| 422 | GFP_NOFS); | ||
| 423 | if (unlikely(!state)) { | ||
| 424 | status = -ENOMEM; | ||
| 425 | goto out; | ||
| 426 | } | ||
| 427 | 360 | ||
| 428 | state->sync = how & FLUSH_SYNC; | 361 | _fix_verify_io_params(wdata->lseg, &wdata->args.pages, |
| 362 | &wdata->args.pgbase, | ||
| 363 | wdata->args.offset, wdata->args.count); | ||
| 429 | 364 | ||
| 430 | status = objio_write_pagelist(state, how & FLUSH_STABLE); | 365 | err = objio_write_pagelist(wdata, how); |
| 431 | out: | 366 | if (unlikely(err)) { |
| 432 | dprintk("%s: Return status %Zd\n", __func__, status); | 367 | wdata->pnfs_error = err; |
| 433 | wdata->pnfs_error = status; | 368 | dprintk("%s: Returned Error %d\n", __func__, err); |
| 369 | return PNFS_NOT_ATTEMPTED; | ||
| 370 | } | ||
| 434 | return PNFS_ATTEMPTED; | 371 | return PNFS_ATTEMPTED; |
| 435 | } | 372 | } |
| 436 | 373 | ||
| @@ -537,14 +474,14 @@ merge_ioerr(struct pnfs_osd_ioerr *dest_err, | |||
| 537 | static void | 474 | static void |
| 538 | encode_accumulated_error(struct objlayout *objlay, __be32 *p) | 475 | encode_accumulated_error(struct objlayout *objlay, __be32 *p) |
| 539 | { | 476 | { |
| 540 | struct objlayout_io_state *state, *tmp; | 477 | struct objlayout_io_res *oir, *tmp; |
| 541 | struct pnfs_osd_ioerr accumulated_err = {.oer_errno = 0}; | 478 | struct pnfs_osd_ioerr accumulated_err = {.oer_errno = 0}; |
| 542 | 479 | ||
| 543 | list_for_each_entry_safe(state, tmp, &objlay->err_list, err_list) { | 480 | list_for_each_entry_safe(oir, tmp, &objlay->err_list, err_list) { |
| 544 | unsigned i; | 481 | unsigned i; |
| 545 | 482 | ||
| 546 | for (i = 0; i < state->num_comps; i++) { | 483 | for (i = 0; i < oir->num_comps; i++) { |
| 547 | struct pnfs_osd_ioerr *ioerr = &state->ioerrs[i]; | 484 | struct pnfs_osd_ioerr *ioerr = &oir->ioerrs[i]; |
| 548 | 485 | ||
| 549 | if (!ioerr->oer_errno) | 486 | if (!ioerr->oer_errno) |
| 550 | continue; | 487 | continue; |
| @@ -563,8 +500,8 @@ encode_accumulated_error(struct objlayout *objlay, __be32 *p) | |||
| 563 | 500 | ||
| 564 | merge_ioerr(&accumulated_err, ioerr); | 501 | merge_ioerr(&accumulated_err, ioerr); |
| 565 | } | 502 | } |
| 566 | list_del(&state->err_list); | 503 | list_del(&oir->err_list); |
| 567 | objlayout_free_io_state(state); | 504 | objio_free_result(oir); |
| 568 | } | 505 | } |
| 569 | 506 | ||
| 570 | pnfs_osd_xdr_encode_ioerr(p, &accumulated_err); | 507 | pnfs_osd_xdr_encode_ioerr(p, &accumulated_err); |
| @@ -576,7 +513,7 @@ objlayout_encode_layoutreturn(struct pnfs_layout_hdr *pnfslay, | |||
| 576 | const struct nfs4_layoutreturn_args *args) | 513 | const struct nfs4_layoutreturn_args *args) |
| 577 | { | 514 | { |
| 578 | struct objlayout *objlay = OBJLAYOUT(pnfslay); | 515 | struct objlayout *objlay = OBJLAYOUT(pnfslay); |
| 579 | struct objlayout_io_state *state, *tmp; | 516 | struct objlayout_io_res *oir, *tmp; |
| 580 | __be32 *start; | 517 | __be32 *start; |
| 581 | 518 | ||
| 582 | dprintk("%s: Begin\n", __func__); | 519 | dprintk("%s: Begin\n", __func__); |
| @@ -585,13 +522,13 @@ objlayout_encode_layoutreturn(struct pnfs_layout_hdr *pnfslay, | |||
| 585 | 522 | ||
| 586 | spin_lock(&objlay->lock); | 523 | spin_lock(&objlay->lock); |
| 587 | 524 | ||
| 588 | list_for_each_entry_safe(state, tmp, &objlay->err_list, err_list) { | 525 | list_for_each_entry_safe(oir, tmp, &objlay->err_list, err_list) { |
| 589 | __be32 *last_xdr = NULL, *p; | 526 | __be32 *last_xdr = NULL, *p; |
| 590 | unsigned i; | 527 | unsigned i; |
| 591 | int res = 0; | 528 | int res = 0; |
| 592 | 529 | ||
| 593 | for (i = 0; i < state->num_comps; i++) { | 530 | for (i = 0; i < oir->num_comps; i++) { |
| 594 | struct pnfs_osd_ioerr *ioerr = &state->ioerrs[i]; | 531 | struct pnfs_osd_ioerr *ioerr = &oir->ioerrs[i]; |
| 595 | 532 | ||
| 596 | if (!ioerr->oer_errno) | 533 | if (!ioerr->oer_errno) |
| 597 | continue; | 534 | continue; |
| @@ -615,7 +552,7 @@ objlayout_encode_layoutreturn(struct pnfs_layout_hdr *pnfslay, | |||
| 615 | } | 552 | } |
| 616 | 553 | ||
| 617 | last_xdr = p; | 554 | last_xdr = p; |
| 618 | pnfs_osd_xdr_encode_ioerr(p, &state->ioerrs[i]); | 555 | pnfs_osd_xdr_encode_ioerr(p, &oir->ioerrs[i]); |
| 619 | } | 556 | } |
| 620 | 557 | ||
| 621 | /* TODO: use xdr_write_pages */ | 558 | /* TODO: use xdr_write_pages */ |
| @@ -631,8 +568,8 @@ objlayout_encode_layoutreturn(struct pnfs_layout_hdr *pnfslay, | |||
| 631 | encode_accumulated_error(objlay, last_xdr); | 568 | encode_accumulated_error(objlay, last_xdr); |
| 632 | goto loop_done; | 569 | goto loop_done; |
| 633 | } | 570 | } |
| 634 | list_del(&state->err_list); | 571 | list_del(&oir->err_list); |
| 635 | objlayout_free_io_state(state); | 572 | objio_free_result(oir); |
| 636 | } | 573 | } |
| 637 | loop_done: | 574 | loop_done: |
| 638 | spin_unlock(&objlay->lock); | 575 | spin_unlock(&objlay->lock); |
diff --git a/fs/nfs/objlayout/objlayout.h b/fs/nfs/objlayout/objlayout.h index a8244c8e042d..8ec34727ed21 100644 --- a/fs/nfs/objlayout/objlayout.h +++ b/fs/nfs/objlayout/objlayout.h | |||
| @@ -74,19 +74,11 @@ OBJLAYOUT(struct pnfs_layout_hdr *lo) | |||
| 74 | * per-I/O operation state | 74 | * per-I/O operation state |
| 75 | * embedded in objects provider io_state data structure | 75 | * embedded in objects provider io_state data structure |
| 76 | */ | 76 | */ |
| 77 | struct objlayout_io_state { | 77 | struct objlayout_io_res { |
| 78 | struct pnfs_layout_segment *lseg; | 78 | struct objlayout *objlay; |
| 79 | |||
| 80 | struct page **pages; | ||
| 81 | unsigned pgbase; | ||
| 82 | unsigned nr_pages; | ||
| 83 | unsigned long count; | ||
| 84 | loff_t offset; | ||
| 85 | bool sync; | ||
| 86 | 79 | ||
| 87 | void *rpcdata; | 80 | void *rpcdata; |
| 88 | int status; /* res */ | 81 | int status; /* res */ |
| 89 | int eof; /* res */ | ||
| 90 | int committed; /* res */ | 82 | int committed; /* res */ |
| 91 | 83 | ||
| 92 | /* Error reporting (layout_return) */ | 84 | /* Error reporting (layout_return) */ |
| @@ -100,6 +92,18 @@ struct objlayout_io_state { | |||
| 100 | struct pnfs_osd_ioerr *ioerrs; | 92 | struct pnfs_osd_ioerr *ioerrs; |
| 101 | }; | 93 | }; |
| 102 | 94 | ||
| 95 | static inline | ||
| 96 | void objlayout_init_ioerrs(struct objlayout_io_res *oir, unsigned num_comps, | ||
| 97 | struct pnfs_osd_ioerr *ioerrs, void *rpcdata, | ||
| 98 | struct pnfs_layout_hdr *pnfs_layout_type) | ||
| 99 | { | ||
| 100 | oir->objlay = OBJLAYOUT(pnfs_layout_type); | ||
| 101 | oir->rpcdata = rpcdata; | ||
| 102 | INIT_LIST_HEAD(&oir->err_list); | ||
| 103 | oir->num_comps = num_comps; | ||
| 104 | oir->ioerrs = ioerrs; | ||
| 105 | } | ||
| 106 | |||
| 103 | /* | 107 | /* |
| 104 | * Raid engine I/O API | 108 | * Raid engine I/O API |
| 105 | */ | 109 | */ |
| @@ -110,28 +114,24 @@ extern int objio_alloc_lseg(struct pnfs_layout_segment **outp, | |||
| 110 | gfp_t gfp_flags); | 114 | gfp_t gfp_flags); |
| 111 | extern void objio_free_lseg(struct pnfs_layout_segment *lseg); | 115 | extern void objio_free_lseg(struct pnfs_layout_segment *lseg); |
| 112 | 116 | ||
| 113 | extern int objio_alloc_io_state( | 117 | /* objio_free_result will free these @oir structs recieved from |
| 114 | struct pnfs_layout_segment *lseg, | 118 | * objlayout_{read,write}_done |
| 115 | struct objlayout_io_state **outp, | 119 | */ |
| 116 | gfp_t gfp_flags); | 120 | extern void objio_free_result(struct objlayout_io_res *oir); |
| 117 | extern void objio_free_io_state(struct objlayout_io_state *state); | ||
| 118 | 121 | ||
| 119 | extern ssize_t objio_read_pagelist(struct objlayout_io_state *ol_state); | 122 | extern int objio_read_pagelist(struct nfs_read_data *rdata); |
| 120 | extern ssize_t objio_write_pagelist(struct objlayout_io_state *ol_state, | 123 | extern int objio_write_pagelist(struct nfs_write_data *wdata, int how); |
| 121 | bool stable); | ||
| 122 | 124 | ||
| 123 | /* | 125 | /* |
| 124 | * callback API | 126 | * callback API |
| 125 | */ | 127 | */ |
| 126 | extern void objlayout_io_set_result(struct objlayout_io_state *state, | 128 | extern void objlayout_io_set_result(struct objlayout_io_res *oir, |
| 127 | unsigned index, struct pnfs_osd_objid *pooid, | 129 | unsigned index, struct pnfs_osd_objid *pooid, |
| 128 | int osd_error, u64 offset, u64 length, bool is_write); | 130 | int osd_error, u64 offset, u64 length, bool is_write); |
| 129 | 131 | ||
| 130 | static inline void | 132 | static inline void |
| 131 | objlayout_add_delta_space_used(struct objlayout_io_state *state, s64 space_used) | 133 | objlayout_add_delta_space_used(struct objlayout *objlay, s64 space_used) |
| 132 | { | 134 | { |
| 133 | struct objlayout *objlay = OBJLAYOUT(state->lseg->pls_layout); | ||
| 134 | |||
| 135 | /* If one of the I/Os errored out and the delta_space_used was | 135 | /* If one of the I/Os errored out and the delta_space_used was |
| 136 | * invalid we render the complete report as invalid. Protocol mandate | 136 | * invalid we render the complete report as invalid. Protocol mandate |
| 137 | * the DSU be accurate or not reported. | 137 | * the DSU be accurate or not reported. |
| @@ -144,9 +144,9 @@ objlayout_add_delta_space_used(struct objlayout_io_state *state, s64 space_used) | |||
| 144 | spin_unlock(&objlay->lock); | 144 | spin_unlock(&objlay->lock); |
| 145 | } | 145 | } |
| 146 | 146 | ||
| 147 | extern void objlayout_read_done(struct objlayout_io_state *state, | 147 | extern void objlayout_read_done(struct objlayout_io_res *oir, |
| 148 | ssize_t status, bool sync); | 148 | ssize_t status, bool sync); |
| 149 | extern void objlayout_write_done(struct objlayout_io_state *state, | 149 | extern void objlayout_write_done(struct objlayout_io_res *oir, |
| 150 | ssize_t status, bool sync); | 150 | ssize_t status, bool sync); |
| 151 | 151 | ||
| 152 | extern int objlayout_get_deviceinfo(struct pnfs_layout_hdr *pnfslay, | 152 | extern int objlayout_get_deviceinfo(struct pnfs_layout_hdr *pnfslay, |
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index b60970cc7f1f..0a5ff5c19511 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c | |||
| @@ -41,7 +41,7 @@ nfs_page_free(struct nfs_page *p) | |||
| 41 | 41 | ||
| 42 | /** | 42 | /** |
| 43 | * nfs_create_request - Create an NFS read/write request. | 43 | * nfs_create_request - Create an NFS read/write request. |
| 44 | * @file: file descriptor to use | 44 | * @ctx: open context to use |
| 45 | * @inode: inode to which the request is attached | 45 | * @inode: inode to which the request is attached |
| 46 | * @page: page to write | 46 | * @page: page to write |
| 47 | * @offset: starting offset within the page for the write | 47 | * @offset: starting offset within the page for the write |
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index ee73d9a4f700..a2478bc74442 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c | |||
| @@ -1443,17 +1443,31 @@ pnfs_layoutcommit_inode(struct inode *inode, bool sync) | |||
| 1443 | /* Note kzalloc ensures data->res.seq_res.sr_slot == NULL */ | 1443 | /* Note kzalloc ensures data->res.seq_res.sr_slot == NULL */ |
| 1444 | data = kzalloc(sizeof(*data), GFP_NOFS); | 1444 | data = kzalloc(sizeof(*data), GFP_NOFS); |
| 1445 | if (!data) { | 1445 | if (!data) { |
| 1446 | mark_inode_dirty_sync(inode); | ||
| 1447 | status = -ENOMEM; | 1446 | status = -ENOMEM; |
| 1448 | goto out; | 1447 | goto out; |
| 1449 | } | 1448 | } |
| 1450 | 1449 | ||
| 1450 | if (!test_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) | ||
| 1451 | goto out_free; | ||
| 1452 | |||
| 1453 | if (test_and_set_bit(NFS_INO_LAYOUTCOMMITTING, &nfsi->flags)) { | ||
| 1454 | if (!sync) { | ||
| 1455 | status = -EAGAIN; | ||
| 1456 | goto out_free; | ||
| 1457 | } | ||
| 1458 | status = wait_on_bit_lock(&nfsi->flags, NFS_INO_LAYOUTCOMMITTING, | ||
| 1459 | nfs_wait_bit_killable, TASK_KILLABLE); | ||
| 1460 | if (status) | ||
| 1461 | goto out_free; | ||
| 1462 | } | ||
| 1463 | |||
| 1451 | INIT_LIST_HEAD(&data->lseg_list); | 1464 | INIT_LIST_HEAD(&data->lseg_list); |
| 1452 | spin_lock(&inode->i_lock); | 1465 | spin_lock(&inode->i_lock); |
| 1453 | if (!test_and_clear_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) { | 1466 | if (!test_and_clear_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) { |
| 1467 | clear_bit(NFS_INO_LAYOUTCOMMITTING, &nfsi->flags); | ||
| 1454 | spin_unlock(&inode->i_lock); | 1468 | spin_unlock(&inode->i_lock); |
| 1455 | kfree(data); | 1469 | wake_up_bit(&nfsi->flags, NFS_INO_LAYOUTCOMMITTING); |
| 1456 | goto out; | 1470 | goto out_free; |
| 1457 | } | 1471 | } |
| 1458 | 1472 | ||
| 1459 | pnfs_list_write_lseg(inode, &data->lseg_list); | 1473 | pnfs_list_write_lseg(inode, &data->lseg_list); |
| @@ -1475,6 +1489,11 @@ pnfs_layoutcommit_inode(struct inode *inode, bool sync) | |||
| 1475 | 1489 | ||
| 1476 | status = nfs4_proc_layoutcommit(data, sync); | 1490 | status = nfs4_proc_layoutcommit(data, sync); |
| 1477 | out: | 1491 | out: |
| 1492 | if (status) | ||
| 1493 | mark_inode_dirty_sync(inode); | ||
| 1478 | dprintk("<-- %s status %d\n", __func__, status); | 1494 | dprintk("<-- %s status %d\n", __func__, status); |
| 1479 | return status; | 1495 | return status; |
| 1496 | out_free: | ||
| 1497 | kfree(data); | ||
| 1498 | goto out; | ||
| 1480 | } | 1499 | } |
diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 2219c88d96b2..b016b8a36399 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c | |||
| @@ -1243,7 +1243,6 @@ void nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data) | |||
| 1243 | { | 1243 | { |
| 1244 | struct nfs_writeargs *argp = &data->args; | 1244 | struct nfs_writeargs *argp = &data->args; |
| 1245 | struct nfs_writeres *resp = &data->res; | 1245 | struct nfs_writeres *resp = &data->res; |
| 1246 | struct nfs_server *server = NFS_SERVER(data->inode); | ||
| 1247 | int status; | 1246 | int status; |
| 1248 | 1247 | ||
| 1249 | dprintk("NFS: %5u nfs_writeback_done (status %d)\n", | 1248 | dprintk("NFS: %5u nfs_writeback_done (status %d)\n", |
| @@ -1277,7 +1276,7 @@ void nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data) | |||
| 1277 | if (time_before(complain, jiffies)) { | 1276 | if (time_before(complain, jiffies)) { |
| 1278 | dprintk("NFS: faulty NFS server %s:" | 1277 | dprintk("NFS: faulty NFS server %s:" |
| 1279 | " (committed = %d) != (stable = %d)\n", | 1278 | " (committed = %d) != (stable = %d)\n", |
| 1280 | server->nfs_client->cl_hostname, | 1279 | NFS_SERVER(data->inode)->nfs_client->cl_hostname, |
| 1281 | resp->verf->committed, argp->stable); | 1280 | resp->verf->committed, argp->stable); |
| 1282 | complain = jiffies + 300 * HZ; | 1281 | complain = jiffies + 300 * HZ; |
| 1283 | } | 1282 | } |
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index dc5a1bf476b1..52cd976b6099 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c | |||
| @@ -256,6 +256,8 @@ static void nfsd_last_thread(struct svc_serv *serv) | |||
| 256 | nfsd_serv = NULL; | 256 | nfsd_serv = NULL; |
| 257 | nfsd_shutdown(); | 257 | nfsd_shutdown(); |
| 258 | 258 | ||
| 259 | svc_rpcb_cleanup(serv); | ||
| 260 | |||
| 259 | printk(KERN_WARNING "nfsd: last server has exited, flushing export " | 261 | printk(KERN_WARNING "nfsd: last server has exited, flushing export " |
| 260 | "cache\n"); | 262 | "cache\n"); |
| 261 | nfsd_export_flush(); | 263 | nfsd_export_flush(); |
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 60a137b7f171..ab2c6343361a 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h | |||
| @@ -229,6 +229,7 @@ struct nfs_inode { | |||
| 229 | #define NFS_INO_COMMIT (7) /* inode is committing unstable writes */ | 229 | #define NFS_INO_COMMIT (7) /* inode is committing unstable writes */ |
| 230 | #define NFS_INO_PNFS_COMMIT (8) /* use pnfs code for commit */ | 230 | #define NFS_INO_PNFS_COMMIT (8) /* use pnfs code for commit */ |
| 231 | #define NFS_INO_LAYOUTCOMMIT (9) /* layoutcommit required */ | 231 | #define NFS_INO_LAYOUTCOMMIT (9) /* layoutcommit required */ |
| 232 | #define NFS_INO_LAYOUTCOMMITTING (10) /* layoutcommit inflight */ | ||
| 232 | 233 | ||
| 233 | static inline struct nfs_inode *NFS_I(const struct inode *inode) | 234 | static inline struct nfs_inode *NFS_I(const struct inode *inode) |
| 234 | { | 235 | { |
diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 492486a74484..3d8f9c44e27d 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h | |||
| @@ -136,6 +136,8 @@ void rpc_shutdown_client(struct rpc_clnt *); | |||
| 136 | void rpc_release_client(struct rpc_clnt *); | 136 | void rpc_release_client(struct rpc_clnt *); |
| 137 | void rpc_task_release_client(struct rpc_task *); | 137 | void rpc_task_release_client(struct rpc_task *); |
| 138 | 138 | ||
| 139 | int rpcb_create_local(void); | ||
| 140 | void rpcb_put_local(void); | ||
| 139 | int rpcb_register(u32, u32, int, unsigned short); | 141 | int rpcb_register(u32, u32, int, unsigned short); |
| 140 | int rpcb_v4_register(const u32 program, const u32 version, | 142 | int rpcb_v4_register(const u32 program, const u32 version, |
| 141 | const struct sockaddr *address, | 143 | const struct sockaddr *address, |
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index d8d5d93071b3..35b37b1e9299 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h | |||
| @@ -413,6 +413,7 @@ struct svc_procedure { | |||
| 413 | /* | 413 | /* |
| 414 | * Function prototypes. | 414 | * Function prototypes. |
| 415 | */ | 415 | */ |
| 416 | void svc_rpcb_cleanup(struct svc_serv *serv); | ||
| 416 | struct svc_serv *svc_create(struct svc_program *, unsigned int, | 417 | struct svc_serv *svc_create(struct svc_program *, unsigned int, |
| 417 | void (*shutdown)(struct svc_serv *)); | 418 | void (*shutdown)(struct svc_serv *)); |
| 418 | struct svc_rqst *svc_prepare_thread(struct svc_serv *serv, | 419 | struct svc_rqst *svc_prepare_thread(struct svc_serv *serv, |
diff --git a/net/sunrpc/auth_unix.c b/net/sunrpc/auth_unix.c index 4cb70dc6e7ad..e50502d8ceb7 100644 --- a/net/sunrpc/auth_unix.c +++ b/net/sunrpc/auth_unix.c | |||
| @@ -129,6 +129,9 @@ unx_match(struct auth_cred *acred, struct rpc_cred *rcred, int flags) | |||
| 129 | for (i = 0; i < groups ; i++) | 129 | for (i = 0; i < groups ; i++) |
| 130 | if (cred->uc_gids[i] != GROUP_AT(acred->group_info, i)) | 130 | if (cred->uc_gids[i] != GROUP_AT(acred->group_info, i)) |
| 131 | return 0; | 131 | return 0; |
| 132 | if (groups < NFS_NGROUPS && | ||
| 133 | cred->uc_gids[groups] != NOGROUP) | ||
| 134 | return 0; | ||
| 132 | return 1; | 135 | return 1; |
| 133 | } | 136 | } |
| 134 | 137 | ||
diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index f588b852d41c..8761bf8e36fc 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c | |||
| @@ -114,6 +114,9 @@ static struct rpc_program rpcb_program; | |||
| 114 | static struct rpc_clnt * rpcb_local_clnt; | 114 | static struct rpc_clnt * rpcb_local_clnt; |
| 115 | static struct rpc_clnt * rpcb_local_clnt4; | 115 | static struct rpc_clnt * rpcb_local_clnt4; |
| 116 | 116 | ||
| 117 | DEFINE_SPINLOCK(rpcb_clnt_lock); | ||
| 118 | unsigned int rpcb_users; | ||
| 119 | |||
| 117 | struct rpcbind_args { | 120 | struct rpcbind_args { |
| 118 | struct rpc_xprt * r_xprt; | 121 | struct rpc_xprt * r_xprt; |
| 119 | 122 | ||
| @@ -161,6 +164,56 @@ static void rpcb_map_release(void *data) | |||
| 161 | kfree(map); | 164 | kfree(map); |
| 162 | } | 165 | } |
| 163 | 166 | ||
| 167 | static int rpcb_get_local(void) | ||
| 168 | { | ||
| 169 | int cnt; | ||
| 170 | |||
| 171 | spin_lock(&rpcb_clnt_lock); | ||
| 172 | if (rpcb_users) | ||
| 173 | rpcb_users++; | ||
| 174 | cnt = rpcb_users; | ||
| 175 | spin_unlock(&rpcb_clnt_lock); | ||
| 176 | |||
| 177 | return cnt; | ||
| 178 | } | ||
| 179 | |||
| 180 | void rpcb_put_local(void) | ||
| 181 | { | ||
| 182 | struct rpc_clnt *clnt = rpcb_local_clnt; | ||
| 183 | struct rpc_clnt *clnt4 = rpcb_local_clnt4; | ||
| 184 | int shutdown; | ||
| 185 | |||
| 186 | spin_lock(&rpcb_clnt_lock); | ||
| 187 | if (--rpcb_users == 0) { | ||
| 188 | rpcb_local_clnt = NULL; | ||
| 189 | rpcb_local_clnt4 = NULL; | ||
| 190 | } | ||
| 191 | shutdown = !rpcb_users; | ||
| 192 | spin_unlock(&rpcb_clnt_lock); | ||
| 193 | |||
| 194 | if (shutdown) { | ||
| 195 | /* | ||
| 196 | * cleanup_rpcb_clnt - remove xprtsock's sysctls, unregister | ||
| 197 | */ | ||
| 198 | if (clnt4) | ||
| 199 | rpc_shutdown_client(clnt4); | ||
| 200 | if (clnt) | ||
| 201 | rpc_shutdown_client(clnt); | ||
| 202 | } | ||
| 203 | } | ||
| 204 | |||
| 205 | static void rpcb_set_local(struct rpc_clnt *clnt, struct rpc_clnt *clnt4) | ||
| 206 | { | ||
| 207 | /* Protected by rpcb_create_local_mutex */ | ||
| 208 | rpcb_local_clnt = clnt; | ||
| 209 | rpcb_local_clnt4 = clnt4; | ||
| 210 | smp_wmb(); | ||
| 211 | rpcb_users = 1; | ||
| 212 | dprintk("RPC: created new rpcb local clients (rpcb_local_clnt: " | ||
| 213 | "%p, rpcb_local_clnt4: %p)\n", rpcb_local_clnt, | ||
| 214 | rpcb_local_clnt4); | ||
| 215 | } | ||
| 216 | |||
| 164 | /* | 217 | /* |
| 165 | * Returns zero on success, otherwise a negative errno value | 218 | * Returns zero on success, otherwise a negative errno value |
| 166 | * is returned. | 219 | * is returned. |
| @@ -205,9 +258,7 @@ static int rpcb_create_local_unix(void) | |||
| 205 | clnt4 = NULL; | 258 | clnt4 = NULL; |
| 206 | } | 259 | } |
| 207 | 260 | ||
| 208 | /* Protected by rpcb_create_local_mutex */ | 261 | rpcb_set_local(clnt, clnt4); |
| 209 | rpcb_local_clnt = clnt; | ||
| 210 | rpcb_local_clnt4 = clnt4; | ||
| 211 | 262 | ||
| 212 | out: | 263 | out: |
| 213 | return result; | 264 | return result; |
| @@ -259,9 +310,7 @@ static int rpcb_create_local_net(void) | |||
| 259 | clnt4 = NULL; | 310 | clnt4 = NULL; |
| 260 | } | 311 | } |
| 261 | 312 | ||
| 262 | /* Protected by rpcb_create_local_mutex */ | 313 | rpcb_set_local(clnt, clnt4); |
| 263 | rpcb_local_clnt = clnt; | ||
| 264 | rpcb_local_clnt4 = clnt4; | ||
| 265 | 314 | ||
| 266 | out: | 315 | out: |
| 267 | return result; | 316 | return result; |
| @@ -271,16 +320,16 @@ out: | |||
| 271 | * Returns zero on success, otherwise a negative errno value | 320 | * Returns zero on success, otherwise a negative errno value |
| 272 | * is returned. | 321 | * is returned. |
| 273 | */ | 322 | */ |
| 274 | static int rpcb_create_local(void) | 323 | int rpcb_create_local(void) |
| 275 | { | 324 | { |
| 276 | static DEFINE_MUTEX(rpcb_create_local_mutex); | 325 | static DEFINE_MUTEX(rpcb_create_local_mutex); |
| 277 | int result = 0; | 326 | int result = 0; |
| 278 | 327 | ||
| 279 | if (rpcb_local_clnt) | 328 | if (rpcb_get_local()) |
| 280 | return result; | 329 | return result; |
| 281 | 330 | ||
| 282 | mutex_lock(&rpcb_create_local_mutex); | 331 | mutex_lock(&rpcb_create_local_mutex); |
| 283 | if (rpcb_local_clnt) | 332 | if (rpcb_get_local()) |
| 284 | goto out; | 333 | goto out; |
| 285 | 334 | ||
| 286 | if (rpcb_create_local_unix() != 0) | 335 | if (rpcb_create_local_unix() != 0) |
| @@ -382,11 +431,6 @@ int rpcb_register(u32 prog, u32 vers, int prot, unsigned short port) | |||
| 382 | struct rpc_message msg = { | 431 | struct rpc_message msg = { |
| 383 | .rpc_argp = &map, | 432 | .rpc_argp = &map, |
| 384 | }; | 433 | }; |
| 385 | int error; | ||
| 386 | |||
| 387 | error = rpcb_create_local(); | ||
| 388 | if (error) | ||
| 389 | return error; | ||
| 390 | 434 | ||
| 391 | dprintk("RPC: %sregistering (%u, %u, %d, %u) with local " | 435 | dprintk("RPC: %sregistering (%u, %u, %d, %u) with local " |
| 392 | "rpcbind\n", (port ? "" : "un"), | 436 | "rpcbind\n", (port ? "" : "un"), |
| @@ -522,11 +566,7 @@ int rpcb_v4_register(const u32 program, const u32 version, | |||
| 522 | struct rpc_message msg = { | 566 | struct rpc_message msg = { |
| 523 | .rpc_argp = &map, | 567 | .rpc_argp = &map, |
| 524 | }; | 568 | }; |
| 525 | int error; | ||
| 526 | 569 | ||
| 527 | error = rpcb_create_local(); | ||
| 528 | if (error) | ||
| 529 | return error; | ||
| 530 | if (rpcb_local_clnt4 == NULL) | 570 | if (rpcb_local_clnt4 == NULL) |
| 531 | return -EPROTONOSUPPORT; | 571 | return -EPROTONOSUPPORT; |
| 532 | 572 | ||
| @@ -1060,15 +1100,3 @@ static struct rpc_program rpcb_program = { | |||
| 1060 | .version = rpcb_version, | 1100 | .version = rpcb_version, |
| 1061 | .stats = &rpcb_stats, | 1101 | .stats = &rpcb_stats, |
| 1062 | }; | 1102 | }; |
| 1063 | |||
| 1064 | /** | ||
| 1065 | * cleanup_rpcb_clnt - remove xprtsock's sysctls, unregister | ||
| 1066 | * | ||
| 1067 | */ | ||
| 1068 | void cleanup_rpcb_clnt(void) | ||
| 1069 | { | ||
| 1070 | if (rpcb_local_clnt4) | ||
| 1071 | rpc_shutdown_client(rpcb_local_clnt4); | ||
| 1072 | if (rpcb_local_clnt) | ||
| 1073 | rpc_shutdown_client(rpcb_local_clnt); | ||
| 1074 | } | ||
diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c index 9d0809160994..8ec9778c3f4a 100644 --- a/net/sunrpc/sunrpc_syms.c +++ b/net/sunrpc/sunrpc_syms.c | |||
| @@ -61,8 +61,6 @@ static struct pernet_operations sunrpc_net_ops = { | |||
| 61 | 61 | ||
| 62 | extern struct cache_detail unix_gid_cache; | 62 | extern struct cache_detail unix_gid_cache; |
| 63 | 63 | ||
| 64 | extern void cleanup_rpcb_clnt(void); | ||
| 65 | |||
| 66 | static int __init | 64 | static int __init |
| 67 | init_sunrpc(void) | 65 | init_sunrpc(void) |
| 68 | { | 66 | { |
| @@ -102,7 +100,6 @@ out: | |||
| 102 | static void __exit | 100 | static void __exit |
| 103 | cleanup_sunrpc(void) | 101 | cleanup_sunrpc(void) |
| 104 | { | 102 | { |
| 105 | cleanup_rpcb_clnt(); | ||
| 106 | rpcauth_remove_module(); | 103 | rpcauth_remove_module(); |
| 107 | cleanup_socket_xprt(); | 104 | cleanup_socket_xprt(); |
| 108 | svc_cleanup_xprt_sock(); | 105 | svc_cleanup_xprt_sock(); |
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index dd5cc00ed559..6e038884ae0c 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c | |||
| @@ -366,6 +366,42 @@ svc_pool_for_cpu(struct svc_serv *serv, int cpu) | |||
| 366 | return &serv->sv_pools[pidx % serv->sv_nrpools]; | 366 | return &serv->sv_pools[pidx % serv->sv_nrpools]; |
| 367 | } | 367 | } |
| 368 | 368 | ||
| 369 | static int svc_rpcb_setup(struct svc_serv *serv) | ||
| 370 | { | ||
| 371 | int err; | ||
| 372 | |||
| 373 | err = rpcb_create_local(); | ||
| 374 | if (err) | ||
| 375 | return err; | ||
| 376 | |||
| 377 | /* Remove any stale portmap registrations */ | ||
| 378 | svc_unregister(serv); | ||
| 379 | return 0; | ||
| 380 | } | ||
| 381 | |||
| 382 | void svc_rpcb_cleanup(struct svc_serv *serv) | ||
| 383 | { | ||
| 384 | svc_unregister(serv); | ||
| 385 | rpcb_put_local(); | ||
| 386 | } | ||
| 387 | EXPORT_SYMBOL_GPL(svc_rpcb_cleanup); | ||
| 388 | |||
| 389 | static int svc_uses_rpcbind(struct svc_serv *serv) | ||
| 390 | { | ||
| 391 | struct svc_program *progp; | ||
| 392 | unsigned int i; | ||
| 393 | |||
| 394 | for (progp = serv->sv_program; progp; progp = progp->pg_next) { | ||
| 395 | for (i = 0; i < progp->pg_nvers; i++) { | ||
| 396 | if (progp->pg_vers[i] == NULL) | ||
| 397 | continue; | ||
| 398 | if (progp->pg_vers[i]->vs_hidden == 0) | ||
| 399 | return 1; | ||
| 400 | } | ||
| 401 | } | ||
| 402 | |||
| 403 | return 0; | ||
| 404 | } | ||
| 369 | 405 | ||
| 370 | /* | 406 | /* |
| 371 | * Create an RPC service | 407 | * Create an RPC service |
| @@ -431,8 +467,15 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools, | |||
| 431 | spin_lock_init(&pool->sp_lock); | 467 | spin_lock_init(&pool->sp_lock); |
| 432 | } | 468 | } |
| 433 | 469 | ||
| 434 | /* Remove any stale portmap registrations */ | 470 | if (svc_uses_rpcbind(serv)) { |
| 435 | svc_unregister(serv); | 471 | if (svc_rpcb_setup(serv) < 0) { |
| 472 | kfree(serv->sv_pools); | ||
| 473 | kfree(serv); | ||
| 474 | return NULL; | ||
| 475 | } | ||
| 476 | if (!serv->sv_shutdown) | ||
| 477 | serv->sv_shutdown = svc_rpcb_cleanup; | ||
| 478 | } | ||
| 436 | 479 | ||
| 437 | return serv; | 480 | return serv; |
| 438 | } | 481 | } |
| @@ -500,7 +543,6 @@ svc_destroy(struct svc_serv *serv) | |||
| 500 | if (svc_serv_is_pooled(serv)) | 543 | if (svc_serv_is_pooled(serv)) |
| 501 | svc_pool_map_put(); | 544 | svc_pool_map_put(); |
| 502 | 545 | ||
| 503 | svc_unregister(serv); | ||
| 504 | kfree(serv->sv_pools); | 546 | kfree(serv->sv_pools); |
| 505 | kfree(serv); | 547 | kfree(serv); |
| 506 | } | 548 | } |
