-rw-r--r--   fs/exofs/Kconfig             |   2
-rw-r--r--   fs/nfs/callback_xdr.c        |  12
-rw-r--r--   fs/nfs/file.c                |   9
-rw-r--r--   fs/nfs/nfs4filelayout.c      |   7
-rw-r--r--   fs/nfs/nfs4proc.c            |   6
-rw-r--r--   fs/nfs/nfs4xdr.c             |   2
-rw-r--r--   fs/nfs/objlayout/objio_osd.c | 872
-rw-r--r--   fs/nfs/objlayout/objlayout.c | 209
-rw-r--r--   fs/nfs/objlayout/objlayout.h |  48
-rw-r--r--   fs/nfs/pagelist.c            |   2
-rw-r--r--   fs/nfs/pnfs.c                |  25
-rw-r--r--   fs/nfs/write.c               |   3
-rw-r--r--   fs/nfsd/nfssvc.c             |   2
-rw-r--r--   include/linux/nfs_fs.h       |   1
-rw-r--r--   include/linux/sunrpc/clnt.h  |   2
-rw-r--r--   include/linux/sunrpc/svc.h   |   1
-rw-r--r--   net/sunrpc/auth_unix.c       |   3
-rw-r--r--   net/sunrpc/rpcb_clnt.c       |  88
-rw-r--r--   net/sunrpc/sunrpc_syms.c     |   3
-rw-r--r--   net/sunrpc/svc.c             |  48
20 files changed, 457 insertions(+), 888 deletions(-)
diff --git a/fs/exofs/Kconfig b/fs/exofs/Kconfig
index fa9a286c8771..da42f32c49be 100644
--- a/fs/exofs/Kconfig
+++ b/fs/exofs/Kconfig
@@ -5,7 +5,7 @@
 # selected by any of the users.
 config ORE
 	tristate
-	depends on EXOFS_FS
+	depends on EXOFS_FS || PNFS_OBJLAYOUT
 	select ASYNC_XOR
 	default SCSI_OSD_ULD
 
diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
index 918ad647afea..726e59a9e50f 100644
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c
@@ -488,17 +488,18 @@ static __be32 decode_recallany_args(struct svc_rqst *rqstp,
 				      struct xdr_stream *xdr,
 				      struct cb_recallanyargs *args)
 {
-	__be32 *p;
+	uint32_t bitmap[2];
+	__be32 *p, status;
 
 	args->craa_addr = svc_addr(rqstp);
 	p = read_buf(xdr, 4);
 	if (unlikely(p == NULL))
 		return htonl(NFS4ERR_BADXDR);
 	args->craa_objs_to_keep = ntohl(*p++);
-	p = read_buf(xdr, 4);
-	if (unlikely(p == NULL))
-		return htonl(NFS4ERR_BADXDR);
-	args->craa_type_mask = ntohl(*p);
+	status = decode_bitmap(xdr, bitmap);
+	if (unlikely(status))
+		return status;
+	args->craa_type_mask = bitmap[0];
 
 	return 0;
 }
@@ -986,4 +987,5 @@ struct svc_version nfs4_callback_version4 = {
 	.vs_proc = nfs4_callback_procedures1,
 	.vs_xdrsize = NFS4_CALLBACK_XDRSIZE,
 	.vs_dispatch = NULL,
+	.vs_hidden = 1,
 };
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 91c01f0a4c3b..0a1f8312b4dc 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -137,11 +137,9 @@ nfs_file_open(struct inode *inode, struct file *filp)
 static int
 nfs_file_release(struct inode *inode, struct file *filp)
 {
-	struct dentry *dentry = filp->f_path.dentry;
-
 	dprintk("NFS: release(%s/%s)\n",
-			dentry->d_parent->d_name.name,
-			dentry->d_name.name);
+			filp->f_path.dentry->d_parent->d_name.name,
+			filp->f_path.dentry->d_name.name);
 
 	nfs_inc_stats(inode, NFSIOS_VFSRELEASE);
 	return nfs_release(inode, filp);
@@ -228,14 +226,13 @@ nfs_file_read(struct kiocb *iocb, const struct iovec *iov,
 	struct dentry * dentry = iocb->ki_filp->f_path.dentry;
 	struct inode * inode = dentry->d_inode;
 	ssize_t result;
-	size_t count = iov_length(iov, nr_segs);
 
 	if (iocb->ki_filp->f_flags & O_DIRECT)
 		return nfs_file_direct_read(iocb, iov, nr_segs, pos);
 
 	dprintk("NFS: read(%s/%s, %lu@%lu)\n",
 		dentry->d_parent->d_name.name, dentry->d_name.name,
-		(unsigned long) count, (unsigned long) pos);
+		(unsigned long) iov_length(iov, nr_segs), (unsigned long) pos);
 
 	result = nfs_revalidate_mapping(inode, iocb->ki_filp->f_mapping);
 	if (!result) {
diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
index 09119418402f..12185aadb349 100644
--- a/fs/nfs/nfs4filelayout.c
+++ b/fs/nfs/nfs4filelayout.c
@@ -449,9 +449,8 @@ filelayout_check_layout(struct pnfs_layout_hdr *lo,
 
 	fl->dsaddr = dsaddr;
 
-	if (fl->first_stripe_index < 0 ||
-	    fl->first_stripe_index >= dsaddr->stripe_count) {
-		dprintk("%s Bad first_stripe_index %d\n",
+	if (fl->first_stripe_index >= dsaddr->stripe_count) {
+		dprintk("%s Bad first_stripe_index %u\n",
 			__func__, fl->first_stripe_index);
 		goto out_put;
 	}
@@ -552,7 +551,7 @@ filelayout_decode_layout(struct pnfs_layout_hdr *flo,
 
 	/* Note that a zero value for num_fh is legal for STRIPE_SPARSE.
 	 * Futher checking is done in filelayout_check_layout */
-	if (fl->num_fh < 0 || fl->num_fh >
+	if (fl->num_fh >
 	    max(NFS4_PNFS_MAX_STRIPE_CNT, NFS4_PNFS_MAX_MULTI_CNT))
 		goto out_err;
 
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index d2ae413c986a..b60fddf606f7 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -5950,6 +5950,7 @@ static void nfs4_layoutcommit_release(void *calldata)
 {
 	struct nfs4_layoutcommit_data *data = calldata;
 	struct pnfs_layout_segment *lseg, *tmp;
+	unsigned long *bitlock = &NFS_I(data->args.inode)->flags;
 
 	pnfs_cleanup_layoutcommit(data);
 	/* Matched by references in pnfs_set_layoutcommit */
@@ -5959,6 +5960,11 @@ static void nfs4_layoutcommit_release(void *calldata)
 			       &lseg->pls_flags))
 			put_lseg(lseg);
 	}
+
+	clear_bit_unlock(NFS_INO_LAYOUTCOMMITTING, bitlock);
+	smp_mb__after_clear_bit();
+	wake_up_bit(bitlock, NFS_INO_LAYOUTCOMMITTING);
+
 	put_rpccred(data->cred);
 	kfree(data);
 }
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 1dce12f41a4f..e6161b213ed1 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -6602,8 +6602,6 @@ static int nfs4_xdr_dec_secinfo(struct rpc_rqst *rqstp,
 	if (status)
 		goto out;
 	status = decode_secinfo(xdr, res);
-	if (status)
-		goto out;
 out:
 	return status;
 }
diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c
index d0cda12fddc3..c807ab93140e 100644
--- a/fs/nfs/objlayout/objio_osd.c
+++ b/fs/nfs/objlayout/objio_osd.c
@@ -38,21 +38,15 @@
38 | */ | 38 | */ |
39 | 39 | ||
40 | #include <linux/module.h> | 40 | #include <linux/module.h> |
41 | #include <scsi/osd_initiator.h> | 41 | #include <scsi/osd_ore.h> |
42 | 42 | ||
43 | #include "objlayout.h" | 43 | #include "objlayout.h" |
44 | 44 | ||
45 | #define NFSDBG_FACILITY NFSDBG_PNFS_LD | 45 | #define NFSDBG_FACILITY NFSDBG_PNFS_LD |
46 | 46 | ||
47 | #define _LLU(x) ((unsigned long long)x) | ||
48 | |||
49 | enum { BIO_MAX_PAGES_KMALLOC = | ||
50 | (PAGE_SIZE - sizeof(struct bio)) / sizeof(struct bio_vec), | ||
51 | }; | ||
52 | |||
53 | struct objio_dev_ent { | 47 | struct objio_dev_ent { |
54 | struct nfs4_deviceid_node id_node; | 48 | struct nfs4_deviceid_node id_node; |
55 | struct osd_dev *od; | 49 | struct ore_dev od; |
56 | }; | 50 | }; |
57 | 51 | ||
58 | static void | 52 | static void |
@@ -60,8 +54,8 @@ objio_free_deviceid_node(struct nfs4_deviceid_node *d)
60 | { | 54 | { |
61 | struct objio_dev_ent *de = container_of(d, struct objio_dev_ent, id_node); | 55 | struct objio_dev_ent *de = container_of(d, struct objio_dev_ent, id_node); |
62 | 56 | ||
63 | dprintk("%s: free od=%p\n", __func__, de->od); | 57 | dprintk("%s: free od=%p\n", __func__, de->od.od); |
64 | osduld_put_device(de->od); | 58 | osduld_put_device(de->od.od); |
65 | kfree(de); | 59 | kfree(de); |
66 | } | 60 | } |
67 | 61 | ||
@@ -98,12 +92,12 @@ _dev_list_add(const struct nfs_server *nfss,
98 | nfss->pnfs_curr_ld, | 92 | nfss->pnfs_curr_ld, |
99 | nfss->nfs_client, | 93 | nfss->nfs_client, |
100 | d_id); | 94 | d_id); |
101 | de->od = od; | 95 | de->od.od = od; |
102 | 96 | ||
103 | d = nfs4_insert_deviceid_node(&de->id_node); | 97 | d = nfs4_insert_deviceid_node(&de->id_node); |
104 | n = container_of(d, struct objio_dev_ent, id_node); | 98 | n = container_of(d, struct objio_dev_ent, id_node); |
105 | if (n != de) { | 99 | if (n != de) { |
106 | dprintk("%s: Race with other n->od=%p\n", __func__, n->od); | 100 | dprintk("%s: Race with other n->od=%p\n", __func__, n->od.od); |
107 | objio_free_deviceid_node(&de->id_node); | 101 | objio_free_deviceid_node(&de->id_node); |
108 | de = n; | 102 | de = n; |
109 | } | 103 | } |
@@ -111,28 +105,11 @@ _dev_list_add(const struct nfs_server *nfss,
111 | return de; | 105 | return de; |
112 | } | 106 | } |
113 | 107 | ||
114 | struct caps_buffers { | ||
115 | u8 caps_key[OSD_CRYPTO_KEYID_SIZE]; | ||
116 | u8 creds[OSD_CAP_LEN]; | ||
117 | }; | ||
118 | |||
119 | struct objio_segment { | 108 | struct objio_segment { |
120 | struct pnfs_layout_segment lseg; | 109 | struct pnfs_layout_segment lseg; |
121 | 110 | ||
122 | struct pnfs_osd_object_cred *comps; | 111 | struct ore_layout layout; |
123 | 112 | struct ore_components oc; | |
124 | unsigned mirrors_p1; | ||
125 | unsigned stripe_unit; | ||
126 | unsigned group_width; /* Data stripe_units without integrity comps */ | ||
127 | u64 group_depth; | ||
128 | unsigned group_count; | ||
129 | |||
130 | unsigned max_io_size; | ||
131 | |||
132 | unsigned comps_index; | ||
133 | unsigned num_comps; | ||
134 | /* variable length */ | ||
135 | struct objio_dev_ent *ods[]; | ||
136 | }; | 113 | }; |
137 | 114 | ||
138 | static inline struct objio_segment * | 115 | static inline struct objio_segment * |
@@ -141,59 +118,44 @@ OBJIO_LSEG(struct pnfs_layout_segment *lseg)
141 | return container_of(lseg, struct objio_segment, lseg); | 118 | return container_of(lseg, struct objio_segment, lseg); |
142 | } | 119 | } |
143 | 120 | ||
144 | struct objio_state; | ||
145 | typedef ssize_t (*objio_done_fn)(struct objio_state *ios); | ||
146 | |||
147 | struct objio_state { | 121 | struct objio_state { |
148 | /* Generic layer */ | 122 | /* Generic layer */ |
149 | struct objlayout_io_state ol_state; | 123 | struct objlayout_io_res oir; |
150 | 124 | ||
151 | struct objio_segment *layout; | 125 | bool sync; |
152 | 126 | /*FIXME: Support for extra_bytes at ore_get_rw_state() */ | |
153 | struct kref kref; | 127 | struct ore_io_state *ios; |
154 | objio_done_fn done; | ||
155 | void *private; | ||
156 | |||
157 | unsigned long length; | ||
158 | unsigned numdevs; /* Actually used devs in this IO */ | ||
159 | /* A per-device variable array of size numdevs */ | ||
160 | struct _objio_per_comp { | ||
161 | struct bio *bio; | ||
162 | struct osd_request *or; | ||
163 | unsigned long length; | ||
164 | u64 offset; | ||
165 | unsigned dev; | ||
166 | } per_dev[]; | ||
167 | }; | 128 | }; |
168 | 129 | ||
169 | /* Send and wait for a get_device_info of devices in the layout, | 130 | /* Send and wait for a get_device_info of devices in the layout, |
170 | then look them up with the osd_initiator library */ | 131 | then look them up with the osd_initiator library */ |
171 | static struct objio_dev_ent *_device_lookup(struct pnfs_layout_hdr *pnfslay, | 132 | static int objio_devices_lookup(struct pnfs_layout_hdr *pnfslay, |
172 | struct objio_segment *objio_seg, unsigned comp, | 133 | struct objio_segment *objio_seg, unsigned c, struct nfs4_deviceid *d_id, |
173 | gfp_t gfp_flags) | 134 | gfp_t gfp_flags) |
174 | { | 135 | { |
175 | struct pnfs_osd_deviceaddr *deviceaddr; | 136 | struct pnfs_osd_deviceaddr *deviceaddr; |
176 | struct nfs4_deviceid *d_id; | ||
177 | struct objio_dev_ent *ode; | 137 | struct objio_dev_ent *ode; |
178 | struct osd_dev *od; | 138 | struct osd_dev *od; |
179 | struct osd_dev_info odi; | 139 | struct osd_dev_info odi; |
180 | int err; | 140 | int err; |
181 | 141 | ||
182 | d_id = &objio_seg->comps[comp].oc_object_id.oid_device_id; | ||
183 | |||
184 | ode = _dev_list_find(NFS_SERVER(pnfslay->plh_inode), d_id); | 142 | ode = _dev_list_find(NFS_SERVER(pnfslay->plh_inode), d_id); |
185 | if (ode) | 143 | if (ode) { |
186 | return ode; | 144 | objio_seg->oc.ods[c] = &ode->od; /* must use container_of */ |
145 | return 0; | ||
146 | } | ||
187 | 147 | ||
188 | err = objlayout_get_deviceinfo(pnfslay, d_id, &deviceaddr, gfp_flags); | 148 | err = objlayout_get_deviceinfo(pnfslay, d_id, &deviceaddr, gfp_flags); |
189 | if (unlikely(err)) { | 149 | if (unlikely(err)) { |
190 | dprintk("%s: objlayout_get_deviceinfo dev(%llx:%llx) =>%d\n", | 150 | dprintk("%s: objlayout_get_deviceinfo dev(%llx:%llx) =>%d\n", |
191 | __func__, _DEVID_LO(d_id), _DEVID_HI(d_id), err); | 151 | __func__, _DEVID_LO(d_id), _DEVID_HI(d_id), err); |
192 | return ERR_PTR(err); | 152 | return err; |
193 | } | 153 | } |
194 | 154 | ||
195 | odi.systemid_len = deviceaddr->oda_systemid.len; | 155 | odi.systemid_len = deviceaddr->oda_systemid.len; |
196 | if (odi.systemid_len > sizeof(odi.systemid)) { | 156 | if (odi.systemid_len > sizeof(odi.systemid)) { |
157 | dprintk("%s: odi.systemid_len > sizeof(systemid=%zd)\n", | ||
158 | __func__, sizeof(odi.systemid)); | ||
197 | err = -EINVAL; | 159 | err = -EINVAL; |
198 | goto out; | 160 | goto out; |
199 | } else if (odi.systemid_len) | 161 | } else if (odi.systemid_len) |
@@ -218,96 +180,53 @@ static struct objio_dev_ent *_device_lookup(struct pnfs_layout_hdr *pnfslay,
218 | 180 | ||
219 | ode = _dev_list_add(NFS_SERVER(pnfslay->plh_inode), d_id, od, | 181 | ode = _dev_list_add(NFS_SERVER(pnfslay->plh_inode), d_id, od, |
220 | gfp_flags); | 182 | gfp_flags); |
221 | 183 | objio_seg->oc.ods[c] = &ode->od; /* must use container_of */ | |
184 | dprintk("Adding new dev_id(%llx:%llx)\n", | ||
185 | _DEVID_LO(d_id), _DEVID_HI(d_id)); | ||
222 | out: | 186 | out: |
223 | dprintk("%s: return=%d\n", __func__, err); | ||
224 | objlayout_put_deviceinfo(deviceaddr); | 187 | objlayout_put_deviceinfo(deviceaddr); |
225 | return err ? ERR_PTR(err) : ode; | 188 | return err; |
226 | } | 189 | } |
227 | 190 | ||
228 | static int objio_devices_lookup(struct pnfs_layout_hdr *pnfslay, | 191 | static void copy_single_comp(struct ore_components *oc, unsigned c, |
229 | struct objio_segment *objio_seg, | 192 | struct pnfs_osd_object_cred *src_comp) |
230 | gfp_t gfp_flags) | ||
231 | { | 193 | { |
232 | unsigned i; | 194 | struct ore_comp *ocomp = &oc->comps[c]; |
233 | int err; | ||
234 | 195 | ||
235 | /* lookup all devices */ | 196 | WARN_ON(src_comp->oc_cap_key.cred_len > 0); /* libosd is NO_SEC only */ |
236 | for (i = 0; i < objio_seg->num_comps; i++) { | 197 | WARN_ON(src_comp->oc_cap.cred_len > sizeof(ocomp->cred)); |
237 | struct objio_dev_ent *ode; | ||
238 | 198 | ||
239 | ode = _device_lookup(pnfslay, objio_seg, i, gfp_flags); | 199 | ocomp->obj.partition = src_comp->oc_object_id.oid_partition_id; |
240 | if (unlikely(IS_ERR(ode))) { | 200 | ocomp->obj.id = src_comp->oc_object_id.oid_object_id; |
241 | err = PTR_ERR(ode); | ||
242 | goto out; | ||
243 | } | ||
244 | objio_seg->ods[i] = ode; | ||
245 | } | ||
246 | err = 0; | ||
247 | 201 | ||
248 | out: | 202 | memcpy(ocomp->cred, src_comp->oc_cap.cred, sizeof(ocomp->cred)); |
249 | dprintk("%s: return=%d\n", __func__, err); | ||
250 | return err; | ||
251 | } | 203 | } |
252 | 204 | ||
253 | static int _verify_data_map(struct pnfs_osd_layout *layout) | 205 | int __alloc_objio_seg(unsigned numdevs, gfp_t gfp_flags, |
206 | struct objio_segment **pseg) | ||
254 | { | 207 | { |
255 | struct pnfs_osd_data_map *data_map = &layout->olo_map; | 208 | struct __alloc_objio_segment { |
256 | u64 stripe_length; | 209 | struct objio_segment olseg; |
257 | u32 group_width; | 210 | struct ore_dev *ods[numdevs]; |
258 | 211 | struct ore_comp comps[numdevs]; | |
259 | /* FIXME: Only raid0 for now. if not go through MDS */ | 212 | } *aolseg; |
260 | if (data_map->odm_raid_algorithm != PNFS_OSD_RAID_0) { | ||
261 | printk(KERN_ERR "Only RAID_0 for now\n"); | ||
262 | return -ENOTSUPP; | ||
263 | } | ||
264 | if (0 != (data_map->odm_num_comps % (data_map->odm_mirror_cnt + 1))) { | ||
265 | printk(KERN_ERR "Data Map wrong, num_comps=%u mirrors=%u\n", | ||
266 | data_map->odm_num_comps, data_map->odm_mirror_cnt); | ||
267 | return -EINVAL; | ||
268 | } | ||
269 | 213 | ||
270 | if (data_map->odm_group_width) | 214 | aolseg = kzalloc(sizeof(*aolseg), gfp_flags); |
271 | group_width = data_map->odm_group_width; | 215 | if (unlikely(!aolseg)) { |
272 | else | 216 | dprintk("%s: Faild allocation numdevs=%d size=%zd\n", __func__, |
273 | group_width = data_map->odm_num_comps / | 217 | numdevs, sizeof(*aolseg)); |
274 | (data_map->odm_mirror_cnt + 1); | 218 | return -ENOMEM; |
275 | |||
276 | stripe_length = (u64)data_map->odm_stripe_unit * group_width; | ||
277 | if (stripe_length >= (1ULL << 32)) { | ||
278 | printk(KERN_ERR "Total Stripe length(0x%llx)" | ||
279 | " >= 32bit is not supported\n", _LLU(stripe_length)); | ||
280 | return -ENOTSUPP; | ||
281 | } | 219 | } |
282 | 220 | ||
283 | if (0 != (data_map->odm_stripe_unit & ~PAGE_MASK)) { | 221 | aolseg->olseg.oc.numdevs = numdevs; |
284 | printk(KERN_ERR "Stripe Unit(0x%llx)" | 222 | aolseg->olseg.oc.single_comp = EC_MULTPLE_COMPS; |
285 | " must be Multples of PAGE_SIZE(0x%lx)\n", | 223 | aolseg->olseg.oc.comps = aolseg->comps; |
286 | _LLU(data_map->odm_stripe_unit), PAGE_SIZE); | 224 | aolseg->olseg.oc.ods = aolseg->ods; |
287 | return -ENOTSUPP; | ||
288 | } | ||
289 | 225 | ||
226 | *pseg = &aolseg->olseg; | ||
290 | return 0; | 227 | return 0; |
291 | } | 228 | } |
292 | 229 | ||
293 | static void copy_single_comp(struct pnfs_osd_object_cred *cur_comp, | ||
294 | struct pnfs_osd_object_cred *src_comp, | ||
295 | struct caps_buffers *caps_p) | ||
296 | { | ||
297 | WARN_ON(src_comp->oc_cap_key.cred_len > sizeof(caps_p->caps_key)); | ||
298 | WARN_ON(src_comp->oc_cap.cred_len > sizeof(caps_p->creds)); | ||
299 | |||
300 | *cur_comp = *src_comp; | ||
301 | |||
302 | memcpy(caps_p->caps_key, src_comp->oc_cap_key.cred, | ||
303 | sizeof(caps_p->caps_key)); | ||
304 | cur_comp->oc_cap_key.cred = caps_p->caps_key; | ||
305 | |||
306 | memcpy(caps_p->creds, src_comp->oc_cap.cred, | ||
307 | sizeof(caps_p->creds)); | ||
308 | cur_comp->oc_cap.cred = caps_p->creds; | ||
309 | } | ||
310 | |||
311 | int objio_alloc_lseg(struct pnfs_layout_segment **outp, | 230 | int objio_alloc_lseg(struct pnfs_layout_segment **outp, |
312 | struct pnfs_layout_hdr *pnfslay, | 231 | struct pnfs_layout_hdr *pnfslay, |
313 | struct pnfs_layout_range *range, | 232 | struct pnfs_layout_range *range, |
@@ -317,59 +236,43 @@ int objio_alloc_lseg(struct pnfs_layout_segment **outp,
317 | struct objio_segment *objio_seg; | 236 | struct objio_segment *objio_seg; |
318 | struct pnfs_osd_xdr_decode_layout_iter iter; | 237 | struct pnfs_osd_xdr_decode_layout_iter iter; |
319 | struct pnfs_osd_layout layout; | 238 | struct pnfs_osd_layout layout; |
320 | struct pnfs_osd_object_cred *cur_comp, src_comp; | 239 | struct pnfs_osd_object_cred src_comp; |
321 | struct caps_buffers *caps_p; | 240 | unsigned cur_comp; |
322 | int err; | 241 | int err; |
323 | 242 | ||
324 | err = pnfs_osd_xdr_decode_layout_map(&layout, &iter, xdr); | 243 | err = pnfs_osd_xdr_decode_layout_map(&layout, &iter, xdr); |
325 | if (unlikely(err)) | 244 | if (unlikely(err)) |
326 | return err; | 245 | return err; |
327 | 246 | ||
328 | err = _verify_data_map(&layout); | 247 | err = __alloc_objio_seg(layout.olo_num_comps, gfp_flags, &objio_seg); |
329 | if (unlikely(err)) | 248 | if (unlikely(err)) |
330 | return err; | 249 | return err; |
331 | 250 | ||
332 | objio_seg = kzalloc(sizeof(*objio_seg) + | 251 | objio_seg->layout.stripe_unit = layout.olo_map.odm_stripe_unit; |
333 | sizeof(objio_seg->ods[0]) * layout.olo_num_comps + | 252 | objio_seg->layout.group_width = layout.olo_map.odm_group_width; |
334 | sizeof(*objio_seg->comps) * layout.olo_num_comps + | 253 | objio_seg->layout.group_depth = layout.olo_map.odm_group_depth; |
335 | sizeof(struct caps_buffers) * layout.olo_num_comps, | 254 | objio_seg->layout.mirrors_p1 = layout.olo_map.odm_mirror_cnt + 1; |
336 | gfp_flags); | 255 | objio_seg->layout.raid_algorithm = layout.olo_map.odm_raid_algorithm; |
337 | if (!objio_seg) | ||
338 | return -ENOMEM; | ||
339 | 256 | ||
340 | objio_seg->comps = (void *)(objio_seg->ods + layout.olo_num_comps); | 257 | err = ore_verify_layout(layout.olo_map.odm_num_comps, |
341 | cur_comp = objio_seg->comps; | 258 | &objio_seg->layout); |
342 | caps_p = (void *)(cur_comp + layout.olo_num_comps); | ||
343 | while (pnfs_osd_xdr_decode_layout_comp(&src_comp, &iter, xdr, &err)) | ||
344 | copy_single_comp(cur_comp++, &src_comp, caps_p++); | ||
345 | if (unlikely(err)) | 259 | if (unlikely(err)) |
346 | goto err; | 260 | goto err; |
347 | 261 | ||
348 | objio_seg->num_comps = layout.olo_num_comps; | 262 | objio_seg->oc.first_dev = layout.olo_comps_index; |
349 | objio_seg->comps_index = layout.olo_comps_index; | 263 | cur_comp = 0; |
350 | err = objio_devices_lookup(pnfslay, objio_seg, gfp_flags); | 264 | while (pnfs_osd_xdr_decode_layout_comp(&src_comp, &iter, xdr, &err)) { |
351 | if (err) | 265 | copy_single_comp(&objio_seg->oc, cur_comp, &src_comp); |
352 | goto err; | 266 | err = objio_devices_lookup(pnfslay, objio_seg, cur_comp, |
353 | 267 | &src_comp.oc_object_id.oid_device_id, | |
354 | objio_seg->mirrors_p1 = layout.olo_map.odm_mirror_cnt + 1; | 268 | gfp_flags); |
355 | objio_seg->stripe_unit = layout.olo_map.odm_stripe_unit; | 269 | if (err) |
356 | if (layout.olo_map.odm_group_width) { | 270 | goto err; |
357 | objio_seg->group_width = layout.olo_map.odm_group_width; | 271 | ++cur_comp; |
358 | objio_seg->group_depth = layout.olo_map.odm_group_depth; | ||
359 | objio_seg->group_count = layout.olo_map.odm_num_comps / | ||
360 | objio_seg->mirrors_p1 / | ||
361 | objio_seg->group_width; | ||
362 | } else { | ||
363 | objio_seg->group_width = layout.olo_map.odm_num_comps / | ||
364 | objio_seg->mirrors_p1; | ||
365 | objio_seg->group_depth = -1; | ||
366 | objio_seg->group_count = 1; | ||
367 | } | 272 | } |
368 | 273 | /* pnfs_osd_xdr_decode_layout_comp returns false on error */ | |
369 | /* Cache this calculation it will hit for every page */ | 274 | if (unlikely(err)) |
370 | objio_seg->max_io_size = (BIO_MAX_PAGES_KMALLOC * PAGE_SIZE - | 275 | goto err; |
371 | objio_seg->stripe_unit) * | ||
372 | objio_seg->group_width; | ||
373 | 276 | ||
374 | *outp = &objio_seg->lseg; | 277 | *outp = &objio_seg->lseg; |
375 | return 0; | 278 | return 0; |
@@ -386,43 +289,63 @@ void objio_free_lseg(struct pnfs_layout_segment *lseg)
386 | int i; | 289 | int i; |
387 | struct objio_segment *objio_seg = OBJIO_LSEG(lseg); | 290 | struct objio_segment *objio_seg = OBJIO_LSEG(lseg); |
388 | 291 | ||
389 | for (i = 0; i < objio_seg->num_comps; i++) { | 292 | for (i = 0; i < objio_seg->oc.numdevs; i++) { |
390 | if (!objio_seg->ods[i]) | 293 | struct ore_dev *od = objio_seg->oc.ods[i]; |
294 | struct objio_dev_ent *ode; | ||
295 | |||
296 | if (!od) | ||
391 | break; | 297 | break; |
392 | nfs4_put_deviceid_node(&objio_seg->ods[i]->id_node); | 298 | ode = container_of(od, typeof(*ode), od); |
299 | nfs4_put_deviceid_node(&ode->id_node); | ||
393 | } | 300 | } |
394 | kfree(objio_seg); | 301 | kfree(objio_seg); |
395 | } | 302 | } |
396 | 303 | ||
397 | int objio_alloc_io_state(struct pnfs_layout_segment *lseg, | 304 | static int |
398 | struct objlayout_io_state **outp, | 305 | objio_alloc_io_state(struct pnfs_layout_hdr *pnfs_layout_type, bool is_reading, |
399 | gfp_t gfp_flags) | 306 | struct pnfs_layout_segment *lseg, struct page **pages, unsigned pgbase, |
307 | loff_t offset, size_t count, void *rpcdata, gfp_t gfp_flags, | ||
308 | struct objio_state **outp) | ||
400 | { | 309 | { |
401 | struct objio_segment *objio_seg = OBJIO_LSEG(lseg); | 310 | struct objio_segment *objio_seg = OBJIO_LSEG(lseg); |
402 | struct objio_state *ios; | 311 | struct ore_io_state *ios; |
403 | const unsigned first_size = sizeof(*ios) + | 312 | int ret; |
404 | objio_seg->num_comps * sizeof(ios->per_dev[0]); | 313 | struct __alloc_objio_state { |
405 | const unsigned sec_size = objio_seg->num_comps * | 314 | struct objio_state objios; |
406 | sizeof(ios->ol_state.ioerrs[0]); | 315 | struct pnfs_osd_ioerr ioerrs[objio_seg->oc.numdevs]; |
407 | 316 | } *aos; | |
408 | ios = kzalloc(first_size + sec_size, gfp_flags); | 317 | |
409 | if (unlikely(!ios)) | 318 | aos = kzalloc(sizeof(*aos), gfp_flags); |
319 | if (unlikely(!aos)) | ||
410 | return -ENOMEM; | 320 | return -ENOMEM; |
411 | 321 | ||
412 | ios->layout = objio_seg; | 322 | objlayout_init_ioerrs(&aos->objios.oir, objio_seg->oc.numdevs, |
413 | ios->ol_state.ioerrs = ((void *)ios) + first_size; | 323 | aos->ioerrs, rpcdata, pnfs_layout_type); |
414 | ios->ol_state.num_comps = objio_seg->num_comps; | ||
415 | 324 | ||
416 | *outp = &ios->ol_state; | 325 | ret = ore_get_rw_state(&objio_seg->layout, &objio_seg->oc, is_reading, |
326 | offset, count, &ios); | ||
327 | if (unlikely(ret)) { | ||
328 | kfree(aos); | ||
329 | return ret; | ||
330 | } | ||
331 | |||
332 | ios->pages = pages; | ||
333 | ios->pgbase = pgbase; | ||
334 | ios->private = aos; | ||
335 | BUG_ON(ios->nr_pages > (pgbase + count + PAGE_SIZE - 1) >> PAGE_SHIFT); | ||
336 | |||
337 | aos->objios.sync = 0; | ||
338 | aos->objios.ios = ios; | ||
339 | *outp = &aos->objios; | ||
417 | return 0; | 340 | return 0; |
418 | } | 341 | } |
419 | 342 | ||
420 | void objio_free_io_state(struct objlayout_io_state *ol_state) | 343 | void objio_free_result(struct objlayout_io_res *oir) |
421 | { | 344 | { |
422 | struct objio_state *ios = container_of(ol_state, struct objio_state, | 345 | struct objio_state *objios = container_of(oir, struct objio_state, oir); |
423 | ol_state); | ||
424 | 346 | ||
425 | kfree(ios); | 347 | ore_put_io_state(objios->ios); |
348 | kfree(objios); | ||
426 | } | 349 | } |
427 | 350 | ||
428 | enum pnfs_osd_errno osd_pri_2_pnfs_err(enum osd_err_priority oep) | 351 | enum pnfs_osd_errno osd_pri_2_pnfs_err(enum osd_err_priority oep) |
@@ -455,539 +378,152 @@ enum pnfs_osd_errno osd_pri_2_pnfs_err(enum osd_err_priority oep)
455 | } | 378 | } |
456 | } | 379 | } |
457 | 380 | ||
458 | static void _clear_bio(struct bio *bio) | 381 | static void __on_dev_error(struct ore_io_state *ios, |
382 | struct ore_dev *od, unsigned dev_index, enum osd_err_priority oep, | ||
383 | u64 dev_offset, u64 dev_len) | ||
459 | { | 384 | { |
460 | struct bio_vec *bv; | 385 | struct objio_state *objios = ios->private; |
461 | unsigned i; | 386 | struct pnfs_osd_objid pooid; |
462 | 387 | struct objio_dev_ent *ode = container_of(od, typeof(*ode), od); | |
463 | __bio_for_each_segment(bv, bio, i, 0) { | 388 | /* FIXME: what to do with more-then-one-group layouts. We need to |
464 | unsigned this_count = bv->bv_len; | 389 | * translate from ore_io_state index to oc->comps index |
465 | 390 | */ | |
466 | if (likely(PAGE_SIZE == this_count)) | 391 | unsigned comp = dev_index; |
467 | clear_highpage(bv->bv_page); | ||
468 | else | ||
469 | zero_user(bv->bv_page, bv->bv_offset, this_count); | ||
470 | } | ||
471 | } | ||
472 | |||
473 | static int _io_check(struct objio_state *ios, bool is_write) | ||
474 | { | ||
475 | enum osd_err_priority oep = OSD_ERR_PRI_NO_ERROR; | ||
476 | int lin_ret = 0; | ||
477 | int i; | ||
478 | |||
479 | for (i = 0; i < ios->numdevs; i++) { | ||
480 | struct osd_sense_info osi; | ||
481 | struct osd_request *or = ios->per_dev[i].or; | ||
482 | int ret; | ||
483 | |||
484 | if (!or) | ||
485 | continue; | ||
486 | 392 | ||
487 | ret = osd_req_decode_sense(or, &osi); | 393 | pooid.oid_device_id = ode->id_node.deviceid; |
488 | if (likely(!ret)) | 394 | pooid.oid_partition_id = ios->oc->comps[comp].obj.partition; |
489 | continue; | 395 | pooid.oid_object_id = ios->oc->comps[comp].obj.id; |
490 | 396 | ||
491 | if (OSD_ERR_PRI_CLEAR_PAGES == osi.osd_err_pri) { | 397 | objlayout_io_set_result(&objios->oir, comp, |
492 | /* start read offset passed endof file */ | 398 | &pooid, osd_pri_2_pnfs_err(oep), |
493 | BUG_ON(is_write); | 399 | dev_offset, dev_len, !ios->reading); |
494 | _clear_bio(ios->per_dev[i].bio); | ||
495 | dprintk("%s: start read offset passed end of file " | ||
496 | "offset=0x%llx, length=0x%lx\n", __func__, | ||
497 | _LLU(ios->per_dev[i].offset), | ||
498 | ios->per_dev[i].length); | ||
499 | |||
500 | continue; /* we recovered */ | ||
501 | } | ||
502 | objlayout_io_set_result(&ios->ol_state, i, | ||
503 | &ios->layout->comps[i].oc_object_id, | ||
504 | osd_pri_2_pnfs_err(osi.osd_err_pri), | ||
505 | ios->per_dev[i].offset, | ||
506 | ios->per_dev[i].length, | ||
507 | is_write); | ||
508 | |||
509 | if (osi.osd_err_pri >= oep) { | ||
510 | oep = osi.osd_err_pri; | ||
511 | lin_ret = ret; | ||
512 | } | ||
513 | } | ||
514 | |||
515 | return lin_ret; | ||
516 | } | ||
517 | |||
518 | /* | ||
519 | * Common IO state helpers. | ||
520 | */ | ||
521 | static void _io_free(struct objio_state *ios) | ||
522 | { | ||
523 | unsigned i; | ||
524 | |||
525 | for (i = 0; i < ios->numdevs; i++) { | ||
526 | struct _objio_per_comp *per_dev = &ios->per_dev[i]; | ||
527 | |||
528 | if (per_dev->or) { | ||
529 | osd_end_request(per_dev->or); | ||
530 | per_dev->or = NULL; | ||
531 | } | ||
532 | |||
533 | if (per_dev->bio) { | ||
534 | bio_put(per_dev->bio); | ||
535 | per_dev->bio = NULL; | ||
536 | } | ||
537 | } | ||
538 | } | ||
539 | |||
540 | struct osd_dev *_io_od(struct objio_state *ios, unsigned dev) | ||
541 | { | ||
542 | unsigned min_dev = ios->layout->comps_index; | ||
543 | unsigned max_dev = min_dev + ios->layout->num_comps; | ||
544 | |||
545 | BUG_ON(dev < min_dev || max_dev <= dev); | ||
546 | return ios->layout->ods[dev - min_dev]->od; | ||
547 | } | ||
548 | |||
549 | struct _striping_info { | ||
550 | u64 obj_offset; | ||
551 | u64 group_length; | ||
552 | unsigned dev; | ||
553 | unsigned unit_off; | ||
554 | }; | ||
555 | |||
556 | static void _calc_stripe_info(struct objio_state *ios, u64 file_offset, | ||
557 | struct _striping_info *si) | ||
558 | { | ||
559 | u32 stripe_unit = ios->layout->stripe_unit; | ||
560 | u32 group_width = ios->layout->group_width; | ||
561 | u64 group_depth = ios->layout->group_depth; | ||
562 | u32 U = stripe_unit * group_width; | ||
563 | |||
564 | u64 T = U * group_depth; | ||
565 | u64 S = T * ios->layout->group_count; | ||
566 | u64 M = div64_u64(file_offset, S); | ||
567 | |||
568 | /* | ||
569 | G = (L - (M * S)) / T | ||
570 | H = (L - (M * S)) % T | ||
571 | */ | ||
572 | u64 LmodU = file_offset - M * S; | ||
573 | u32 G = div64_u64(LmodU, T); | ||
574 | u64 H = LmodU - G * T; | ||
575 | |||
576 | u32 N = div_u64(H, U); | ||
577 | |||
578 | div_u64_rem(file_offset, stripe_unit, &si->unit_off); | ||
579 | si->obj_offset = si->unit_off + (N * stripe_unit) + | ||
580 | (M * group_depth * stripe_unit); | ||
581 | |||
582 | /* "H - (N * U)" is just "H % U" so it's bound to u32 */ | ||
583 | si->dev = (u32)(H - (N * U)) / stripe_unit + G * group_width; | ||
584 | si->dev *= ios->layout->mirrors_p1; | ||
585 | |||
586 | si->group_length = T - H; | ||
587 | } | ||
588 | |||
589 | static int _add_stripe_unit(struct objio_state *ios, unsigned *cur_pg, | ||
590 | unsigned pgbase, struct _objio_per_comp *per_dev, int len, | ||
591 | gfp_t gfp_flags) | ||
592 | { | ||
593 | unsigned pg = *cur_pg; | ||
594 | int cur_len = len; | ||
595 | struct request_queue *q = | ||
596 | osd_request_queue(_io_od(ios, per_dev->dev)); | ||
597 | |||
598 | if (per_dev->bio == NULL) { | ||
599 | unsigned pages_in_stripe = ios->layout->group_width * | ||
600 | (ios->layout->stripe_unit / PAGE_SIZE); | ||
601 | unsigned bio_size = (ios->ol_state.nr_pages + pages_in_stripe) / | ||
602 | ios->layout->group_width; | ||
603 | |||
604 | if (BIO_MAX_PAGES_KMALLOC < bio_size) | ||
605 | bio_size = BIO_MAX_PAGES_KMALLOC; | ||
606 | |||
607 | per_dev->bio = bio_kmalloc(gfp_flags, bio_size); | ||
608 | if (unlikely(!per_dev->bio)) { | ||
609 | dprintk("Faild to allocate BIO size=%u\n", bio_size); | ||
610 | return -ENOMEM; | ||
611 | } | ||
612 | } | ||
613 | |||
614 | while (cur_len > 0) { | ||
615 | unsigned pglen = min_t(unsigned, PAGE_SIZE - pgbase, cur_len); | ||
616 | unsigned added_len; | ||
617 | |||
618 | BUG_ON(ios->ol_state.nr_pages <= pg); | ||
619 | cur_len -= pglen; | ||
620 | |||
621 | added_len = bio_add_pc_page(q, per_dev->bio, | ||
622 | ios->ol_state.pages[pg], pglen, pgbase); | ||
623 | if (unlikely(pglen != added_len)) | ||
624 | return -ENOMEM; | ||
625 | pgbase = 0; | ||
626 | ++pg; | ||
627 | } | ||
628 | BUG_ON(cur_len); | ||
629 | |||
630 | per_dev->length += len; | ||
631 | *cur_pg = pg; | ||
632 | return 0; | ||
633 | } | ||
634 | |||
635 | static int _prepare_one_group(struct objio_state *ios, u64 length, | ||
636 | struct _striping_info *si, unsigned *last_pg, | ||
637 | gfp_t gfp_flags) | ||
638 | { | ||
639 | unsigned stripe_unit = ios->layout->stripe_unit; | ||
640 | unsigned mirrors_p1 = ios->layout->mirrors_p1; | ||
641 | unsigned devs_in_group = ios->layout->group_width * mirrors_p1; | ||
642 | unsigned dev = si->dev; | ||
643 | unsigned first_dev = dev - (dev % devs_in_group); | ||
644 | unsigned max_comp = ios->numdevs ? ios->numdevs - mirrors_p1 : 0; | ||
645 | unsigned cur_pg = *last_pg; | ||
646 | int ret = 0; | ||
647 | |||
648 | while (length) { | ||
649 | struct _objio_per_comp *per_dev = &ios->per_dev[dev - first_dev]; | ||
650 | unsigned cur_len, page_off = 0; | ||
651 | |||
652 | if (!per_dev->length) { | ||
653 | per_dev->dev = dev; | ||
654 | if (dev < si->dev) { | ||
655 | per_dev->offset = si->obj_offset + stripe_unit - | ||
656 | si->unit_off; | ||
657 | cur_len = stripe_unit; | ||
658 | } else if (dev == si->dev) { | ||
659 | per_dev->offset = si->obj_offset; | ||
660 | cur_len = stripe_unit - si->unit_off; | ||
661 | page_off = si->unit_off & ~PAGE_MASK; | ||
662 | BUG_ON(page_off && | ||
663 | (page_off != ios->ol_state.pgbase)); | ||
664 | } else { /* dev > si->dev */ | ||
665 | per_dev->offset = si->obj_offset - si->unit_off; | ||
666 | cur_len = stripe_unit; | ||
667 | } | ||
668 | |||
669 | if (max_comp < dev - first_dev) | ||
670 | max_comp = dev - first_dev; | ||
671 | } else { | ||
672 | cur_len = stripe_unit; | ||
673 | } | ||
674 | if (cur_len >= length) | ||
675 | cur_len = length; | ||
676 | |||
677 | ret = _add_stripe_unit(ios, &cur_pg, page_off , per_dev, | ||
678 | cur_len, gfp_flags); | ||
679 | if (unlikely(ret)) | ||
680 | goto out; | ||
681 | |||
682 | dev += mirrors_p1; | ||
683 | dev = (dev % devs_in_group) + first_dev; | ||
684 | |||
685 | length -= cur_len; | ||
686 | ios->length += cur_len; | ||
687 | } | ||
688 | out: | ||
689 | ios->numdevs = max_comp + mirrors_p1; | ||
690 | *last_pg = cur_pg; | ||
691 | return ret; | ||
692 | } | ||
693 | |||
694 | static int _io_rw_pagelist(struct objio_state *ios, gfp_t gfp_flags) | ||
695 | { | ||
696 | u64 length = ios->ol_state.count; | ||
697 | u64 offset = ios->ol_state.offset; | ||
698 | struct _striping_info si; | ||
699 | unsigned last_pg = 0; | ||
700 | int ret = 0; | ||
701 | |||
702 | while (length) { | ||
703 | _calc_stripe_info(ios, offset, &si); | ||
704 | |||
705 | if (length < si.group_length) | ||
706 | si.group_length = length; | ||
707 | |||
708 | ret = _prepare_one_group(ios, si.group_length, &si, &last_pg, gfp_flags); | ||
709 | if (unlikely(ret)) | ||
710 | goto out; | ||
711 | |||
712 | offset += si.group_length; | ||
713 | length -= si.group_length; | ||
714 | } | ||
715 | |||
716 | out: | ||
717 | if (!ios->length) | ||
718 | return ret; | ||
719 | |||
720 | return 0; | ||
721 | } | ||
722 | |||
723 | static ssize_t _sync_done(struct objio_state *ios) | ||
724 | { | ||
725 | struct completion *waiting = ios->private; | ||
726 | |||
727 | complete(waiting); | ||
728 | return 0; | ||
729 | } | ||
730 | |||
731 | static void _last_io(struct kref *kref) | ||
732 | { | ||
733 | struct objio_state *ios = container_of(kref, struct objio_state, kref); | ||
734 | |||
735 | ios->done(ios); | ||
736 | } | ||
737 | |||
738 | static void _done_io(struct osd_request *or, void *p) | ||
739 | { | ||
740 | struct objio_state *ios = p; | ||
741 | |||
742 | kref_put(&ios->kref, _last_io); | ||
743 | } | ||
744 | |||
745 | static ssize_t _io_exec(struct objio_state *ios) | ||
746 | { | ||
747 | DECLARE_COMPLETION_ONSTACK(wait); | ||
748 | ssize_t status = 0; /* sync status */ | ||
749 | unsigned i; | ||
750 | objio_done_fn saved_done_fn = ios->done; | ||
751 | bool sync = ios->ol_state.sync; | ||
752 | |||
753 | if (sync) { | ||
754 | ios->done = _sync_done; | ||
755 | ios->private = &wait; | ||
756 | } | ||
757 | |||
758 | kref_init(&ios->kref); | ||
759 | |||
760 | for (i = 0; i < ios->numdevs; i++) { | ||
761 | struct osd_request *or = ios->per_dev[i].or; | ||
762 | |||
763 | if (!or) | ||
764 | continue; | ||
765 | |||
766 | kref_get(&ios->kref); | ||
767 | osd_execute_request_async(or, _done_io, ios); | ||
768 | } | ||
769 | |||
770 | kref_put(&ios->kref, _last_io); | ||
771 | |||
772 | if (sync) { | ||
773 | wait_for_completion(&wait); | ||
774 | status = saved_done_fn(ios); | ||
775 | } | ||
776 | |||
777 | return status; | ||
778 | } | 400 | } |
779 | 401 | ||
780 | /* | 402 | /* |
781 | * read | 403 | * read |
782 | */ | 404 | */ |
783 | static ssize_t _read_done(struct objio_state *ios) | 405 | static void _read_done(struct ore_io_state *ios, void *private) |
784 | { | 406 | { |
407 | struct objio_state *objios = private; | ||
785 | ssize_t status; | 408 | ssize_t status; |
786 | int ret = _io_check(ios, false); | 409 | int ret = ore_check_io(ios, &__on_dev_error); |
787 | 410 | ||
788 | _io_free(ios); | 411 | /* FIXME: _io_free(ios) can we dealocate the libosd resources; */ |
789 | 412 | ||
790 | if (likely(!ret)) | 413 | if (likely(!ret)) |
791 | status = ios->length; | 414 | status = ios->length; |
792 | else | 415 | else |
793 | status = ret; | 416 | status = ret; |
794 | 417 | ||
795 | objlayout_read_done(&ios->ol_state, status, ios->ol_state.sync); | 418 | objlayout_read_done(&objios->oir, status, objios->sync); |
796 | return status; | ||
797 | } | 419 | } |
798 | 420 | ||
799 | static int _read_mirrors(struct objio_state *ios, unsigned cur_comp) | 421 | int objio_read_pagelist(struct nfs_read_data *rdata) |
800 | { | 422 | { |
801 | struct osd_request *or = NULL; | 423 | struct objio_state *objios; |
802 | struct _objio_per_comp *per_dev = &ios->per_dev[cur_comp]; | ||
803 | unsigned dev = per_dev->dev; | ||
804 | struct pnfs_osd_object_cred *cred = | ||
805 | &ios->layout->comps[cur_comp]; | ||
806 | struct osd_obj_id obj = { | ||
807 | .partition = cred->oc_object_id.oid_partition_id, | ||
808 | .id = cred->oc_object_id.oid_object_id, | ||
809 | }; | ||
810 | int ret; | 424 | int ret; |
811 | 425 | ||
812 | or = osd_start_request(_io_od(ios, dev), GFP_KERNEL); | 426 | ret = objio_alloc_io_state(NFS_I(rdata->inode)->layout, true, |
813 | if (unlikely(!or)) { | 427 | rdata->lseg, rdata->args.pages, rdata->args.pgbase, |
814 | ret = -ENOMEM; | 428 | rdata->args.offset, rdata->args.count, rdata, |
815 | goto err; | 429 | GFP_KERNEL, &objios); |
816 | } | ||
817 | per_dev->or = or; | ||
818 | |||
819 | osd_req_read(or, &obj, per_dev->offset, per_dev->bio, per_dev->length); | ||
820 | |||
821 | ret = osd_finalize_request(or, 0, cred->oc_cap.cred, NULL); | ||
822 | if (ret) { | ||
823 | dprintk("%s: Faild to osd_finalize_request() => %d\n", | ||
824 | __func__, ret); | ||
825 | goto err; | ||
826 | } | ||
827 | |||
828 | dprintk("%s:[%d] dev=%d obj=0x%llx start=0x%llx length=0x%lx\n", | ||
829 | __func__, cur_comp, dev, obj.id, _LLU(per_dev->offset), | ||
830 | per_dev->length); | ||
831 | |||
832 | err: | ||
833 | return ret; | ||
834 | } | ||
835 | |||
836 | static ssize_t _read_exec(struct objio_state *ios) | ||
837 | { | ||
838 | unsigned i; | ||
839 | int ret; | ||
840 | |||
841 | for (i = 0; i < ios->numdevs; i += ios->layout->mirrors_p1) { | ||
842 | if (!ios->per_dev[i].length) | ||
843 | continue; | ||
844 | ret = _read_mirrors(ios, i); | ||
845 | if (unlikely(ret)) | ||
846 | goto err; | ||
847 | } | ||
848 | |||
849 | ios->done = _read_done; | ||
850 | return _io_exec(ios); /* In sync mode exec returns the io status */ | ||
851 | |||
852 | err: | ||
853 | _io_free(ios); | ||
854 | return ret; | ||
855 | } | ||
856 | |||
857 | ssize_t objio_read_pagelist(struct objlayout_io_state *ol_state) | ||
858 | { | ||
859 | struct objio_state *ios = container_of(ol_state, struct objio_state, | ||
860 | ol_state); | ||
861 | int ret; | ||
862 | |||
863 | ret = _io_rw_pagelist(ios, GFP_KERNEL); | ||
864 | if (unlikely(ret)) | 430 | if (unlikely(ret)) |
865 | return ret; | 431 | return ret; |
866 | 432 | ||
867 | return _read_exec(ios); | 433 | objios->ios->done = _read_done; |
434 | dprintk("%s: offset=0x%llx length=0x%x\n", __func__, | ||
435 | rdata->args.offset, rdata->args.count); | ||
436 | return ore_read(objios->ios); | ||
868 | } | 437 | } |
869 | 438 | ||
870 | /* | 439 | /* |
871 | * write | 440 | * write |
872 | */ | 441 | */ |
873 | static ssize_t _write_done(struct objio_state *ios) | 442 | static void _write_done(struct ore_io_state *ios, void *private) |
874 | { | 443 | { |
444 | struct objio_state *objios = private; | ||
875 | ssize_t status; | 445 | ssize_t status; |
876 | int ret = _io_check(ios, true); | 446 | int ret = ore_check_io(ios, &__on_dev_error); |
877 | 447 | ||
878 | _io_free(ios); | 448 | /* FIXME: _io_free(ios) can we dealocate the libosd resources; */ |
879 | 449 | ||
880 | if (likely(!ret)) { | 450 | if (likely(!ret)) { |
881 | /* FIXME: should be based on the OSD's persistence model | 451 | /* FIXME: should be based on the OSD's persistence model |
882 | * See OSD2r05 Section 4.13 Data persistence model */ | 452 | * See OSD2r05 Section 4.13 Data persistence model */ |
883 | ios->ol_state.committed = NFS_FILE_SYNC; | 453 | objios->oir.committed = NFS_FILE_SYNC; |
884 | status = ios->length; | 454 | status = ios->length; |
885 | } else { | 455 | } else { |
886 | status = ret; | 456 | status = ret; |
887 | } | 457 | } |
888 | 458 | ||
889 | objlayout_write_done(&ios->ol_state, status, ios->ol_state.sync); | 459 | objlayout_write_done(&objios->oir, status, objios->sync); |
890 | return status; | ||
891 | } | 460 | } |
892 | 461 | ||
893 | static int _write_mirrors(struct objio_state *ios, unsigned cur_comp) | 462 | static struct page *__r4w_get_page(void *priv, u64 offset, bool *uptodate) |
894 | { | 463 | { |
895 | struct _objio_per_comp *master_dev = &ios->per_dev[cur_comp]; | 464 | struct objio_state *objios = priv; |
896 | unsigned dev = ios->per_dev[cur_comp].dev; | 465 | struct nfs_write_data *wdata = objios->oir.rpcdata; |
897 | unsigned last_comp = cur_comp + ios->layout->mirrors_p1; | 466 | pgoff_t index = offset / PAGE_SIZE; |
898 | int ret; | 467 | struct page *page = find_get_page(wdata->inode->i_mapping, index); |
899 | |||
900 | for (; cur_comp < last_comp; ++cur_comp, ++dev) { | ||
901 | struct osd_request *or = NULL; | ||
902 | struct pnfs_osd_object_cred *cred = | ||
903 | &ios->layout->comps[cur_comp]; | ||
904 | struct osd_obj_id obj = { | ||
905 | .partition = cred->oc_object_id.oid_partition_id, | ||
906 | .id = cred->oc_object_id.oid_object_id, | ||
907 | }; | ||
908 | struct _objio_per_comp *per_dev = &ios->per_dev[cur_comp]; | ||
909 | struct bio *bio; | ||
910 | |||
911 | or = osd_start_request(_io_od(ios, dev), GFP_NOFS); | ||
912 | if (unlikely(!or)) { | ||
913 | ret = -ENOMEM; | ||
914 | goto err; | ||
915 | } | ||
916 | per_dev->or = or; | ||
917 | |||
918 | if (per_dev != master_dev) { | ||
919 | bio = bio_kmalloc(GFP_NOFS, | ||
920 | master_dev->bio->bi_max_vecs); | ||
921 | if (unlikely(!bio)) { | ||
922 | dprintk("Faild to allocate BIO size=%u\n", | ||
923 | master_dev->bio->bi_max_vecs); | ||
924 | ret = -ENOMEM; | ||
925 | goto err; | ||
926 | } | ||
927 | |||
928 | __bio_clone(bio, master_dev->bio); | ||
929 | bio->bi_bdev = NULL; | ||
930 | bio->bi_next = NULL; | ||
931 | per_dev->bio = bio; | ||
932 | per_dev->dev = dev; | ||
933 | per_dev->length = master_dev->length; | ||
934 | per_dev->offset = master_dev->offset; | ||
935 | } else { | ||
936 | bio = master_dev->bio; | ||
937 | bio->bi_rw |= REQ_WRITE; | ||
938 | } | ||
939 | |||
940 | osd_req_write(or, &obj, per_dev->offset, bio, per_dev->length); | ||
941 | 468 | ||
942 | ret = osd_finalize_request(or, 0, cred->oc_cap.cred, NULL); | 469 | if (!page) { |
943 | if (ret) { | 470 | page = find_or_create_page(wdata->inode->i_mapping, |
944 | dprintk("%s: Faild to osd_finalize_request() => %d\n", | 471 | index, GFP_NOFS); |
945 | __func__, ret); | 472 | if (unlikely(!page)) { |
946 | goto err; | 473 | dprintk("%s: grab_cache_page Failed index=0x%lx\n", |
474 | __func__, index); | ||
475 | return NULL; | ||
947 | } | 476 | } |
948 | 477 | unlock_page(page); | |
949 | dprintk("%s:[%d] dev=%d obj=0x%llx start=0x%llx length=0x%lx\n", | ||
950 | __func__, cur_comp, dev, obj.id, _LLU(per_dev->offset), | ||
951 | per_dev->length); | ||
952 | } | 478 | } |
479 | if (PageDirty(page) || PageWriteback(page)) | ||
480 | *uptodate = true; | ||
481 | else | ||
482 | *uptodate = PageUptodate(page); | ||
483 | dprintk("%s: index=0x%lx uptodate=%d\n", __func__, index, *uptodate); | ||
484 | return page; | ||
485 | } | ||
953 | 486 | ||
954 | err: | 487 | static void __r4w_put_page(void *priv, struct page *page) |
955 | return ret; | 488 | { |
489 | dprintk("%s: index=0x%lx\n", __func__, page->index); | ||
490 | page_cache_release(page); | ||
491 | return; | ||
956 | } | 492 | } |
957 | 493 | ||
958 | static ssize_t _write_exec(struct objio_state *ios) | 494 | static const struct _ore_r4w_op _r4w_op = { |
495 | .get_page = &__r4w_get_page, | ||
496 | .put_page = &__r4w_put_page, | ||
497 | }; | ||
498 | |||
499 | int objio_write_pagelist(struct nfs_write_data *wdata, int how) | ||
959 | { | 500 | { |
960 | unsigned i; | 501 | struct objio_state *objios; |
961 | int ret; | 502 | int ret; |
962 | 503 | ||
963 | for (i = 0; i < ios->numdevs; i += ios->layout->mirrors_p1) { | 504 | ret = objio_alloc_io_state(NFS_I(wdata->inode)->layout, false, |
964 | if (!ios->per_dev[i].length) | 505 | wdata->lseg, wdata->args.pages, wdata->args.pgbase, |
965 | continue; | 506 | wdata->args.offset, wdata->args.count, wdata, GFP_NOFS, |
966 | ret = _write_mirrors(ios, i); | 507 | &objios); |
967 | if (unlikely(ret)) | 508 | if (unlikely(ret)) |
968 | goto err; | 509 | return ret; |
969 | } | ||
970 | |||
971 | ios->done = _write_done; | ||
972 | return _io_exec(ios); /* In sync mode exec returns the io->status */ | ||
973 | 510 | ||
974 | err: | 511 | objios->sync = 0 != (how & FLUSH_SYNC); |
975 | _io_free(ios); | 512 | objios->ios->r4w = &_r4w_op; |
976 | return ret; | ||
977 | } | ||
978 | 513 | ||
979 | ssize_t objio_write_pagelist(struct objlayout_io_state *ol_state, bool stable) | 514 | if (!objios->sync) |
980 | { | 515 | objios->ios->done = _write_done; |
981 | struct objio_state *ios = container_of(ol_state, struct objio_state, | ||
982 | ol_state); | ||
983 | int ret; | ||
984 | 516 | ||
985 | /* TODO: ios->stable = stable; */ | 517 | dprintk("%s: offset=0x%llx length=0x%x\n", __func__, |
986 | ret = _io_rw_pagelist(ios, GFP_NOFS); | 518 | wdata->args.offset, wdata->args.count); |
519 | ret = ore_write(objios->ios); | ||
987 | if (unlikely(ret)) | 520 | if (unlikely(ret)) |
988 | return ret; | 521 | return ret; |
989 | 522 | ||
990 | return _write_exec(ios); | 523 | if (objios->sync) |
524 | _write_done(objios->ios, objios); | ||
525 | |||
526 | return 0; | ||
991 | } | 527 | } |
992 | 528 | ||
993 | static bool objio_pg_test(struct nfs_pageio_descriptor *pgio, | 529 | static bool objio_pg_test(struct nfs_pageio_descriptor *pgio, |
@@ -997,7 +533,7 @@ static bool objio_pg_test(struct nfs_pageio_descriptor *pgio,
997 | return false; | 533 | return false; |
998 | 534 | ||
999 | return pgio->pg_count + req->wb_bytes <= | 535 | return pgio->pg_count + req->wb_bytes <= |
1000 | OBJIO_LSEG(pgio->pg_lseg)->max_io_size; | 536 | OBJIO_LSEG(pgio->pg_lseg)->layout.max_io_length; |
1001 | } | 537 | } |
1002 | 538 | ||
1003 | static const struct nfs_pageio_ops objio_pg_read_ops = { | 539 | static const struct nfs_pageio_ops objio_pg_read_ops = { |
diff --git a/fs/nfs/objlayout/objlayout.c b/fs/nfs/objlayout/objlayout.c
index 1d06f8e2adea..72074e3a04f9 100644
--- a/fs/nfs/objlayout/objlayout.c
+++ b/fs/nfs/objlayout/objlayout.c
@@ -156,77 +156,39 @@ last_byte_offset(u64 start, u64 len)
156 | return end > start ? end - 1 : NFS4_MAX_UINT64; | 156 | return end > start ? end - 1 : NFS4_MAX_UINT64; |
157 | } | 157 | } |
158 | 158 | ||
159 | static struct objlayout_io_state * | 159 | void _fix_verify_io_params(struct pnfs_layout_segment *lseg, |
160 | objlayout_alloc_io_state(struct pnfs_layout_hdr *pnfs_layout_type, | 160 | struct page ***p_pages, unsigned *p_pgbase, |
161 | struct page **pages, | 161 | u64 offset, unsigned long count) |
162 | unsigned pgbase, | ||
163 | loff_t offset, | ||
164 | size_t count, | ||
165 | struct pnfs_layout_segment *lseg, | ||
166 | void *rpcdata, | ||
167 | gfp_t gfp_flags) | ||
168 | { | 162 | { |
169 | struct objlayout_io_state *state; | ||
170 | u64 lseg_end_offset; | 163 | u64 lseg_end_offset; |
171 | 164 | ||
172 | dprintk("%s: allocating io_state\n", __func__); | ||
173 | if (objio_alloc_io_state(lseg, &state, gfp_flags)) | ||
174 | return NULL; | ||
175 | |||
176 | BUG_ON(offset < lseg->pls_range.offset); | 165 | BUG_ON(offset < lseg->pls_range.offset); |
177 | lseg_end_offset = end_offset(lseg->pls_range.offset, | 166 | lseg_end_offset = end_offset(lseg->pls_range.offset, |
178 | lseg->pls_range.length); | 167 | lseg->pls_range.length); |
179 | BUG_ON(offset >= lseg_end_offset); | 168 | BUG_ON(offset >= lseg_end_offset); |
180 | if (offset + count > lseg_end_offset) { | 169 | WARN_ON(offset + count > lseg_end_offset); |
181 | count = lseg->pls_range.length - | ||
182 | (offset - lseg->pls_range.offset); | ||
183 | dprintk("%s: truncated count %Zd\n", __func__, count); | ||
184 | } | ||
185 | 170 | ||
186 | if (pgbase > PAGE_SIZE) { | 171 | if (*p_pgbase > PAGE_SIZE) { |
187 | pages += pgbase >> PAGE_SHIFT; | 172 | dprintk("%s: pgbase(0x%x) > PAGE_SIZE\n", __func__, *p_pgbase); |
188 | pgbase &= ~PAGE_MASK; | 173 | *p_pages += *p_pgbase >> PAGE_SHIFT; |
174 | *p_pgbase &= ~PAGE_MASK; | ||
189 | } | 175 | } |
190 | |||
191 | INIT_LIST_HEAD(&state->err_list); | ||
192 | state->lseg = lseg; | ||
193 | state->rpcdata = rpcdata; | ||
194 | state->pages = pages; | ||
195 | state->pgbase = pgbase; | ||
196 | state->nr_pages = (pgbase + count + PAGE_SIZE - 1) >> PAGE_SHIFT; | ||
197 | state->offset = offset; | ||
198 | state->count = count; | ||
199 | state->sync = 0; | ||
200 | |||
201 | return state; | ||
202 | } | ||
203 | |||
204 | static void | ||
205 | objlayout_free_io_state(struct objlayout_io_state *state) | ||
206 | { | ||
207 | dprintk("%s: freeing io_state\n", __func__); | ||
208 | if (unlikely(!state)) | ||
209 | return; | ||
210 | |||
211 | objio_free_io_state(state); | ||
212 | } | 176 | } |
213 | 177 | ||
214 | /* | 178 | /* |
215 | * I/O done common code | 179 | * I/O done common code |
216 | */ | 180 | */ |
217 | static void | 181 | static void |
218 | objlayout_iodone(struct objlayout_io_state *state) | 182 | objlayout_iodone(struct objlayout_io_res *oir) |
219 | { | 183 | { |
220 | dprintk("%s: state %p status\n", __func__, state); | 184 | if (likely(oir->status >= 0)) { |
221 | 185 | objio_free_result(oir); | |
222 | if (likely(state->status >= 0)) { | ||
223 | objlayout_free_io_state(state); | ||
224 | } else { | 186 | } else { |
225 | struct objlayout *objlay = OBJLAYOUT(state->lseg->pls_layout); | 187 | struct objlayout *objlay = oir->objlay; |
226 | 188 | ||
227 | spin_lock(&objlay->lock); | 189 | spin_lock(&objlay->lock); |
228 | objlay->delta_space_valid = OBJ_DSU_INVALID; | 190 | objlay->delta_space_valid = OBJ_DSU_INVALID; |
229 | list_add(&objlay->err_list, &state->err_list); | 191 | list_add(&objlay->err_list, &oir->err_list); |
230 | spin_unlock(&objlay->lock); | 192 | spin_unlock(&objlay->lock); |
231 | } | 193 | } |
232 | } | 194 | } |
@@ -238,13 +200,13 @@ objlayout_iodone(struct objlayout_io_state *state)
238 | * the error for later reporting at layout-return. | 200 | * the error for later reporting at layout-return. |
239 | */ | 201 | */ |
240 | void | 202 | void |
241 | objlayout_io_set_result(struct objlayout_io_state *state, unsigned index, | 203 | objlayout_io_set_result(struct objlayout_io_res *oir, unsigned index, |
242 | struct pnfs_osd_objid *pooid, int osd_error, | 204 | struct pnfs_osd_objid *pooid, int osd_error, |
243 | u64 offset, u64 length, bool is_write) | 205 | u64 offset, u64 length, bool is_write) |
244 | { | 206 | { |
245 | struct pnfs_osd_ioerr *ioerr = &state->ioerrs[index]; | 207 | struct pnfs_osd_ioerr *ioerr = &oir->ioerrs[index]; |
246 | 208 | ||
247 | BUG_ON(index >= state->num_comps); | 209 | BUG_ON(index >= oir->num_comps); |
248 | if (osd_error) { | 210 | if (osd_error) { |
249 | ioerr->oer_component = *pooid; | 211 | ioerr->oer_component = *pooid; |
250 | ioerr->oer_comp_offset = offset; | 212 | ioerr->oer_comp_offset = offset; |
@@ -285,21 +247,18 @@ static void _rpc_read_complete(struct work_struct *work)
285 | } | 247 | } |
286 | 248 | ||
287 | void | 249 | void |
288 | objlayout_read_done(struct objlayout_io_state *state, ssize_t status, bool sync) | 250 | objlayout_read_done(struct objlayout_io_res *oir, ssize_t status, bool sync) |
289 | { | 251 | { |
290 | int eof = state->eof; | 252 | struct nfs_read_data *rdata = oir->rpcdata; |
291 | struct nfs_read_data *rdata; | ||
292 | 253 | ||
293 | state->status = status; | 254 | oir->status = rdata->task.tk_status = status; |
294 | dprintk("%s: Begin status=%zd eof=%d\n", __func__, status, eof); | 255 | if (status >= 0) |
295 | rdata = state->rpcdata; | ||
296 | rdata->task.tk_status = status; | ||
297 | if (status >= 0) { | ||
298 | rdata->res.count = status; | 256 | rdata->res.count = status; |
299 | rdata->res.eof = eof; | 257 | objlayout_iodone(oir); |
300 | } | 258 | /* must not use oir after this point */ |
301 | objlayout_iodone(state); | 259 | |
302 | /* must not use state after this point */ | 260 | dprintk("%s: Return status=%zd eof=%d sync=%d\n", __func__, |
261 | status, rdata->res.eof, sync); | ||
303 | 262 | ||
304 | if (sync) | 263 | if (sync) |
305 | pnfs_ld_read_done(rdata); | 264 | pnfs_ld_read_done(rdata); |
@@ -317,40 +276,36 @@ objlayout_read_pagelist(struct nfs_read_data *rdata)
317 | { | 276 | { |
318 | loff_t offset = rdata->args.offset; | 277 | loff_t offset = rdata->args.offset; |
319 | size_t count = rdata->args.count; | 278 | size_t count = rdata->args.count; |
320 | struct objlayout_io_state *state; | 279 | int err; |
321 | ssize_t status = 0; | ||
322 | loff_t eof; | 280 | loff_t eof; |
323 | 281 | ||
324 | dprintk("%s: Begin inode %p offset %llu count %d\n", | ||
325 | __func__, rdata->inode, offset, (int)count); | ||
326 | |||
327 | eof = i_size_read(rdata->inode); | 282 | eof = i_size_read(rdata->inode); |
328 | if (unlikely(offset + count > eof)) { | 283 | if (unlikely(offset + count > eof)) { |
329 | if (offset >= eof) { | 284 | if (offset >= eof) { |
330 | status = 0; | 285 | err = 0; |
331 | rdata->res.count = 0; | 286 | rdata->res.count = 0; |
332 | rdata->res.eof = 1; | 287 | rdata->res.eof = 1; |
288 | /*FIXME: do we need to call pnfs_ld_read_done() */ | ||
333 | goto out; | 289 | goto out; |
334 | } | 290 | } |
335 | count = eof - offset; | 291 | count = eof - offset; |
336 | } | 292 | } |
337 | 293 | ||
338 | state = objlayout_alloc_io_state(NFS_I(rdata->inode)->layout, | 294 | rdata->res.eof = (offset + count) >= eof; |
339 | rdata->args.pages, rdata->args.pgbase, | 295 | _fix_verify_io_params(rdata->lseg, &rdata->args.pages, |
340 | offset, count, | 296 | &rdata->args.pgbase, |
341 | rdata->lseg, rdata, | 297 | rdata->args.offset, rdata->args.count); |
342 | GFP_KERNEL); | ||
343 | if (unlikely(!state)) { | ||
344 | status = -ENOMEM; | ||
345 | goto out; | ||
346 | } | ||
347 | 298 | ||
348 | state->eof = state->offset + state->count >= eof; | 299 | dprintk("%s: inode(%lx) offset 0x%llx count 0x%Zx eof=%d\n", |
300 | __func__, rdata->inode->i_ino, offset, count, rdata->res.eof); | ||
349 | 301 | ||
350 | status = objio_read_pagelist(state); | 302 | err = objio_read_pagelist(rdata); |
351 | out: | 303 | out: |
352 | dprintk("%s: Return status %Zd\n", __func__, status); | 304 | if (unlikely(err)) { |
353 | rdata->pnfs_error = status; | 305 | rdata->pnfs_error = err; |
306 | dprintk("%s: Returned Error %d\n", __func__, err); | ||
307 | return PNFS_NOT_ATTEMPTED; | ||
308 | } | ||
354 | return PNFS_ATTEMPTED; | 309 | return PNFS_ATTEMPTED; |
355 | } | 310 | } |
356 | 311 | ||
@@ -371,26 +326,20 @@ static void _rpc_write_complete(struct work_struct *work) | |||
371 | } | 326 | } |
372 | 327 | ||
373 | void | 328 | void |
374 | objlayout_write_done(struct objlayout_io_state *state, ssize_t status, | 329 | objlayout_write_done(struct objlayout_io_res *oir, ssize_t status, bool sync) |
375 | bool sync) | ||
376 | { | 330 | { |
377 | struct nfs_write_data *wdata; | 331 | struct nfs_write_data *wdata = oir->rpcdata; |
378 | 332 | ||
379 | dprintk("%s: Begin\n", __func__); | 333 | oir->status = wdata->task.tk_status = status; |
380 | wdata = state->rpcdata; | ||
381 | state->status = status; | ||
382 | wdata->task.tk_status = status; | ||
383 | if (status >= 0) { | 334 | if (status >= 0) { |
384 | wdata->res.count = status; | 335 | wdata->res.count = status; |
385 | wdata->verf.committed = state->committed; | 336 | wdata->verf.committed = oir->committed; |
386 | dprintk("%s: Return status %d committed %d\n", | 337 | } |
387 | __func__, wdata->task.tk_status, | 338 | objlayout_iodone(oir); |
388 | wdata->verf.committed); | 339 | /* must not use oir after this point */ |
389 | } else | 340 | |
390 | dprintk("%s: Return status %d\n", | 341 | dprintk("%s: Return status %zd committed %d sync=%d\n", __func__, |
391 | __func__, wdata->task.tk_status); | 342 | status, wdata->verf.committed, sync); |
392 | objlayout_iodone(state); | ||
393 | /* must not use state after this point */ | ||
394 | 343 | ||
395 | if (sync) | 344 | if (sync) |
396 | pnfs_ld_write_done(wdata); | 345 | pnfs_ld_write_done(wdata); |
@@ -407,30 +356,18 @@ enum pnfs_try_status | |||
407 | objlayout_write_pagelist(struct nfs_write_data *wdata, | 356 | objlayout_write_pagelist(struct nfs_write_data *wdata, |
408 | int how) | 357 | int how) |
409 | { | 358 | { |
410 | struct objlayout_io_state *state; | 359 | int err; |
411 | ssize_t status; | ||
412 | |||
413 | dprintk("%s: Begin inode %p offset %llu count %u\n", | ||
414 | __func__, wdata->inode, wdata->args.offset, wdata->args.count); | ||
415 | |||
416 | state = objlayout_alloc_io_state(NFS_I(wdata->inode)->layout, | ||
417 | wdata->args.pages, | ||
418 | wdata->args.pgbase, | ||
419 | wdata->args.offset, | ||
420 | wdata->args.count, | ||
421 | wdata->lseg, wdata, | ||
422 | GFP_NOFS); | ||
423 | if (unlikely(!state)) { | ||
424 | status = -ENOMEM; | ||
425 | goto out; | ||
426 | } | ||
427 | 360 | ||
428 | state->sync = how & FLUSH_SYNC; | 361 | _fix_verify_io_params(wdata->lseg, &wdata->args.pages, |
362 | &wdata->args.pgbase, | ||
363 | wdata->args.offset, wdata->args.count); | ||
429 | 364 | ||
430 | status = objio_write_pagelist(state, how & FLUSH_STABLE); | 365 | err = objio_write_pagelist(wdata, how); |
431 | out: | 366 | if (unlikely(err)) { |
432 | dprintk("%s: Return status %Zd\n", __func__, status); | 367 | wdata->pnfs_error = err; |
433 | wdata->pnfs_error = status; | 368 | dprintk("%s: Returned Error %d\n", __func__, err); |
369 | return PNFS_NOT_ATTEMPTED; | ||
370 | } | ||
434 | return PNFS_ATTEMPTED; | 371 | return PNFS_ATTEMPTED; |
435 | } | 372 | } |
436 | 373 | ||
@@ -537,14 +474,14 @@ merge_ioerr(struct pnfs_osd_ioerr *dest_err, | |||
537 | static void | 474 | static void |
538 | encode_accumulated_error(struct objlayout *objlay, __be32 *p) | 475 | encode_accumulated_error(struct objlayout *objlay, __be32 *p) |
539 | { | 476 | { |
540 | struct objlayout_io_state *state, *tmp; | 477 | struct objlayout_io_res *oir, *tmp; |
541 | struct pnfs_osd_ioerr accumulated_err = {.oer_errno = 0}; | 478 | struct pnfs_osd_ioerr accumulated_err = {.oer_errno = 0}; |
542 | 479 | ||
543 | list_for_each_entry_safe(state, tmp, &objlay->err_list, err_list) { | 480 | list_for_each_entry_safe(oir, tmp, &objlay->err_list, err_list) { |
544 | unsigned i; | 481 | unsigned i; |
545 | 482 | ||
546 | for (i = 0; i < state->num_comps; i++) { | 483 | for (i = 0; i < oir->num_comps; i++) { |
547 | struct pnfs_osd_ioerr *ioerr = &state->ioerrs[i]; | 484 | struct pnfs_osd_ioerr *ioerr = &oir->ioerrs[i]; |
548 | 485 | ||
549 | if (!ioerr->oer_errno) | 486 | if (!ioerr->oer_errno) |
550 | continue; | 487 | continue; |
@@ -563,8 +500,8 @@ encode_accumulated_error(struct objlayout *objlay, __be32 *p) | |||
563 | 500 | ||
564 | merge_ioerr(&accumulated_err, ioerr); | 501 | merge_ioerr(&accumulated_err, ioerr); |
565 | } | 502 | } |
566 | list_del(&state->err_list); | 503 | list_del(&oir->err_list); |
567 | objlayout_free_io_state(state); | 504 | objio_free_result(oir); |
568 | } | 505 | } |
569 | 506 | ||
570 | pnfs_osd_xdr_encode_ioerr(p, &accumulated_err); | 507 | pnfs_osd_xdr_encode_ioerr(p, &accumulated_err); |
@@ -576,7 +513,7 @@ objlayout_encode_layoutreturn(struct pnfs_layout_hdr *pnfslay, | |||
576 | const struct nfs4_layoutreturn_args *args) | 513 | const struct nfs4_layoutreturn_args *args) |
577 | { | 514 | { |
578 | struct objlayout *objlay = OBJLAYOUT(pnfslay); | 515 | struct objlayout *objlay = OBJLAYOUT(pnfslay); |
579 | struct objlayout_io_state *state, *tmp; | 516 | struct objlayout_io_res *oir, *tmp; |
580 | __be32 *start; | 517 | __be32 *start; |
581 | 518 | ||
582 | dprintk("%s: Begin\n", __func__); | 519 | dprintk("%s: Begin\n", __func__); |
@@ -585,13 +522,13 @@ objlayout_encode_layoutreturn(struct pnfs_layout_hdr *pnfslay, | |||
585 | 522 | ||
586 | spin_lock(&objlay->lock); | 523 | spin_lock(&objlay->lock); |
587 | 524 | ||
588 | list_for_each_entry_safe(state, tmp, &objlay->err_list, err_list) { | 525 | list_for_each_entry_safe(oir, tmp, &objlay->err_list, err_list) { |
589 | __be32 *last_xdr = NULL, *p; | 526 | __be32 *last_xdr = NULL, *p; |
590 | unsigned i; | 527 | unsigned i; |
591 | int res = 0; | 528 | int res = 0; |
592 | 529 | ||
593 | for (i = 0; i < state->num_comps; i++) { | 530 | for (i = 0; i < oir->num_comps; i++) { |
594 | struct pnfs_osd_ioerr *ioerr = &state->ioerrs[i]; | 531 | struct pnfs_osd_ioerr *ioerr = &oir->ioerrs[i]; |
595 | 532 | ||
596 | if (!ioerr->oer_errno) | 533 | if (!ioerr->oer_errno) |
597 | continue; | 534 | continue; |
@@ -615,7 +552,7 @@ objlayout_encode_layoutreturn(struct pnfs_layout_hdr *pnfslay, | |||
615 | } | 552 | } |
616 | 553 | ||
617 | last_xdr = p; | 554 | last_xdr = p; |
618 | pnfs_osd_xdr_encode_ioerr(p, &state->ioerrs[i]); | 555 | pnfs_osd_xdr_encode_ioerr(p, &oir->ioerrs[i]); |
619 | } | 556 | } |
620 | 557 | ||
621 | /* TODO: use xdr_write_pages */ | 558 | /* TODO: use xdr_write_pages */ |
@@ -631,8 +568,8 @@ objlayout_encode_layoutreturn(struct pnfs_layout_hdr *pnfslay, | |||
631 | encode_accumulated_error(objlay, last_xdr); | 568 | encode_accumulated_error(objlay, last_xdr); |
632 | goto loop_done; | 569 | goto loop_done; |
633 | } | 570 | } |
634 | list_del(&state->err_list); | 571 | list_del(&oir->err_list); |
635 | objlayout_free_io_state(state); | 572 | objio_free_result(oir); |
636 | } | 573 | } |
637 | loop_done: | 574 | loop_done: |
638 | spin_unlock(&objlay->lock); | 575 | spin_unlock(&objlay->lock); |
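
The objlayout.c hunks above change the I/O completion contract: the raid engine records per-component errors into the objlayout_io_res and then hands it to objlayout_read_done()/objlayout_write_done(), which either frees the result (success) or queues it on the layout's err_list for the next layoutreturn. A minimal sketch of such a completion handler, assuming hypothetical per-component arrays (comp_ids, osd_errors) rather than the actual objio_osd.c state:

static void my_read_completion(struct objlayout_io_res *oir,
			       struct pnfs_osd_objid *comp_ids,
			       int *osd_errors, unsigned int num_comps,
			       u64 offset, u64 length,
			       ssize_t status, bool sync)
{
	unsigned int i;

	/* record per-component results for a later layoutreturn */
	for (i = 0; i < num_comps; i++)
		objlayout_io_set_result(oir, i, &comp_ids[i], osd_errors[i],
					offset, length, false);

	/* on success this frees oir; on error it stays queued on the
	 * layout's err_list until objlayout_encode_layoutreturn() runs */
	objlayout_read_done(oir, status, sync);
	/* oir must not be used after this point */
}
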
diff --git a/fs/nfs/objlayout/objlayout.h b/fs/nfs/objlayout/objlayout.h index a8244c8e042d..8ec34727ed21 100644 --- a/fs/nfs/objlayout/objlayout.h +++ b/fs/nfs/objlayout/objlayout.h | |||
@@ -74,19 +74,11 @@ OBJLAYOUT(struct pnfs_layout_hdr *lo) | |||
74 | * per-I/O operation state | 74 | * per-I/O operation state |
75 | * embedded in objects provider io_state data structure | 75 | * embedded in objects provider io_state data structure |
76 | */ | 76 | */ |
77 | struct objlayout_io_state { | 77 | struct objlayout_io_res { |
78 | struct pnfs_layout_segment *lseg; | 78 | struct objlayout *objlay; |
79 | |||
80 | struct page **pages; | ||
81 | unsigned pgbase; | ||
82 | unsigned nr_pages; | ||
83 | unsigned long count; | ||
84 | loff_t offset; | ||
85 | bool sync; | ||
86 | 79 | ||
87 | void *rpcdata; | 80 | void *rpcdata; |
88 | int status; /* res */ | 81 | int status; /* res */ |
89 | int eof; /* res */ | ||
90 | int committed; /* res */ | 82 | int committed; /* res */ |
91 | 83 | ||
92 | /* Error reporting (layout_return) */ | 84 | /* Error reporting (layout_return) */ |
@@ -100,6 +92,18 @@ struct objlayout_io_state { | |||
100 | struct pnfs_osd_ioerr *ioerrs; | 92 | struct pnfs_osd_ioerr *ioerrs; |
101 | }; | 93 | }; |
102 | 94 | ||
95 | static inline | ||
96 | void objlayout_init_ioerrs(struct objlayout_io_res *oir, unsigned num_comps, | ||
97 | struct pnfs_osd_ioerr *ioerrs, void *rpcdata, | ||
98 | struct pnfs_layout_hdr *pnfs_layout_type) | ||
99 | { | ||
100 | oir->objlay = OBJLAYOUT(pnfs_layout_type); | ||
101 | oir->rpcdata = rpcdata; | ||
102 | INIT_LIST_HEAD(&oir->err_list); | ||
103 | oir->num_comps = num_comps; | ||
104 | oir->ioerrs = ioerrs; | ||
105 | } | ||
106 | |||
103 | /* | 107 | /* |
104 | * Raid engine I/O API | 108 | * Raid engine I/O API |
105 | */ | 109 | */ |
@@ -110,28 +114,24 @@ extern int objio_alloc_lseg(struct pnfs_layout_segment **outp, | |||
110 | gfp_t gfp_flags); | 114 | gfp_t gfp_flags); |
111 | extern void objio_free_lseg(struct pnfs_layout_segment *lseg); | 115 | extern void objio_free_lseg(struct pnfs_layout_segment *lseg); |
112 | 116 | ||
113 | extern int objio_alloc_io_state( | 117 | /* objio_free_result will free these @oir structs received from |
114 | struct pnfs_layout_segment *lseg, | 118 | * objlayout_{read,write}_done |
115 | struct objlayout_io_state **outp, | 119 | */ |
116 | gfp_t gfp_flags); | 120 | extern void objio_free_result(struct objlayout_io_res *oir); |
117 | extern void objio_free_io_state(struct objlayout_io_state *state); | ||
118 | 121 | ||
119 | extern ssize_t objio_read_pagelist(struct objlayout_io_state *ol_state); | 122 | extern int objio_read_pagelist(struct nfs_read_data *rdata); |
120 | extern ssize_t objio_write_pagelist(struct objlayout_io_state *ol_state, | 123 | extern int objio_write_pagelist(struct nfs_write_data *wdata, int how); |
121 | bool stable); | ||
122 | 124 | ||
123 | /* | 125 | /* |
124 | * callback API | 126 | * callback API |
125 | */ | 127 | */ |
126 | extern void objlayout_io_set_result(struct objlayout_io_state *state, | 128 | extern void objlayout_io_set_result(struct objlayout_io_res *oir, |
127 | unsigned index, struct pnfs_osd_objid *pooid, | 129 | unsigned index, struct pnfs_osd_objid *pooid, |
128 | int osd_error, u64 offset, u64 length, bool is_write); | 130 | int osd_error, u64 offset, u64 length, bool is_write); |
129 | 131 | ||
130 | static inline void | 132 | static inline void |
131 | objlayout_add_delta_space_used(struct objlayout_io_state *state, s64 space_used) | 133 | objlayout_add_delta_space_used(struct objlayout *objlay, s64 space_used) |
132 | { | 134 | { |
133 | struct objlayout *objlay = OBJLAYOUT(state->lseg->pls_layout); | ||
134 | |||
135 | /* If one of the I/Os errored out and the delta_space_used was | 135 | /* If one of the I/Os errored out and the delta_space_used was |
136 | * invalid we render the complete report as invalid. Protocol mandate | 136 | * invalid we render the complete report as invalid. Protocol mandate |
137 | * the DSU be accurate or not reported. | 137 | * the DSU be accurate or not reported. |
@@ -144,9 +144,9 @@ objlayout_add_delta_space_used(struct objlayout_io_state *state, s64 space_used) | |||
144 | spin_unlock(&objlay->lock); | 144 | spin_unlock(&objlay->lock); |
145 | } | 145 | } |
146 | 146 | ||
147 | extern void objlayout_read_done(struct objlayout_io_state *state, | 147 | extern void objlayout_read_done(struct objlayout_io_res *oir, |
148 | ssize_t status, bool sync); | 148 | ssize_t status, bool sync); |
149 | extern void objlayout_write_done(struct objlayout_io_state *state, | 149 | extern void objlayout_write_done(struct objlayout_io_res *oir, |
150 | ssize_t status, bool sync); | 150 | ssize_t status, bool sync); |
151 | 151 | ||
152 | extern int objlayout_get_deviceinfo(struct pnfs_layout_hdr *pnfslay, | 152 | extern int objlayout_get_deviceinfo(struct pnfs_layout_hdr *pnfslay, |
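
The reworked raid-engine API in objlayout.h expects the engine to embed a struct objlayout_io_res in its own per-I/O structure, initialize it with objlayout_init_ioerrs(), and free it from its objio_free_result() hook once objlayout_{read,write}_done() has finished with it. A sketch under those assumptions; struct my_io_state and the helpers are illustrative, not the real objio_osd.c code:

struct my_io_state {				/* illustrative only */
	struct objlayout_io_res oir;		/* embedded result */
	struct pnfs_osd_ioerr ioerrs[1];	/* one slot per component */
	/* ... raid-engine private members ... */
};

static int my_start_read(struct nfs_read_data *rdata)
{
	struct my_io_state *ios = kzalloc(sizeof(*ios), GFP_KERNEL);

	if (!ios)
		return -ENOMEM;
	objlayout_init_ioerrs(&ios->oir, 1, ios->ioerrs, rdata,
			      NFS_I(rdata->inode)->layout);
	/* ... submit the OSD read; its completion handler ends with
	 * objlayout_read_done(&ios->oir, status, sync) ... */
	return 0;
}

/* what the objio_free_result() hook boils down to for this layout */
static void my_free_result(struct objlayout_io_res *oir)
{
	kfree(container_of(oir, struct my_io_state, oir));
}
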
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index b60970cc7f1f..0a5ff5c19511 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c | |||
@@ -41,7 +41,7 @@ nfs_page_free(struct nfs_page *p) | |||
41 | 41 | ||
42 | /** | 42 | /** |
43 | * nfs_create_request - Create an NFS read/write request. | 43 | * nfs_create_request - Create an NFS read/write request. |
44 | * @file: file descriptor to use | 44 | * @ctx: open context to use |
45 | * @inode: inode to which the request is attached | 45 | * @inode: inode to which the request is attached |
46 | * @page: page to write | 46 | * @page: page to write |
47 | * @offset: starting offset within the page for the write | 47 | * @offset: starting offset within the page for the write |
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index ee73d9a4f700..a2478bc74442 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c | |||
@@ -1443,17 +1443,31 @@ pnfs_layoutcommit_inode(struct inode *inode, bool sync) | |||
1443 | /* Note kzalloc ensures data->res.seq_res.sr_slot == NULL */ | 1443 | /* Note kzalloc ensures data->res.seq_res.sr_slot == NULL */ |
1444 | data = kzalloc(sizeof(*data), GFP_NOFS); | 1444 | data = kzalloc(sizeof(*data), GFP_NOFS); |
1445 | if (!data) { | 1445 | if (!data) { |
1446 | mark_inode_dirty_sync(inode); | ||
1447 | status = -ENOMEM; | 1446 | status = -ENOMEM; |
1448 | goto out; | 1447 | goto out; |
1449 | } | 1448 | } |
1450 | 1449 | ||
1450 | if (!test_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) | ||
1451 | goto out_free; | ||
1452 | |||
1453 | if (test_and_set_bit(NFS_INO_LAYOUTCOMMITTING, &nfsi->flags)) { | ||
1454 | if (!sync) { | ||
1455 | status = -EAGAIN; | ||
1456 | goto out_free; | ||
1457 | } | ||
1458 | status = wait_on_bit_lock(&nfsi->flags, NFS_INO_LAYOUTCOMMITTING, | ||
1459 | nfs_wait_bit_killable, TASK_KILLABLE); | ||
1460 | if (status) | ||
1461 | goto out_free; | ||
1462 | } | ||
1463 | |||
1451 | INIT_LIST_HEAD(&data->lseg_list); | 1464 | INIT_LIST_HEAD(&data->lseg_list); |
1452 | spin_lock(&inode->i_lock); | 1465 | spin_lock(&inode->i_lock); |
1453 | if (!test_and_clear_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) { | 1466 | if (!test_and_clear_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) { |
1467 | clear_bit(NFS_INO_LAYOUTCOMMITTING, &nfsi->flags); | ||
1454 | spin_unlock(&inode->i_lock); | 1468 | spin_unlock(&inode->i_lock); |
1455 | kfree(data); | 1469 | wake_up_bit(&nfsi->flags, NFS_INO_LAYOUTCOMMITTING); |
1456 | goto out; | 1470 | goto out_free; |
1457 | } | 1471 | } |
1458 | 1472 | ||
1459 | pnfs_list_write_lseg(inode, &data->lseg_list); | 1473 | pnfs_list_write_lseg(inode, &data->lseg_list); |
@@ -1475,6 +1489,11 @@ pnfs_layoutcommit_inode(struct inode *inode, bool sync) | |||
1475 | 1489 | ||
1476 | status = nfs4_proc_layoutcommit(data, sync); | 1490 | status = nfs4_proc_layoutcommit(data, sync); |
1477 | out: | 1491 | out: |
1492 | if (status) | ||
1493 | mark_inode_dirty_sync(inode); | ||
1478 | dprintk("<-- %s status %d\n", __func__, status); | 1494 | dprintk("<-- %s status %d\n", __func__, status); |
1479 | return status; | 1495 | return status; |
1496 | out_free: | ||
1497 | kfree(data); | ||
1498 | goto out; | ||
1480 | } | 1499 | } |
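
The pnfs.c hunk serializes layoutcommit with the new NFS_INO_LAYOUTCOMMITTING bit: the first caller takes it with test_and_set_bit(), a non-blocking caller that loses the race returns -EAGAIN (and the inode is re-marked dirty by the status check at out:), while a sync caller sleeps in wait_on_bit_lock(); when there turns out to be nothing to commit the bit is cleared and waiters are woken. A minimal illustration of that bit-lock pattern on a hypothetical flags word (my_flags and MY_BUSY_BIT are made-up names):

static unsigned long my_flags;		/* stands in for nfsi->flags */
#define MY_BUSY_BIT	0		/* stands in for NFS_INO_LAYOUTCOMMITTING */

static int my_begin(bool sync)
{
	if (test_and_set_bit(MY_BUSY_BIT, &my_flags)) {
		if (!sync)
			return -EAGAIN;		/* caller retries later */
		/* sleep until the current holder releases the bit */
		return wait_on_bit_lock(&my_flags, MY_BUSY_BIT,
					nfs_wait_bit_killable, TASK_KILLABLE);
	}
	return 0;
}

static void my_end(void)
{
	clear_bit(MY_BUSY_BIT, &my_flags);
	smp_mb__after_clear_bit();
	wake_up_bit(&my_flags, MY_BUSY_BIT);
}
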
diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 2219c88d96b2..b016b8a36399 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c | |||
@@ -1243,7 +1243,6 @@ void nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data) | |||
1243 | { | 1243 | { |
1244 | struct nfs_writeargs *argp = &data->args; | 1244 | struct nfs_writeargs *argp = &data->args; |
1245 | struct nfs_writeres *resp = &data->res; | 1245 | struct nfs_writeres *resp = &data->res; |
1246 | struct nfs_server *server = NFS_SERVER(data->inode); | ||
1247 | int status; | 1246 | int status; |
1248 | 1247 | ||
1249 | dprintk("NFS: %5u nfs_writeback_done (status %d)\n", | 1248 | dprintk("NFS: %5u nfs_writeback_done (status %d)\n", |
@@ -1277,7 +1276,7 @@ void nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data) | |||
1277 | if (time_before(complain, jiffies)) { | 1276 | if (time_before(complain, jiffies)) { |
1278 | dprintk("NFS: faulty NFS server %s:" | 1277 | dprintk("NFS: faulty NFS server %s:" |
1279 | " (committed = %d) != (stable = %d)\n", | 1278 | " (committed = %d) != (stable = %d)\n", |
1280 | server->nfs_client->cl_hostname, | 1279 | NFS_SERVER(data->inode)->nfs_client->cl_hostname, |
1281 | resp->verf->committed, argp->stable); | 1280 | resp->verf->committed, argp->stable); |
1282 | complain = jiffies + 300 * HZ; | 1281 | complain = jiffies + 300 * HZ; |
1283 | } | 1282 | } |
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index dc5a1bf476b1..52cd976b6099 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c | |||
@@ -256,6 +256,8 @@ static void nfsd_last_thread(struct svc_serv *serv) | |||
256 | nfsd_serv = NULL; | 256 | nfsd_serv = NULL; |
257 | nfsd_shutdown(); | 257 | nfsd_shutdown(); |
258 | 258 | ||
259 | svc_rpcb_cleanup(serv); | ||
260 | |||
259 | printk(KERN_WARNING "nfsd: last server has exited, flushing export " | 261 | printk(KERN_WARNING "nfsd: last server has exited, flushing export " |
260 | "cache\n"); | 262 | "cache\n"); |
261 | nfsd_export_flush(); | 263 | nfsd_export_flush(); |
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 60a137b7f171..ab2c6343361a 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h | |||
@@ -229,6 +229,7 @@ struct nfs_inode { | |||
229 | #define NFS_INO_COMMIT (7) /* inode is committing unstable writes */ | 229 | #define NFS_INO_COMMIT (7) /* inode is committing unstable writes */ |
230 | #define NFS_INO_PNFS_COMMIT (8) /* use pnfs code for commit */ | 230 | #define NFS_INO_PNFS_COMMIT (8) /* use pnfs code for commit */ |
231 | #define NFS_INO_LAYOUTCOMMIT (9) /* layoutcommit required */ | 231 | #define NFS_INO_LAYOUTCOMMIT (9) /* layoutcommit required */ |
232 | #define NFS_INO_LAYOUTCOMMITTING (10) /* layoutcommit inflight */ | ||
232 | 233 | ||
233 | static inline struct nfs_inode *NFS_I(const struct inode *inode) | 234 | static inline struct nfs_inode *NFS_I(const struct inode *inode) |
234 | { | 235 | { |
diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 492486a74484..3d8f9c44e27d 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h | |||
@@ -136,6 +136,8 @@ void rpc_shutdown_client(struct rpc_clnt *); | |||
136 | void rpc_release_client(struct rpc_clnt *); | 136 | void rpc_release_client(struct rpc_clnt *); |
137 | void rpc_task_release_client(struct rpc_task *); | 137 | void rpc_task_release_client(struct rpc_task *); |
138 | 138 | ||
139 | int rpcb_create_local(void); | ||
140 | void rpcb_put_local(void); | ||
139 | int rpcb_register(u32, u32, int, unsigned short); | 141 | int rpcb_register(u32, u32, int, unsigned short); |
140 | int rpcb_v4_register(const u32 program, const u32 version, | 142 | int rpcb_v4_register(const u32 program, const u32 version, |
141 | const struct sockaddr *address, | 143 | const struct sockaddr *address, |
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index d8d5d93071b3..35b37b1e9299 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h | |||
@@ -413,6 +413,7 @@ struct svc_procedure { | |||
413 | /* | 413 | /* |
414 | * Function prototypes. | 414 | * Function prototypes. |
415 | */ | 415 | */ |
416 | void svc_rpcb_cleanup(struct svc_serv *serv); | ||
416 | struct svc_serv *svc_create(struct svc_program *, unsigned int, | 417 | struct svc_serv *svc_create(struct svc_program *, unsigned int, |
417 | void (*shutdown)(struct svc_serv *)); | 418 | void (*shutdown)(struct svc_serv *)); |
418 | struct svc_rqst *svc_prepare_thread(struct svc_serv *serv, | 419 | struct svc_rqst *svc_prepare_thread(struct svc_serv *serv, |
diff --git a/net/sunrpc/auth_unix.c b/net/sunrpc/auth_unix.c index 4cb70dc6e7ad..e50502d8ceb7 100644 --- a/net/sunrpc/auth_unix.c +++ b/net/sunrpc/auth_unix.c | |||
@@ -129,6 +129,9 @@ unx_match(struct auth_cred *acred, struct rpc_cred *rcred, int flags) | |||
129 | for (i = 0; i < groups ; i++) | 129 | for (i = 0; i < groups ; i++) |
130 | if (cred->uc_gids[i] != GROUP_AT(acred->group_info, i)) | 130 | if (cred->uc_gids[i] != GROUP_AT(acred->group_info, i)) |
131 | return 0; | 131 | return 0; |
132 | if (groups < NFS_NGROUPS && | ||
133 | cred->uc_gids[groups] != NOGROUP) | ||
134 | return 0; | ||
132 | return 1; | 135 | return 1; |
133 | } | 136 | } |
134 | 137 | ||
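
The auth_unix.c change tightens credential matching: comparing only the first groups entries let a cached cred with extra supplementary groups match an acred carrying fewer, so the cached gid list must now terminate with NOGROUP at the same position; a cached list of {10, 20, 30, NOGROUP, ...} no longer matches an acred holding only {10, 20}. An illustrative distillation of the comparison (gid_lists_match is a made-up helper, not the in-tree code):

static int gid_lists_match(const gid_t *uc_gids, unsigned int groups,
			   const gid_t *acred_gids)
{
	unsigned int i;

	for (i = 0; i < groups; i++)
		if (uc_gids[i] != acred_gids[i])
			return 0;
	/* the cached list must end exactly where the acred's does */
	if (groups < NFS_NGROUPS && uc_gids[groups] != NOGROUP)
		return 0;
	return 1;
}
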
diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index f588b852d41c..8761bf8e36fc 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c | |||
@@ -114,6 +114,9 @@ static struct rpc_program rpcb_program; | |||
114 | static struct rpc_clnt * rpcb_local_clnt; | 114 | static struct rpc_clnt * rpcb_local_clnt; |
115 | static struct rpc_clnt * rpcb_local_clnt4; | 115 | static struct rpc_clnt * rpcb_local_clnt4; |
116 | 116 | ||
117 | DEFINE_SPINLOCK(rpcb_clnt_lock); | ||
118 | unsigned int rpcb_users; | ||
119 | |||
117 | struct rpcbind_args { | 120 | struct rpcbind_args { |
118 | struct rpc_xprt * r_xprt; | 121 | struct rpc_xprt * r_xprt; |
119 | 122 | ||
@@ -161,6 +164,56 @@ static void rpcb_map_release(void *data) | |||
161 | kfree(map); | 164 | kfree(map); |
162 | } | 165 | } |
163 | 166 | ||
167 | static int rpcb_get_local(void) | ||
168 | { | ||
169 | int cnt; | ||
170 | |||
171 | spin_lock(&rpcb_clnt_lock); | ||
172 | if (rpcb_users) | ||
173 | rpcb_users++; | ||
174 | cnt = rpcb_users; | ||
175 | spin_unlock(&rpcb_clnt_lock); | ||
176 | |||
177 | return cnt; | ||
178 | } | ||
179 | |||
180 | void rpcb_put_local(void) | ||
181 | { | ||
182 | struct rpc_clnt *clnt = rpcb_local_clnt; | ||
183 | struct rpc_clnt *clnt4 = rpcb_local_clnt4; | ||
184 | int shutdown; | ||
185 | |||
186 | spin_lock(&rpcb_clnt_lock); | ||
187 | if (--rpcb_users == 0) { | ||
188 | rpcb_local_clnt = NULL; | ||
189 | rpcb_local_clnt4 = NULL; | ||
190 | } | ||
191 | shutdown = !rpcb_users; | ||
192 | spin_unlock(&rpcb_clnt_lock); | ||
193 | |||
194 | if (shutdown) { | ||
195 | /* | ||
196 | * cleanup_rpcb_clnt - remove xprtsock's sysctls, unregister | ||
197 | */ | ||
198 | if (clnt4) | ||
199 | rpc_shutdown_client(clnt4); | ||
200 | if (clnt) | ||
201 | rpc_shutdown_client(clnt); | ||
202 | } | ||
203 | } | ||
204 | |||
205 | static void rpcb_set_local(struct rpc_clnt *clnt, struct rpc_clnt *clnt4) | ||
206 | { | ||
207 | /* Protected by rpcb_create_local_mutex */ | ||
208 | rpcb_local_clnt = clnt; | ||
209 | rpcb_local_clnt4 = clnt4; | ||
210 | smp_wmb(); | ||
211 | rpcb_users = 1; | ||
212 | dprintk("RPC: created new rpcb local clients (rpcb_local_clnt: " | ||
213 | "%p, rpcb_local_clnt4: %p)\n", rpcb_local_clnt, | ||
214 | rpcb_local_clnt4); | ||
215 | } | ||
216 | |||
164 | /* | 217 | /* |
165 | * Returns zero on success, otherwise a negative errno value | 218 | * Returns zero on success, otherwise a negative errno value |
166 | * is returned. | 219 | * is returned. |
@@ -205,9 +258,7 @@ static int rpcb_create_local_unix(void) | |||
205 | clnt4 = NULL; | 258 | clnt4 = NULL; |
206 | } | 259 | } |
207 | 260 | ||
208 | /* Protected by rpcb_create_local_mutex */ | 261 | rpcb_set_local(clnt, clnt4); |
209 | rpcb_local_clnt = clnt; | ||
210 | rpcb_local_clnt4 = clnt4; | ||
211 | 262 | ||
212 | out: | 263 | out: |
213 | return result; | 264 | return result; |
@@ -259,9 +310,7 @@ static int rpcb_create_local_net(void) | |||
259 | clnt4 = NULL; | 310 | clnt4 = NULL; |
260 | } | 311 | } |
261 | 312 | ||
262 | /* Protected by rpcb_create_local_mutex */ | 313 | rpcb_set_local(clnt, clnt4); |
263 | rpcb_local_clnt = clnt; | ||
264 | rpcb_local_clnt4 = clnt4; | ||
265 | 314 | ||
266 | out: | 315 | out: |
267 | return result; | 316 | return result; |
@@ -271,16 +320,16 @@ out: | |||
271 | * Returns zero on success, otherwise a negative errno value | 320 | * Returns zero on success, otherwise a negative errno value |
272 | * is returned. | 321 | * is returned. |
273 | */ | 322 | */ |
274 | static int rpcb_create_local(void) | 323 | int rpcb_create_local(void) |
275 | { | 324 | { |
276 | static DEFINE_MUTEX(rpcb_create_local_mutex); | 325 | static DEFINE_MUTEX(rpcb_create_local_mutex); |
277 | int result = 0; | 326 | int result = 0; |
278 | 327 | ||
279 | if (rpcb_local_clnt) | 328 | if (rpcb_get_local()) |
280 | return result; | 329 | return result; |
281 | 330 | ||
282 | mutex_lock(&rpcb_create_local_mutex); | 331 | mutex_lock(&rpcb_create_local_mutex); |
283 | if (rpcb_local_clnt) | 332 | if (rpcb_get_local()) |
284 | goto out; | 333 | goto out; |
285 | 334 | ||
286 | if (rpcb_create_local_unix() != 0) | 335 | if (rpcb_create_local_unix() != 0) |
@@ -382,11 +431,6 @@ int rpcb_register(u32 prog, u32 vers, int prot, unsigned short port) | |||
382 | struct rpc_message msg = { | 431 | struct rpc_message msg = { |
383 | .rpc_argp = &map, | 432 | .rpc_argp = &map, |
384 | }; | 433 | }; |
385 | int error; | ||
386 | |||
387 | error = rpcb_create_local(); | ||
388 | if (error) | ||
389 | return error; | ||
390 | 434 | ||
391 | dprintk("RPC: %sregistering (%u, %u, %d, %u) with local " | 435 | dprintk("RPC: %sregistering (%u, %u, %d, %u) with local " |
392 | "rpcbind\n", (port ? "" : "un"), | 436 | "rpcbind\n", (port ? "" : "un"), |
@@ -522,11 +566,7 @@ int rpcb_v4_register(const u32 program, const u32 version, | |||
522 | struct rpc_message msg = { | 566 | struct rpc_message msg = { |
523 | .rpc_argp = &map, | 567 | .rpc_argp = &map, |
524 | }; | 568 | }; |
525 | int error; | ||
526 | 569 | ||
527 | error = rpcb_create_local(); | ||
528 | if (error) | ||
529 | return error; | ||
530 | if (rpcb_local_clnt4 == NULL) | 570 | if (rpcb_local_clnt4 == NULL) |
531 | return -EPROTONOSUPPORT; | 571 | return -EPROTONOSUPPORT; |
532 | 572 | ||
@@ -1060,15 +1100,3 @@ static struct rpc_program rpcb_program = { | |||
1060 | .version = rpcb_version, | 1100 | .version = rpcb_version, |
1061 | .stats = &rpcb_stats, | 1101 | .stats = &rpcb_stats, |
1062 | }; | 1102 | }; |
1063 | |||
1064 | /** | ||
1065 | * cleanup_rpcb_clnt - remove xprtsock's sysctls, unregister | ||
1066 | * | ||
1067 | */ | ||
1068 | void cleanup_rpcb_clnt(void) | ||
1069 | { | ||
1070 | if (rpcb_local_clnt4) | ||
1071 | rpc_shutdown_client(rpcb_local_clnt4); | ||
1072 | if (rpcb_local_clnt) | ||
1073 | rpc_shutdown_client(rpcb_local_clnt); | ||
1074 | } | ||
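
The rpcbind client handling above becomes reference-counted: rpcb_get_local() bumps rpcb_users only when the local clients already exist, rpcb_set_local() publishes freshly created clients with a count of 1 under rpcb_create_local_mutex, and rpcb_put_local() shuts both clients down when the last user goes away, replacing the old module-exit cleanup_rpcb_clnt(). A hedged usage sketch of how a consumer pairs the newly exported calls (my_start/my_stop are illustrative; the in-tree users are svc_rpcb_setup()/svc_rpcb_cleanup() in net/sunrpc/svc.c):

static int my_start(void)			/* illustrative caller */
{
	int err;

	/* takes a reference; creates rpcb_local_clnt{,4} on first use */
	err = rpcb_create_local();
	if (err)
		return err;

	/* ... rpcb_register() / rpcb_v4_register() as needed ... */
	return 0;
}

static void my_stop(void)
{
	/* drops the reference; the final put shuts both clients down */
	rpcb_put_local();
}
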
diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c index 9d0809160994..8ec9778c3f4a 100644 --- a/net/sunrpc/sunrpc_syms.c +++ b/net/sunrpc/sunrpc_syms.c | |||
@@ -61,8 +61,6 @@ static struct pernet_operations sunrpc_net_ops = { | |||
61 | 61 | ||
62 | extern struct cache_detail unix_gid_cache; | 62 | extern struct cache_detail unix_gid_cache; |
63 | 63 | ||
64 | extern void cleanup_rpcb_clnt(void); | ||
65 | |||
66 | static int __init | 64 | static int __init |
67 | init_sunrpc(void) | 65 | init_sunrpc(void) |
68 | { | 66 | { |
@@ -102,7 +100,6 @@ out: | |||
102 | static void __exit | 100 | static void __exit |
103 | cleanup_sunrpc(void) | 101 | cleanup_sunrpc(void) |
104 | { | 102 | { |
105 | cleanup_rpcb_clnt(); | ||
106 | rpcauth_remove_module(); | 103 | rpcauth_remove_module(); |
107 | cleanup_socket_xprt(); | 104 | cleanup_socket_xprt(); |
108 | svc_cleanup_xprt_sock(); | 105 | svc_cleanup_xprt_sock(); |
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index dd5cc00ed559..6e038884ae0c 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c | |||
@@ -366,6 +366,42 @@ svc_pool_for_cpu(struct svc_serv *serv, int cpu) | |||
366 | return &serv->sv_pools[pidx % serv->sv_nrpools]; | 366 | return &serv->sv_pools[pidx % serv->sv_nrpools]; |
367 | } | 367 | } |
368 | 368 | ||
369 | static int svc_rpcb_setup(struct svc_serv *serv) | ||
370 | { | ||
371 | int err; | ||
372 | |||
373 | err = rpcb_create_local(); | ||
374 | if (err) | ||
375 | return err; | ||
376 | |||
377 | /* Remove any stale portmap registrations */ | ||
378 | svc_unregister(serv); | ||
379 | return 0; | ||
380 | } | ||
381 | |||
382 | void svc_rpcb_cleanup(struct svc_serv *serv) | ||
383 | { | ||
384 | svc_unregister(serv); | ||
385 | rpcb_put_local(); | ||
386 | } | ||
387 | EXPORT_SYMBOL_GPL(svc_rpcb_cleanup); | ||
388 | |||
389 | static int svc_uses_rpcbind(struct svc_serv *serv) | ||
390 | { | ||
391 | struct svc_program *progp; | ||
392 | unsigned int i; | ||
393 | |||
394 | for (progp = serv->sv_program; progp; progp = progp->pg_next) { | ||
395 | for (i = 0; i < progp->pg_nvers; i++) { | ||
396 | if (progp->pg_vers[i] == NULL) | ||
397 | continue; | ||
398 | if (progp->pg_vers[i]->vs_hidden == 0) | ||
399 | return 1; | ||
400 | } | ||
401 | } | ||
402 | |||
403 | return 0; | ||
404 | } | ||
369 | 405 | ||
370 | /* | 406 | /* |
371 | * Create an RPC service | 407 | * Create an RPC service |
@@ -431,8 +467,15 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools, | |||
431 | spin_lock_init(&pool->sp_lock); | 467 | spin_lock_init(&pool->sp_lock); |
432 | } | 468 | } |
433 | 469 | ||
434 | /* Remove any stale portmap registrations */ | 470 | if (svc_uses_rpcbind(serv)) { |
435 | svc_unregister(serv); | 471 | if (svc_rpcb_setup(serv) < 0) { |
472 | kfree(serv->sv_pools); | ||
473 | kfree(serv); | ||
474 | return NULL; | ||
475 | } | ||
476 | if (!serv->sv_shutdown) | ||
477 | serv->sv_shutdown = svc_rpcb_cleanup; | ||
478 | } | ||
436 | 479 | ||
437 | return serv; | 480 | return serv; |
438 | } | 481 | } |
@@ -500,7 +543,6 @@ svc_destroy(struct svc_serv *serv) | |||
500 | if (svc_serv_is_pooled(serv)) | 543 | if (svc_serv_is_pooled(serv)) |
501 | svc_pool_map_put(); | 544 | svc_pool_map_put(); |
502 | 545 | ||
503 | svc_unregister(serv); | ||
504 | kfree(serv->sv_pools); | 546 | kfree(serv->sv_pools); |
505 | kfree(serv); | 547 | kfree(serv); |
506 | } | 548 | } |
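
With the svc.c hunks, __svc_create() now brings up the local rpcbind clients via svc_rpcb_setup() whenever the service exposes at least one non-hidden program version (svc_uses_rpcbind()), and defaults sv_shutdown to svc_rpcb_cleanup() so the rpcbind reference is dropped and the registration removed when the service is torn down; services that install their own shutdown callback, such as nfsd above, call svc_rpcb_cleanup() themselves. A sketch of the resulting lifecycle, assuming svc_destroy() still runs sv_shutdown on the last reference (my_program and the buffer size are illustrative):

static struct svc_serv *my_start(void)
{
	struct svc_serv *serv;

	/* no explicit shutdown callback: if my_program has a non-hidden
	 * version, __svc_create() sets sv_shutdown = svc_rpcb_cleanup */
	serv = svc_create(&my_program, 64 * 1024, NULL);
	if (!serv)
		return NULL;	/* pool alloc or svc_rpcb_setup() failed */
	return serv;
}

static void my_stop(struct svc_serv *serv)
{
	/* last reference: sv_shutdown runs, unregistering the service and
	 * dropping the rpcbind client reference taken at create time */
	svc_destroy(serv);
}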