diff options
author | Boaz Harrosh <bharrosh@panasas.com> | 2009-11-08 07:54:08 -0500 |
---|---|---|
committer | Boaz Harrosh <bharrosh@panasas.com> | 2009-12-10 02:59:22 -0500 |
commit | 06886a5a3dc5a5abe0a4d257c26317bde7047be8 (patch) | |
tree | 858ac56e120c0473d764fc64a2660e6d79729c8c /fs/exofs/ios.c | |
parent | 8ce9bdd1fbe962933736d7977e972972cd5d754c (diff) |
exofs: Move all operations to an io_engine
In anticipation of multi-device operations, we separate osd operations
into an abstract I/O API. Currently only one device is used but later
when adding more devices, we will drive all devices in parallel according
to a "data_map" that describes how data is arranged on multiple devices.
The file system level operates, like before, as if there is one object
(inode-number) and an i_size. The io engine will split this to the same
object-number but on multiple devices.
At first we introduce a Mirror (raid 1) layout. But as the final outcome
we intend to fully implement the pNFS-Objects data-map, including
raid 0,4,5,6 over mirrored devices, over multiple device-groups. And
more. See: http://tools.ietf.org/html/draft-ietf-nfsv4-pnfs-obj-12
* Define an io_state based API for accessing osd storage devices
in an abstract way.
Usage:
First a caller allocates an io state with:
exofs_get_io_state(struct exofs_sb_info *sbi,
struct exofs_io_state** ios);
Then calls one of:
exofs_sbi_create(struct exofs_io_state *ios);
exofs_sbi_remove(struct exofs_io_state *ios);
exofs_sbi_write(struct exofs_io_state *ios);
exofs_sbi_read(struct exofs_io_state *ios);
exofs_oi_truncate(struct exofs_i_info *oi, u64 new_len);
And when done
exofs_put_io_state(struct exofs_io_state *ios);
* Convert all source files to use this new API
* Convert from bio_alloc to bio_kmalloc
* In io engine we make use of the now fixed osd_req_decode_sense
There are no functional changes or on-disk additions after this patch.
Signed-off-by: Boaz Harrosh <bharrosh@panasas.com>
Diffstat (limited to 'fs/exofs/ios.c')
-rw-r--r-- | fs/exofs/ios.c | 386 |
1 files changed, 333 insertions, 53 deletions
diff --git a/fs/exofs/ios.c b/fs/exofs/ios.c index 4372542df284..bb2f9d341fdf 100644 --- a/fs/exofs/ios.c +++ b/fs/exofs/ios.c | |||
@@ -23,88 +23,327 @@ | |||
23 | */ | 23 | */ |
24 | 24 | ||
25 | #include <scsi/scsi_device.h> | 25 | #include <scsi/scsi_device.h> |
26 | #include <scsi/osd_sense.h> | ||
27 | 26 | ||
28 | #include "exofs.h" | 27 | #include "exofs.h" |
29 | 28 | ||
30 | int exofs_check_ok_resid(struct osd_request *or, u64 *in_resid, u64 *out_resid) | 29 | void exofs_make_credential(u8 cred_a[OSD_CAP_LEN], const struct osd_obj_id *obj) |
31 | { | 30 | { |
32 | struct osd_sense_info osi; | 31 | osd_sec_init_nosec_doall_caps(cred_a, obj, false, true); |
33 | int ret = osd_req_decode_sense(or, &osi); | 32 | } |
34 | 33 | ||
35 | if (ret) { /* translate to Linux codes */ | 34 | int exofs_read_kern(struct osd_dev *od, u8 *cred, struct osd_obj_id *obj, |
36 | if (osi.additional_code == scsi_invalid_field_in_cdb) { | 35 | u64 offset, void *p, unsigned length) |
37 | if (osi.cdb_field_offset == OSD_CFO_STARTING_BYTE) | 36 | { |
38 | ret = -EFAULT; | 37 | struct osd_request *or = osd_start_request(od, GFP_KERNEL); |
39 | if (osi.cdb_field_offset == OSD_CFO_OBJECT_ID) | 38 | /* struct osd_sense_info osi = {.key = 0};*/ |
40 | ret = -ENOENT; | 39 | int ret; |
41 | else | 40 | |
42 | ret = -EINVAL; | 41 | if (unlikely(!or)) { |
43 | } else if (osi.additional_code == osd_quota_error) | 42 | EXOFS_DBGMSG("%s: osd_start_request failed.\n", __func__); |
44 | ret = -ENOSPC; | 43 | return -ENOMEM; |
45 | else | 44 | } |
46 | ret = -EIO; | 45 | ret = osd_req_read_kern(or, obj, offset, p, length); |
46 | if (unlikely(ret)) { | ||
47 | EXOFS_DBGMSG("%s: osd_req_read_kern failed.\n", __func__); | ||
48 | goto out; | ||
47 | } | 49 | } |
48 | 50 | ||
49 | /* FIXME: should be include in osd_sense_info */ | 51 | ret = osd_finalize_request(or, 0, cred, NULL); |
50 | if (in_resid) | 52 | if (unlikely(ret)) { |
51 | *in_resid = or->in.req ? or->in.req->resid_len : 0; | 53 | EXOFS_DBGMSG("Faild to osd_finalize_request() => %d\n", ret); |
54 | goto out; | ||
55 | } | ||
52 | 56 | ||
53 | if (out_resid) | 57 | ret = osd_execute_request(or); |
54 | *out_resid = or->out.req ? or->out.req->resid_len : 0; | 58 | if (unlikely(ret)) |
59 | EXOFS_DBGMSG("osd_execute_request() => %d\n", ret); | ||
60 | /* osd_req_decode_sense(or, ret); */ | ||
55 | 61 | ||
62 | out: | ||
63 | osd_end_request(or); | ||
56 | return ret; | 64 | return ret; |
57 | } | 65 | } |
58 | 66 | ||
59 | void exofs_make_credential(u8 cred_a[OSD_CAP_LEN], const struct osd_obj_id *obj) | 67 | int exofs_get_io_state(struct exofs_sb_info *sbi, struct exofs_io_state** pios) |
60 | { | 68 | { |
61 | osd_sec_init_nosec_doall_caps(cred_a, obj, false, true); | 69 | struct exofs_io_state *ios; |
70 | |||
71 | /*TODO: Maybe use kmem_cach per sbi of size | ||
72 | * exofs_io_state_size(sbi->s_numdevs) | ||
73 | */ | ||
74 | ios = kzalloc(exofs_io_state_size(1), GFP_KERNEL); | ||
75 | if (unlikely(!ios)) { | ||
76 | *pios = NULL; | ||
77 | return -ENOMEM; | ||
78 | } | ||
79 | |||
80 | ios->sbi = sbi; | ||
81 | ios->obj.partition = sbi->s_pid; | ||
82 | *pios = ios; | ||
83 | return 0; | ||
62 | } | 84 | } |
63 | 85 | ||
64 | /* | 86 | void exofs_put_io_state(struct exofs_io_state *ios) |
65 | * Perform a synchronous OSD operation. | ||
66 | */ | ||
67 | int exofs_sync_op(struct osd_request *or, int timeout, uint8_t *credential) | ||
68 | { | 87 | { |
69 | int ret; | 88 | if (ios) { |
89 | unsigned i; | ||
70 | 90 | ||
71 | or->timeout = timeout; | 91 | for (i = 0; i < ios->numdevs; i++) { |
72 | ret = osd_finalize_request(or, 0, credential, NULL); | 92 | struct exofs_per_dev_state *per_dev = &ios->per_dev[i]; |
73 | if (ret) { | 93 | |
74 | EXOFS_DBGMSG("Faild to osd_finalize_request() => %d\n", ret); | 94 | if (per_dev->or) |
75 | return ret; | 95 | osd_end_request(per_dev->or); |
96 | if (per_dev->bio) | ||
97 | bio_put(per_dev->bio); | ||
98 | } | ||
99 | |||
100 | kfree(ios); | ||
76 | } | 101 | } |
102 | } | ||
77 | 103 | ||
78 | ret = osd_execute_request(or); | 104 | static void _sync_done(struct exofs_io_state *ios, void *p) |
105 | { | ||
106 | struct completion *waiting = p; | ||
79 | 107 | ||
80 | if (ret) | 108 | complete(waiting); |
81 | EXOFS_DBGMSG("osd_execute_request() => %d\n", ret); | 109 | } |
82 | /* osd_req_decode_sense(or, ret); */ | 110 | |
111 | static void _last_io(struct kref *kref) | ||
112 | { | ||
113 | struct exofs_io_state *ios = container_of( | ||
114 | kref, struct exofs_io_state, kref); | ||
115 | |||
116 | ios->done(ios, ios->private); | ||
117 | } | ||
118 | |||
119 | static void _done_io(struct osd_request *or, void *p) | ||
120 | { | ||
121 | struct exofs_io_state *ios = p; | ||
122 | |||
123 | kref_put(&ios->kref, _last_io); | ||
124 | } | ||
125 | |||
126 | static int exofs_io_execute(struct exofs_io_state *ios) | ||
127 | { | ||
128 | DECLARE_COMPLETION_ONSTACK(wait); | ||
129 | bool sync = (ios->done == NULL); | ||
130 | int i, ret; | ||
131 | |||
132 | if (sync) { | ||
133 | ios->done = _sync_done; | ||
134 | ios->private = &wait; | ||
135 | } | ||
136 | |||
137 | for (i = 0; i < ios->numdevs; i++) { | ||
138 | struct osd_request *or = ios->per_dev[i].or; | ||
139 | if (unlikely(!or)) | ||
140 | continue; | ||
141 | |||
142 | ret = osd_finalize_request(or, 0, ios->cred, NULL); | ||
143 | if (unlikely(ret)) { | ||
144 | EXOFS_DBGMSG("Faild to osd_finalize_request() => %d\n", | ||
145 | ret); | ||
146 | return ret; | ||
147 | } | ||
148 | } | ||
149 | |||
150 | kref_init(&ios->kref); | ||
151 | |||
152 | for (i = 0; i < ios->numdevs; i++) { | ||
153 | struct osd_request *or = ios->per_dev[i].or; | ||
154 | if (unlikely(!or)) | ||
155 | continue; | ||
156 | |||
157 | kref_get(&ios->kref); | ||
158 | osd_execute_request_async(or, _done_io, ios); | ||
159 | } | ||
160 | |||
161 | kref_put(&ios->kref, _last_io); | ||
162 | ret = 0; | ||
163 | |||
164 | if (sync) { | ||
165 | wait_for_completion(&wait); | ||
166 | ret = exofs_check_io(ios, NULL); | ||
167 | } | ||
83 | return ret; | 168 | return ret; |
84 | } | 169 | } |
85 | 170 | ||
86 | /* | 171 | int exofs_check_io(struct exofs_io_state *ios, u64 *resid) |
87 | * Perform an asynchronous OSD operation. | ||
88 | */ | ||
89 | int exofs_async_op(struct osd_request *or, osd_req_done_fn *async_done, | ||
90 | void *caller_context, u8 *cred) | ||
91 | { | 172 | { |
92 | int ret; | 173 | enum osd_err_priority acumulated_osd_err = 0; |
174 | int acumulated_lin_err = 0; | ||
175 | int i; | ||
93 | 176 | ||
94 | ret = osd_finalize_request(or, 0, cred, NULL); | 177 | for (i = 0; i < ios->numdevs; i++) { |
95 | if (ret) { | 178 | struct osd_sense_info osi; |
96 | EXOFS_DBGMSG("Faild to osd_finalize_request() => %d\n", ret); | 179 | int ret = osd_req_decode_sense(ios->per_dev[i].or, &osi); |
97 | return ret; | 180 | |
181 | if (likely(!ret)) | ||
182 | continue; | ||
183 | |||
184 | if (unlikely(ret == -EFAULT)) { | ||
185 | EXOFS_DBGMSG("%s: EFAULT Need page clear\n", __func__); | ||
186 | /*FIXME: All the pages in this device range should: | ||
187 | * clear_highpage(page); | ||
188 | */ | ||
189 | } | ||
190 | |||
191 | if (osi.osd_err_pri >= acumulated_osd_err) { | ||
192 | acumulated_osd_err = osi.osd_err_pri; | ||
193 | acumulated_lin_err = ret; | ||
194 | } | ||
195 | } | ||
196 | |||
197 | /* TODO: raid specific residual calculations */ | ||
198 | if (resid) { | ||
199 | if (likely(!acumulated_lin_err)) | ||
200 | *resid = 0; | ||
201 | else | ||
202 | *resid = ios->length; | ||
203 | } | ||
204 | |||
205 | return acumulated_lin_err; | ||
206 | } | ||
207 | |||
208 | int exofs_sbi_create(struct exofs_io_state *ios) | ||
209 | { | ||
210 | int i, ret; | ||
211 | |||
212 | for (i = 0; i < 1; i++) { | ||
213 | struct osd_request *or; | ||
214 | |||
215 | or = osd_start_request(ios->sbi->s_dev, GFP_KERNEL); | ||
216 | if (unlikely(!or)) { | ||
217 | EXOFS_ERR("%s: osd_start_request failed\n", __func__); | ||
218 | ret = -ENOMEM; | ||
219 | goto out; | ||
220 | } | ||
221 | ios->per_dev[i].or = or; | ||
222 | ios->numdevs++; | ||
223 | |||
224 | osd_req_create_object(or, &ios->obj); | ||
225 | } | ||
226 | ret = exofs_io_execute(ios); | ||
227 | |||
228 | out: | ||
229 | return ret; | ||
230 | } | ||
231 | |||
232 | int exofs_sbi_remove(struct exofs_io_state *ios) | ||
233 | { | ||
234 | int i, ret; | ||
235 | |||
236 | for (i = 0; i < 1; i++) { | ||
237 | struct osd_request *or; | ||
238 | |||
239 | or = osd_start_request(ios->sbi->s_dev, GFP_KERNEL); | ||
240 | if (unlikely(!or)) { | ||
241 | EXOFS_ERR("%s: osd_start_request failed\n", __func__); | ||
242 | ret = -ENOMEM; | ||
243 | goto out; | ||
244 | } | ||
245 | ios->per_dev[i].or = or; | ||
246 | ios->numdevs++; | ||
247 | |||
248 | osd_req_remove_object(or, &ios->obj); | ||
249 | } | ||
250 | ret = exofs_io_execute(ios); | ||
251 | |||
252 | out: | ||
253 | return ret; | ||
254 | } | ||
255 | |||
256 | int exofs_sbi_write(struct exofs_io_state *ios) | ||
257 | { | ||
258 | int i, ret; | ||
259 | |||
260 | for (i = 0; i < 1; i++) { | ||
261 | struct osd_request *or; | ||
262 | |||
263 | or = osd_start_request(ios->sbi->s_dev, GFP_KERNEL); | ||
264 | if (unlikely(!or)) { | ||
265 | EXOFS_ERR("%s: osd_start_request failed\n", __func__); | ||
266 | ret = -ENOMEM; | ||
267 | goto out; | ||
268 | } | ||
269 | ios->per_dev[i].or = or; | ||
270 | ios->numdevs++; | ||
271 | |||
272 | if (ios->bio) { | ||
273 | struct bio *bio; | ||
274 | |||
275 | bio = ios->bio; | ||
276 | |||
277 | osd_req_write(or, &ios->obj, ios->offset, bio, | ||
278 | ios->length); | ||
279 | /* EXOFS_DBGMSG("write sync=%d\n", sync);*/ | ||
280 | } else if (ios->kern_buff) { | ||
281 | osd_req_write_kern(or, &ios->obj, ios->offset, | ||
282 | ios->kern_buff, ios->length); | ||
283 | /* EXOFS_DBGMSG("write_kern sync=%d\n", sync);*/ | ||
284 | } else { | ||
285 | osd_req_set_attributes(or, &ios->obj); | ||
286 | /* EXOFS_DBGMSG("set_attributes sync=%d\n", sync);*/ | ||
287 | } | ||
288 | |||
289 | if (ios->out_attr) | ||
290 | osd_req_add_set_attr_list(or, ios->out_attr, | ||
291 | ios->out_attr_len); | ||
292 | |||
293 | if (ios->in_attr) | ||
294 | osd_req_add_get_attr_list(or, ios->in_attr, | ||
295 | ios->in_attr_len); | ||
98 | } | 296 | } |
297 | ret = exofs_io_execute(ios); | ||
298 | |||
299 | out: | ||
300 | return ret; | ||
301 | } | ||
302 | |||
303 | int exofs_sbi_read(struct exofs_io_state *ios) | ||
304 | { | ||
305 | int i, ret; | ||
306 | |||
307 | for (i = 0; i < 1; i++) { | ||
308 | struct osd_request *or; | ||
309 | |||
310 | or = osd_start_request(ios->sbi->s_dev, GFP_KERNEL); | ||
311 | if (unlikely(!or)) { | ||
312 | EXOFS_ERR("%s: osd_start_request failed\n", __func__); | ||
313 | ret = -ENOMEM; | ||
314 | goto out; | ||
315 | } | ||
316 | ios->per_dev[i].or = or; | ||
317 | ios->numdevs++; | ||
318 | |||
319 | if (ios->bio) { | ||
320 | osd_req_read(or, &ios->obj, ios->offset, ios->bio, | ||
321 | ios->length); | ||
322 | /* EXOFS_DBGMSG("read sync=%d\n", sync);*/ | ||
323 | } else if (ios->kern_buff) { | ||
324 | osd_req_read_kern(or, &ios->obj, ios->offset, | ||
325 | ios->kern_buff, ios->length); | ||
326 | /* EXOFS_DBGMSG("read_kern sync=%d\n", sync);*/ | ||
327 | } else { | ||
328 | osd_req_get_attributes(or, &ios->obj); | ||
329 | /* EXOFS_DBGMSG("get_attributes sync=%d\n", sync);*/ | ||
330 | } | ||
331 | |||
332 | if (ios->out_attr) | ||
333 | osd_req_add_set_attr_list(or, ios->out_attr, | ||
334 | ios->out_attr_len); | ||
99 | 335 | ||
100 | ret = osd_execute_request_async(or, async_done, caller_context); | 336 | if (ios->in_attr) |
337 | osd_req_add_get_attr_list(or, ios->in_attr, | ||
338 | ios->in_attr_len); | ||
339 | } | ||
340 | ret = exofs_io_execute(ios); | ||
101 | 341 | ||
102 | if (ret) | 342 | out: |
103 | EXOFS_DBGMSG("osd_execute_request_async() => %d\n", ret); | ||
104 | return ret; | 343 | return ret; |
105 | } | 344 | } |
106 | 345 | ||
107 | int extract_attr_from_req(struct osd_request *or, struct osd_attr *attr) | 346 | int extract_attr_from_ios(struct exofs_io_state *ios, struct osd_attr *attr) |
108 | { | 347 | { |
109 | struct osd_attr cur_attr = {.attr_page = 0}; /* start with zeros */ | 348 | struct osd_attr cur_attr = {.attr_page = 0}; /* start with zeros */ |
110 | void *iter = NULL; | 349 | void *iter = NULL; |
@@ -112,7 +351,8 @@ int extract_attr_from_req(struct osd_request *or, struct osd_attr *attr) | |||
112 | 351 | ||
113 | do { | 352 | do { |
114 | nelem = 1; | 353 | nelem = 1; |
115 | osd_req_decode_get_attr_list(or, &cur_attr, &nelem, &iter); | 354 | osd_req_decode_get_attr_list(ios->per_dev[0].or, |
355 | &cur_attr, &nelem, &iter); | ||
116 | if ((cur_attr.attr_page == attr->attr_page) && | 356 | if ((cur_attr.attr_page == attr->attr_page) && |
117 | (cur_attr.attr_id == attr->attr_id)) { | 357 | (cur_attr.attr_id == attr->attr_id)) { |
118 | attr->len = cur_attr.len; | 358 | attr->len = cur_attr.len; |
@@ -123,3 +363,43 @@ int extract_attr_from_req(struct osd_request *or, struct osd_attr *attr) | |||
123 | 363 | ||
124 | return -EIO; | 364 | return -EIO; |
125 | } | 365 | } |
366 | |||
367 | int exofs_oi_truncate(struct exofs_i_info *oi, u64 size) | ||
368 | { | ||
369 | struct exofs_sb_info *sbi = oi->vfs_inode.i_sb->s_fs_info; | ||
370 | struct exofs_io_state *ios; | ||
371 | struct osd_attr attr; | ||
372 | __be64 newsize; | ||
373 | int i, ret; | ||
374 | |||
375 | if (exofs_get_io_state(sbi, &ios)) | ||
376 | return -ENOMEM; | ||
377 | |||
378 | ios->obj.id = exofs_oi_objno(oi); | ||
379 | ios->cred = oi->i_cred; | ||
380 | |||
381 | newsize = cpu_to_be64(size); | ||
382 | attr = g_attr_logical_length; | ||
383 | attr.val_ptr = &newsize; | ||
384 | |||
385 | for (i = 0; i < 1; i++) { | ||
386 | struct osd_request *or; | ||
387 | |||
388 | or = osd_start_request(sbi->s_dev, GFP_KERNEL); | ||
389 | if (unlikely(!or)) { | ||
390 | EXOFS_ERR("%s: osd_start_request failed\n", __func__); | ||
391 | ret = -ENOMEM; | ||
392 | goto out; | ||
393 | } | ||
394 | ios->per_dev[i].or = or; | ||
395 | ios->numdevs++; | ||
396 | |||
397 | osd_req_set_attributes(or, &ios->obj); | ||
398 | osd_req_add_set_attr_list(or, &attr, 1); | ||
399 | } | ||
400 | ret = exofs_io_execute(ios); | ||
401 | |||
402 | out: | ||
403 | exofs_put_io_state(ios); | ||
404 | return ret; | ||
405 | } | ||