aboutsummaryrefslogtreecommitdiffstats
path: root/fs/exofs/ios.c
diff options
context:
space:
mode:
author Boaz Harrosh <bharrosh@panasas.com> 2009-11-08 07:54:08 -0500
committer Boaz Harrosh <bharrosh@panasas.com> 2009-12-10 02:59:22 -0500
commit 06886a5a3dc5a5abe0a4d257c26317bde7047be8 (patch)
tree 858ac56e120c0473d764fc64a2660e6d79729c8c /fs/exofs/ios.c
parent 8ce9bdd1fbe962933736d7977e972972cd5d754c (diff)
exofs: Move all operations to an io_engine
In anticipation of multi-device operations, we separate osd operations into an abstract I/O API. Currently only one device is used but later when adding more devices, we will drive all devices in parallel according to a "data_map" that describes how data is arranged on multiple devices. The file system level operates, like before, as if there is one object (inode-number) and an i_size. The io engine will split this to the same object-number but on multiple devices. At first we introduce Mirror (raid 1) layout. But at the final outcome we intend to fully implement the pNFS-Objects data-map, including raid 0,4,5,6 over mirrored devices, over multiple device-groups. And more. See: http://tools.ietf.org/html/draft-ietf-nfsv4-pnfs-obj-12 * Define an io_state based API for accessing osd storage devices in an abstract way. Usage: First a caller allocates an io state with: exofs_get_io_state(struct exofs_sb_info *sbi, struct exofs_io_state** ios); Then calls one of: exofs_sbi_create(struct exofs_io_state *ios); exofs_sbi_remove(struct exofs_io_state *ios); exofs_sbi_write(struct exofs_io_state *ios); exofs_sbi_read(struct exofs_io_state *ios); exofs_oi_truncate(struct exofs_i_info *oi, u64 new_len); And when done exofs_put_io_state(struct exofs_io_state *ios); * Convert all source files to use this new API * Convert from bio_alloc to bio_kmalloc * In io engine we make use of the now fixed osd_req_decode_sense There are no functional changes or on disk additions after this patch. Signed-off-by: Boaz Harrosh <bharrosh@panasas.com>
Diffstat (limited to 'fs/exofs/ios.c')
-rw-r--r-- fs/exofs/ios.c 386
1 files changed, 333 insertions, 53 deletions
diff --git a/fs/exofs/ios.c b/fs/exofs/ios.c
index 4372542df284..bb2f9d341fdf 100644
--- a/fs/exofs/ios.c
+++ b/fs/exofs/ios.c
@@ -23,88 +23,327 @@
23 */ 23 */
24 24
25#include <scsi/scsi_device.h> 25#include <scsi/scsi_device.h>
26#include <scsi/osd_sense.h>
27 26
28#include "exofs.h" 27#include "exofs.h"
29 28
30int exofs_check_ok_resid(struct osd_request *or, u64 *in_resid, u64 *out_resid) 29void exofs_make_credential(u8 cred_a[OSD_CAP_LEN], const struct osd_obj_id *obj)
31{ 30{
32 struct osd_sense_info osi; 31 osd_sec_init_nosec_doall_caps(cred_a, obj, false, true);
33 int ret = osd_req_decode_sense(or, &osi); 32}
34 33
35 if (ret) { /* translate to Linux codes */ 34int exofs_read_kern(struct osd_dev *od, u8 *cred, struct osd_obj_id *obj,
36 if (osi.additional_code == scsi_invalid_field_in_cdb) { 35 u64 offset, void *p, unsigned length)
37 if (osi.cdb_field_offset == OSD_CFO_STARTING_BYTE) 36{
38 ret = -EFAULT; 37 struct osd_request *or = osd_start_request(od, GFP_KERNEL);
39 if (osi.cdb_field_offset == OSD_CFO_OBJECT_ID) 38/* struct osd_sense_info osi = {.key = 0};*/
40 ret = -ENOENT; 39 int ret;
41 else 40
42 ret = -EINVAL; 41 if (unlikely(!or)) {
43 } else if (osi.additional_code == osd_quota_error) 42 EXOFS_DBGMSG("%s: osd_start_request failed.\n", __func__);
44 ret = -ENOSPC; 43 return -ENOMEM;
45 else 44 }
46 ret = -EIO; 45 ret = osd_req_read_kern(or, obj, offset, p, length);
46 if (unlikely(ret)) {
47 EXOFS_DBGMSG("%s: osd_req_read_kern failed.\n", __func__);
48 goto out;
47 } 49 }
48 50
49 /* FIXME: should be include in osd_sense_info */ 51 ret = osd_finalize_request(or, 0, cred, NULL);
50 if (in_resid) 52 if (unlikely(ret)) {
51 *in_resid = or->in.req ? or->in.req->resid_len : 0; 53 EXOFS_DBGMSG("Faild to osd_finalize_request() => %d\n", ret);
54 goto out;
55 }
52 56
53 if (out_resid) 57 ret = osd_execute_request(or);
54 *out_resid = or->out.req ? or->out.req->resid_len : 0; 58 if (unlikely(ret))
59 EXOFS_DBGMSG("osd_execute_request() => %d\n", ret);
60 /* osd_req_decode_sense(or, ret); */
55 61
62out:
63 osd_end_request(or);
56 return ret; 64 return ret;
57} 65}
58 66
59void exofs_make_credential(u8 cred_a[OSD_CAP_LEN], const struct osd_obj_id *obj) 67int exofs_get_io_state(struct exofs_sb_info *sbi, struct exofs_io_state** pios)
60{ 68{
61 osd_sec_init_nosec_doall_caps(cred_a, obj, false, true); 69 struct exofs_io_state *ios;
70
71 /*TODO: Maybe use kmem_cach per sbi of size
72 * exofs_io_state_size(sbi->s_numdevs)
73 */
74 ios = kzalloc(exofs_io_state_size(1), GFP_KERNEL);
75 if (unlikely(!ios)) {
76 *pios = NULL;
77 return -ENOMEM;
78 }
79
80 ios->sbi = sbi;
81 ios->obj.partition = sbi->s_pid;
82 *pios = ios;
83 return 0;
62} 84}
63 85
64/* 86void exofs_put_io_state(struct exofs_io_state *ios)
65 * Perform a synchronous OSD operation.
66 */
67int exofs_sync_op(struct osd_request *or, int timeout, uint8_t *credential)
68{ 87{
69 int ret; 88 if (ios) {
89 unsigned i;
70 90
71 or->timeout = timeout; 91 for (i = 0; i < ios->numdevs; i++) {
72 ret = osd_finalize_request(or, 0, credential, NULL); 92 struct exofs_per_dev_state *per_dev = &ios->per_dev[i];
73 if (ret) { 93
74 EXOFS_DBGMSG("Faild to osd_finalize_request() => %d\n", ret); 94 if (per_dev->or)
75 return ret; 95 osd_end_request(per_dev->or);
96 if (per_dev->bio)
97 bio_put(per_dev->bio);
98 }
99
100 kfree(ios);
76 } 101 }
102}
77 103
78 ret = osd_execute_request(or); 104static void _sync_done(struct exofs_io_state *ios, void *p)
105{
106 struct completion *waiting = p;
79 107
80 if (ret) 108 complete(waiting);
81 EXOFS_DBGMSG("osd_execute_request() => %d\n", ret); 109}
82 /* osd_req_decode_sense(or, ret); */ 110
111static void _last_io(struct kref *kref)
112{
113 struct exofs_io_state *ios = container_of(
114 kref, struct exofs_io_state, kref);
115
116 ios->done(ios, ios->private);
117}
118
119static void _done_io(struct osd_request *or, void *p)
120{
121 struct exofs_io_state *ios = p;
122
123 kref_put(&ios->kref, _last_io);
124}
125
126static int exofs_io_execute(struct exofs_io_state *ios)
127{
128 DECLARE_COMPLETION_ONSTACK(wait);
129 bool sync = (ios->done == NULL);
130 int i, ret;
131
132 if (sync) {
133 ios->done = _sync_done;
134 ios->private = &wait;
135 }
136
137 for (i = 0; i < ios->numdevs; i++) {
138 struct osd_request *or = ios->per_dev[i].or;
139 if (unlikely(!or))
140 continue;
141
142 ret = osd_finalize_request(or, 0, ios->cred, NULL);
143 if (unlikely(ret)) {
144 EXOFS_DBGMSG("Faild to osd_finalize_request() => %d\n",
145 ret);
146 return ret;
147 }
148 }
149
150 kref_init(&ios->kref);
151
152 for (i = 0; i < ios->numdevs; i++) {
153 struct osd_request *or = ios->per_dev[i].or;
154 if (unlikely(!or))
155 continue;
156
157 kref_get(&ios->kref);
158 osd_execute_request_async(or, _done_io, ios);
159 }
160
161 kref_put(&ios->kref, _last_io);
162 ret = 0;
163
164 if (sync) {
165 wait_for_completion(&wait);
166 ret = exofs_check_io(ios, NULL);
167 }
83 return ret; 168 return ret;
84} 169}
85 170
86/* 171int exofs_check_io(struct exofs_io_state *ios, u64 *resid)
87 * Perform an asynchronous OSD operation.
88 */
89int exofs_async_op(struct osd_request *or, osd_req_done_fn *async_done,
90 void *caller_context, u8 *cred)
91{ 172{
92 int ret; 173 enum osd_err_priority acumulated_osd_err = 0;
174 int acumulated_lin_err = 0;
175 int i;
93 176
94 ret = osd_finalize_request(or, 0, cred, NULL); 177 for (i = 0; i < ios->numdevs; i++) {
95 if (ret) { 178 struct osd_sense_info osi;
96 EXOFS_DBGMSG("Faild to osd_finalize_request() => %d\n", ret); 179 int ret = osd_req_decode_sense(ios->per_dev[i].or, &osi);
97 return ret; 180
181 if (likely(!ret))
182 continue;
183
184 if (unlikely(ret == -EFAULT)) {
185 EXOFS_DBGMSG("%s: EFAULT Need page clear\n", __func__);
186 /*FIXME: All the pages in this device range should:
187 * clear_highpage(page);
188 */
189 }
190
191 if (osi.osd_err_pri >= acumulated_osd_err) {
192 acumulated_osd_err = osi.osd_err_pri;
193 acumulated_lin_err = ret;
194 }
195 }
196
197 /* TODO: raid specific residual calculations */
198 if (resid) {
199 if (likely(!acumulated_lin_err))
200 *resid = 0;
201 else
202 *resid = ios->length;
203 }
204
205 return acumulated_lin_err;
206}
207
208int exofs_sbi_create(struct exofs_io_state *ios)
209{
210 int i, ret;
211
212 for (i = 0; i < 1; i++) {
213 struct osd_request *or;
214
215 or = osd_start_request(ios->sbi->s_dev, GFP_KERNEL);
216 if (unlikely(!or)) {
217 EXOFS_ERR("%s: osd_start_request failed\n", __func__);
218 ret = -ENOMEM;
219 goto out;
220 }
221 ios->per_dev[i].or = or;
222 ios->numdevs++;
223
224 osd_req_create_object(or, &ios->obj);
225 }
226 ret = exofs_io_execute(ios);
227
228out:
229 return ret;
230}
231
232int exofs_sbi_remove(struct exofs_io_state *ios)
233{
234 int i, ret;
235
236 for (i = 0; i < 1; i++) {
237 struct osd_request *or;
238
239 or = osd_start_request(ios->sbi->s_dev, GFP_KERNEL);
240 if (unlikely(!or)) {
241 EXOFS_ERR("%s: osd_start_request failed\n", __func__);
242 ret = -ENOMEM;
243 goto out;
244 }
245 ios->per_dev[i].or = or;
246 ios->numdevs++;
247
248 osd_req_remove_object(or, &ios->obj);
249 }
250 ret = exofs_io_execute(ios);
251
252out:
253 return ret;
254}
255
256int exofs_sbi_write(struct exofs_io_state *ios)
257{
258 int i, ret;
259
260 for (i = 0; i < 1; i++) {
261 struct osd_request *or;
262
263 or = osd_start_request(ios->sbi->s_dev, GFP_KERNEL);
264 if (unlikely(!or)) {
265 EXOFS_ERR("%s: osd_start_request failed\n", __func__);
266 ret = -ENOMEM;
267 goto out;
268 }
269 ios->per_dev[i].or = or;
270 ios->numdevs++;
271
272 if (ios->bio) {
273 struct bio *bio;
274
275 bio = ios->bio;
276
277 osd_req_write(or, &ios->obj, ios->offset, bio,
278 ios->length);
279/* EXOFS_DBGMSG("write sync=%d\n", sync);*/
280 } else if (ios->kern_buff) {
281 osd_req_write_kern(or, &ios->obj, ios->offset,
282 ios->kern_buff, ios->length);
283/* EXOFS_DBGMSG("write_kern sync=%d\n", sync);*/
284 } else {
285 osd_req_set_attributes(or, &ios->obj);
286/* EXOFS_DBGMSG("set_attributes sync=%d\n", sync);*/
287 }
288
289 if (ios->out_attr)
290 osd_req_add_set_attr_list(or, ios->out_attr,
291 ios->out_attr_len);
292
293 if (ios->in_attr)
294 osd_req_add_get_attr_list(or, ios->in_attr,
295 ios->in_attr_len);
98 } 296 }
297 ret = exofs_io_execute(ios);
298
299out:
300 return ret;
301}
302
303int exofs_sbi_read(struct exofs_io_state *ios)
304{
305 int i, ret;
306
307 for (i = 0; i < 1; i++) {
308 struct osd_request *or;
309
310 or = osd_start_request(ios->sbi->s_dev, GFP_KERNEL);
311 if (unlikely(!or)) {
312 EXOFS_ERR("%s: osd_start_request failed\n", __func__);
313 ret = -ENOMEM;
314 goto out;
315 }
316 ios->per_dev[i].or = or;
317 ios->numdevs++;
318
319 if (ios->bio) {
320 osd_req_read(or, &ios->obj, ios->offset, ios->bio,
321 ios->length);
322/* EXOFS_DBGMSG("read sync=%d\n", sync);*/
323 } else if (ios->kern_buff) {
324 osd_req_read_kern(or, &ios->obj, ios->offset,
325 ios->kern_buff, ios->length);
326/* EXOFS_DBGMSG("read_kern sync=%d\n", sync);*/
327 } else {
328 osd_req_get_attributes(or, &ios->obj);
329/* EXOFS_DBGMSG("get_attributes sync=%d\n", sync);*/
330 }
331
332 if (ios->out_attr)
333 osd_req_add_set_attr_list(or, ios->out_attr,
334 ios->out_attr_len);
99 335
100 ret = osd_execute_request_async(or, async_done, caller_context); 336 if (ios->in_attr)
337 osd_req_add_get_attr_list(or, ios->in_attr,
338 ios->in_attr_len);
339 }
340 ret = exofs_io_execute(ios);
101 341
102 if (ret) 342out:
103 EXOFS_DBGMSG("osd_execute_request_async() => %d\n", ret);
104 return ret; 343 return ret;
105} 344}
106 345
107int extract_attr_from_req(struct osd_request *or, struct osd_attr *attr) 346int extract_attr_from_ios(struct exofs_io_state *ios, struct osd_attr *attr)
108{ 347{
109 struct osd_attr cur_attr = {.attr_page = 0}; /* start with zeros */ 348 struct osd_attr cur_attr = {.attr_page = 0}; /* start with zeros */
110 void *iter = NULL; 349 void *iter = NULL;
@@ -112,7 +351,8 @@ int extract_attr_from_req(struct osd_request *or, struct osd_attr *attr)
112 351
113 do { 352 do {
114 nelem = 1; 353 nelem = 1;
115 osd_req_decode_get_attr_list(or, &cur_attr, &nelem, &iter); 354 osd_req_decode_get_attr_list(ios->per_dev[0].or,
355 &cur_attr, &nelem, &iter);
116 if ((cur_attr.attr_page == attr->attr_page) && 356 if ((cur_attr.attr_page == attr->attr_page) &&
117 (cur_attr.attr_id == attr->attr_id)) { 357 (cur_attr.attr_id == attr->attr_id)) {
118 attr->len = cur_attr.len; 358 attr->len = cur_attr.len;
@@ -123,3 +363,43 @@ int extract_attr_from_req(struct osd_request *or, struct osd_attr *attr)
123 363
124 return -EIO; 364 return -EIO;
125} 365}
366
367int exofs_oi_truncate(struct exofs_i_info *oi, u64 size)
368{
369 struct exofs_sb_info *sbi = oi->vfs_inode.i_sb->s_fs_info;
370 struct exofs_io_state *ios;
371 struct osd_attr attr;
372 __be64 newsize;
373 int i, ret;
374
375 if (exofs_get_io_state(sbi, &ios))
376 return -ENOMEM;
377
378 ios->obj.id = exofs_oi_objno(oi);
379 ios->cred = oi->i_cred;
380
381 newsize = cpu_to_be64(size);
382 attr = g_attr_logical_length;
383 attr.val_ptr = &newsize;
384
385 for (i = 0; i < 1; i++) {
386 struct osd_request *or;
387
388 or = osd_start_request(sbi->s_dev, GFP_KERNEL);
389 if (unlikely(!or)) {
390 EXOFS_ERR("%s: osd_start_request failed\n", __func__);
391 ret = -ENOMEM;
392 goto out;
393 }
394 ios->per_dev[i].or = or;
395 ios->numdevs++;
396
397 osd_req_set_attributes(or, &ios->obj);
398 osd_req_add_set_attr_list(or, &attr, 1);
399 }
400 ret = exofs_io_execute(ios);
401
402out:
403 exofs_put_io_state(ios);
404 return ret;
405}