aboutsummaryrefslogtreecommitdiffstats
path: root/fs/exofs/ios.c
diff options
context:
space:
mode:
author Boaz Harrosh <bharrosh@panasas.com> 2009-11-16 09:03:05 -0500
committer Boaz Harrosh <bharrosh@panasas.com> 2009-12-10 02:59:23 -0500
commit 04dc1e88ad9c9f9639019e9646a89ce0ebf706bb (patch)
tree 403206d1e85e9e487d847694cbe0ecf111b3f02b /fs/exofs/ios.c
parent 06886a5a3dc5a5abe0a4d257c26317bde7047be8 (diff)
exofs: Multi-device mirror support
This patch changes the on-disk format; it is accompanied by a parallel patch to mkfs.exofs that enables multi-device capabilities. After this patch, an old exofs will refuse to mount a newly formatted FS and a new exofs will refuse an old format. This is done by moving the magic field offset inside the FSCB. A new FSCB *version* field was added. In the future, exofs will refuse to mount an unmatched FSCB version. To upgrade or downgrade an exofs one must use the mkfs.exofs --upgrade option before mounting. Introduced a new object that contains a *device-table*. This object contains the default *data-map* and a linear array of device information, which identifies the devices used in the filesystem. This object is only written to offline, by mkfs.exofs. This is why it is kept separate from the FSCB, since the latter is written to while mounted. The same partition number and the same object number are used on all devices; only the device varies. * Define the new format, then load the device table at mount time and make sure everything is supported. * Change the I/O engine to support mirror IO, i.e. write the same data to multiple devices, and read from a random device to spread the read load from multiple clients (TODO: stripe read). Implementation notes: A few points introduced in the previous patch should be mentioned here: * Special care was taken so that absolutely all operations that have any chance of failing are done before any osd-request is executed. This is to minimize the need for data-consistency recovery to only real IO errors. * Each IO state has a kref. It starts at 1; any osd-request executed will increment the kref, and finally, when all are executed, the first ref is dropped. At IO-done, each request completion decrements the kref; the last one to return executes the internal _last_io() routine. _last_io() will call the registered io_state_done.
In sync mode the caller does not supply a done method, indicating a synchronous request; the caller is put to sleep and a special io_state_done is registered that will awaken the caller. Even in sync mode, though, all operations are executed in parallel. Signed-off-by: Boaz Harrosh <bharrosh@panasas.com>
Diffstat (limited to 'fs/exofs/ios.c')
-rw-r--r--fs/exofs/ios.c38
1 files changed, 27 insertions, 11 deletions
diff --git a/fs/exofs/ios.c b/fs/exofs/ios.c
index bb2f9d341fdf..5bad01fa1f9f 100644
--- a/fs/exofs/ios.c
+++ b/fs/exofs/ios.c
@@ -71,7 +71,7 @@ int exofs_get_io_state(struct exofs_sb_info *sbi, struct exofs_io_state** pios)
71 /*TODO: Maybe use kmem_cach per sbi of size 71 /*TODO: Maybe use kmem_cach per sbi of size
72 * exofs_io_state_size(sbi->s_numdevs) 72 * exofs_io_state_size(sbi->s_numdevs)
73 */ 73 */
74 ios = kzalloc(exofs_io_state_size(1), GFP_KERNEL); 74 ios = kzalloc(exofs_io_state_size(sbi->s_numdevs), GFP_KERNEL);
75 if (unlikely(!ios)) { 75 if (unlikely(!ios)) {
76 *pios = NULL; 76 *pios = NULL;
77 return -ENOMEM; 77 return -ENOMEM;
@@ -209,10 +209,10 @@ int exofs_sbi_create(struct exofs_io_state *ios)
209{ 209{
210 int i, ret; 210 int i, ret;
211 211
212 for (i = 0; i < 1; i++) { 212 for (i = 0; i < ios->sbi->s_numdevs; i++) {
213 struct osd_request *or; 213 struct osd_request *or;
214 214
215 or = osd_start_request(ios->sbi->s_dev, GFP_KERNEL); 215 or = osd_start_request(ios->sbi->s_ods[i], GFP_KERNEL);
216 if (unlikely(!or)) { 216 if (unlikely(!or)) {
217 EXOFS_ERR("%s: osd_start_request failed\n", __func__); 217 EXOFS_ERR("%s: osd_start_request failed\n", __func__);
218 ret = -ENOMEM; 218 ret = -ENOMEM;
@@ -233,10 +233,10 @@ int exofs_sbi_remove(struct exofs_io_state *ios)
233{ 233{
234 int i, ret; 234 int i, ret;
235 235
236 for (i = 0; i < 1; i++) { 236 for (i = 0; i < ios->sbi->s_numdevs; i++) {
237 struct osd_request *or; 237 struct osd_request *or;
238 238
239 or = osd_start_request(ios->sbi->s_dev, GFP_KERNEL); 239 or = osd_start_request(ios->sbi->s_ods[i], GFP_KERNEL);
240 if (unlikely(!or)) { 240 if (unlikely(!or)) {
241 EXOFS_ERR("%s: osd_start_request failed\n", __func__); 241 EXOFS_ERR("%s: osd_start_request failed\n", __func__);
242 ret = -ENOMEM; 242 ret = -ENOMEM;
@@ -257,10 +257,10 @@ int exofs_sbi_write(struct exofs_io_state *ios)
257{ 257{
258 int i, ret; 258 int i, ret;
259 259
260 for (i = 0; i < 1; i++) { 260 for (i = 0; i < ios->sbi->s_numdevs; i++) {
261 struct osd_request *or; 261 struct osd_request *or;
262 262
263 or = osd_start_request(ios->sbi->s_dev, GFP_KERNEL); 263 or = osd_start_request(ios->sbi->s_ods[i], GFP_KERNEL);
264 if (unlikely(!or)) { 264 if (unlikely(!or)) {
265 EXOFS_ERR("%s: osd_start_request failed\n", __func__); 265 EXOFS_ERR("%s: osd_start_request failed\n", __func__);
266 ret = -ENOMEM; 266 ret = -ENOMEM;
@@ -272,7 +272,21 @@ int exofs_sbi_write(struct exofs_io_state *ios)
272 if (ios->bio) { 272 if (ios->bio) {
273 struct bio *bio; 273 struct bio *bio;
274 274
275 bio = ios->bio; 275 if (i != 0) {
276 bio = bio_kmalloc(GFP_KERNEL,
277 ios->bio->bi_max_vecs);
278 if (unlikely(!bio)) {
279 ret = -ENOMEM;
280 goto out;
281 }
282
283 __bio_clone(bio, ios->bio);
284 bio->bi_bdev = NULL;
285 bio->bi_next = NULL;
286 ios->per_dev[i].bio = bio;
287 } else {
288 bio = ios->bio;
289 }
276 290
277 osd_req_write(or, &ios->obj, ios->offset, bio, 291 osd_req_write(or, &ios->obj, ios->offset, bio,
278 ios->length); 292 ios->length);
@@ -306,8 +320,10 @@ int exofs_sbi_read(struct exofs_io_state *ios)
306 320
307 for (i = 0; i < 1; i++) { 321 for (i = 0; i < 1; i++) {
308 struct osd_request *or; 322 struct osd_request *or;
323 unsigned first_dev = (unsigned)ios->obj.id;
309 324
310 or = osd_start_request(ios->sbi->s_dev, GFP_KERNEL); 325 first_dev %= ios->sbi->s_numdevs;
326 or = osd_start_request(ios->sbi->s_ods[first_dev], GFP_KERNEL);
311 if (unlikely(!or)) { 327 if (unlikely(!or)) {
312 EXOFS_ERR("%s: osd_start_request failed\n", __func__); 328 EXOFS_ERR("%s: osd_start_request failed\n", __func__);
313 ret = -ENOMEM; 329 ret = -ENOMEM;
@@ -382,10 +398,10 @@ int exofs_oi_truncate(struct exofs_i_info *oi, u64 size)
382 attr = g_attr_logical_length; 398 attr = g_attr_logical_length;
383 attr.val_ptr = &newsize; 399 attr.val_ptr = &newsize;
384 400
385 for (i = 0; i < 1; i++) { 401 for (i = 0; i < sbi->s_numdevs; i++) {
386 struct osd_request *or; 402 struct osd_request *or;
387 403
388 or = osd_start_request(sbi->s_dev, GFP_KERNEL); 404 or = osd_start_request(sbi->s_ods[i], GFP_KERNEL);
389 if (unlikely(!or)) { 405 if (unlikely(!or)) {
390 EXOFS_ERR("%s: osd_start_request failed\n", __func__); 406 EXOFS_ERR("%s: osd_start_request failed\n", __func__);
391 ret = -ENOMEM; 407 ret = -ENOMEM;