author	Boaz Harrosh <bharrosh@panasas.com>	2009-11-08 07:54:08 -0500
committer	Boaz Harrosh <bharrosh@panasas.com>	2009-12-10 02:59:22 -0500
commit	06886a5a3dc5a5abe0a4d257c26317bde7047be8 (patch)
tree	858ac56e120c0473d764fc64a2660e6d79729c8c /fs/exofs/inode.c
parent	8ce9bdd1fbe962933736d7977e972972cd5d754c (diff)
exofs: Move all operations to an io_engine
In anticipation of multi-device operations, we separate osd operations
into an abstract I/O API. Currently only one device is used, but later,
when more devices are added, we will drive all of them in parallel
according to a "data_map" that describes how data is arranged on the
multiple devices. The file system level operates, as before, as if
there is one object (inode-number) and an i_size. The io engine splits
this into the same object-number but across multiple devices.

At first we introduce a Mirror (RAID 1) layout. Ultimately we intend to
fully implement the pNFS-Objects data-map, including RAID 0,4,5,6 over
mirrored devices, over multiple device-groups, and more.
See: http://tools.ietf.org/html/draft-ietf-nfsv4-pnfs-obj-12

* Define an io_state based API for accessing osd storage devices in an
  abstract way.
  Usage:
    First a caller allocates an io state with:
	exofs_get_io_state(struct exofs_sb_info *sbi,
			   struct exofs_io_state **ios);

    Then calls one of:
	exofs_sbi_create(struct exofs_io_state *ios);
	exofs_sbi_remove(struct exofs_io_state *ios);
	exofs_sbi_write(struct exofs_io_state *ios);
	exofs_sbi_read(struct exofs_io_state *ios);
	exofs_oi_truncate(struct exofs_i_info *oi, u64 new_len);

    And when done:
	exofs_put_io_state(struct exofs_io_state *ios);

* Convert all source files to use this new API.
* Convert from bio_alloc to bio_kmalloc.
* In the io engine, make use of the now-fixed osd_req_decode_sense.

There are no functional changes or on-disk additions after this patch.

Signed-off-by: Boaz Harrosh <bharrosh@panasas.com>
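For orientation, here is a minimal sketch of how a caller might drive this API for an asynchronous read, pieced together from the functions named above and from the converted call sites in the diff below. The helper names (my_submit_read, my_read_done) and the caller argument are hypothetical illustrations, not part of this patch:

	/* Hypothetical sketch only -- assumes the io_state declarations from "exofs.h" */
	#include "exofs.h"

	static void my_read_done(struct exofs_io_state *ios, void *p)
	{
		u64 resid;
		int ret = exofs_check_io(ios, &resid);	/* 0 on success, else error + residual */

		/* ... complete/unlock the caller's pages, reachable through 'p' ... */

		exofs_put_io_state(ios);		/* the completion owns the io_state */
	}

	static int my_submit_read(struct exofs_i_info *oi, struct exofs_sb_info *sbi,
				  struct bio *bio, u64 offset, u64 length, void *caller)
	{
		struct exofs_io_state *ios;
		int ret = exofs_get_io_state(sbi, &ios);

		if (ret)
			return ret;

		ios->bio = bio;			/* pages to read into */
		ios->offset = offset;		/* byte offset within the object */
		ios->length = length;
		ios->done = my_read_done;	/* asynchronous completion */
		ios->private = caller;		/* handed back as 'p' above */

		ret = exofs_oi_read(oi, ios);	/* per-inode wrapper over exofs_sbi_read() */
		if (ret)
			exofs_put_io_state(ios);	/* submit failed: done() will not run */
		return ret;
	}

The same allocate, fill, submit, put pattern is what read_exec(), write_exec() and exofs_update_inode() follow in the diff below.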
Diffstat (limited to 'fs/exofs/inode.c')
-rw-r--r--	fs/exofs/inode.c	383
1 file changed, 184 insertions(+), 199 deletions(-)
diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c
index 7bc71a7d30a8..7578950fd135 100644
--- a/fs/exofs/inode.c
+++ b/fs/exofs/inode.c
@@ -37,17 +37,18 @@
 
 #include "exofs.h"
 
-#ifdef CONFIG_EXOFS_DEBUG
-# define EXOFS_DEBUG_OBJ_ISIZE 1
-#endif
-
 #define EXOFS_DBGMSG2(M...) do {} while (0)
 
+enum { BIO_MAX_PAGES_KMALLOC =
+	(PAGE_SIZE - sizeof(struct bio)) / sizeof(struct bio_vec),
+};
+
 struct page_collect {
 	struct exofs_sb_info *sbi;
 	struct request_queue *req_q;
 	struct inode *inode;
 	unsigned expected_pages;
+	struct exofs_io_state *ios;
 
 	struct bio *bio;
 	unsigned nr_pages;
@@ -56,7 +57,7 @@ struct page_collect {
 };
 
 static void _pcol_init(struct page_collect *pcol, unsigned expected_pages,
-		struct inode *inode)
+		       struct inode *inode)
 {
 	struct exofs_sb_info *sbi = inode->i_sb->s_fs_info;
 
@@ -65,13 +66,11 @@ static void _pcol_init(struct page_collect *pcol, unsigned expected_pages,
 	pcol->inode = inode;
 	pcol->expected_pages = expected_pages;
 
+	pcol->ios = NULL;
 	pcol->bio = NULL;
 	pcol->nr_pages = 0;
 	pcol->length = 0;
 	pcol->pg_first = -1;
-
-	EXOFS_DBGMSG("_pcol_init ino=0x%lx expected_pages=%u\n", inode->i_ino,
-		     expected_pages);
 }
 
 static void _pcol_reset(struct page_collect *pcol)
@@ -82,35 +81,49 @@ static void _pcol_reset(struct page_collect *pcol)
 	pcol->nr_pages = 0;
 	pcol->length = 0;
 	pcol->pg_first = -1;
-	EXOFS_DBGMSG("_pcol_reset ino=0x%lx expected_pages=%u\n",
-		     pcol->inode->i_ino, pcol->expected_pages);
+	pcol->ios = NULL;
 
 	/* this is probably the end of the loop but in writes
 	 * it might not end here. don't be left with nothing
 	 */
 	if (!pcol->expected_pages)
-		pcol->expected_pages = 128;
+		pcol->expected_pages = BIO_MAX_PAGES_KMALLOC;
 }
 
 static int pcol_try_alloc(struct page_collect *pcol)
 {
-	int pages = min_t(unsigned, pcol->expected_pages, BIO_MAX_PAGES);
+	int pages = min_t(unsigned, pcol->expected_pages,
+			  BIO_MAX_PAGES_KMALLOC);
+
+	if (!pcol->ios) { /* First time allocate io_state */
+		int ret = exofs_get_io_state(pcol->sbi, &pcol->ios);
+
+		if (ret)
+			return ret;
+	}
 
 	for (; pages; pages >>= 1) {
-		pcol->bio = bio_alloc(GFP_KERNEL, pages);
+		pcol->bio = bio_kmalloc(GFP_KERNEL, pages);
 		if (likely(pcol->bio))
 			return 0;
 	}
 
-	EXOFS_ERR("Failed to kcalloc expected_pages=%u\n",
+	EXOFS_ERR("Failed to bio_kmalloc expected_pages=%u\n",
 		  pcol->expected_pages);
 	return -ENOMEM;
 }
 
 static void pcol_free(struct page_collect *pcol)
 {
-	bio_put(pcol->bio);
-	pcol->bio = NULL;
+	if (pcol->bio) {
+		bio_put(pcol->bio);
+		pcol->bio = NULL;
+	}
+
+	if (pcol->ios) {
+		exofs_put_io_state(pcol->ios);
+		pcol->ios = NULL;
+	}
 }
 
 static int pcol_add_page(struct page_collect *pcol, struct page *page,
@@ -163,22 +176,17 @@ static void update_write_page(struct page *page, int ret)
 /* Called at the end of reads, to optionally unlock pages and update their
  * status.
  */
-static int __readpages_done(struct osd_request *or, struct page_collect *pcol,
-			    bool do_unlock)
+static int __readpages_done(struct page_collect *pcol, bool do_unlock)
 {
 	struct bio_vec *bvec;
 	int i;
 	u64 resid;
 	u64 good_bytes;
 	u64 length = 0;
-	int ret = exofs_check_ok_resid(or, &resid, NULL);
-
-	osd_end_request(or);
+	int ret = exofs_check_io(pcol->ios, &resid);
 
 	if (likely(!ret))
 		good_bytes = pcol->length;
-	else if (!resid)
-		good_bytes = 0;
 	else
 		good_bytes = pcol->length - resid;
 
@@ -216,13 +224,13 @@ static int __readpages_done(struct osd_request *or, struct page_collect *pcol,
 }
 
 /* callback of async reads */
-static void readpages_done(struct osd_request *or, void *p)
+static void readpages_done(struct exofs_io_state *ios, void *p)
 {
 	struct page_collect *pcol = p;
 
-	__readpages_done(or, pcol, true);
+	__readpages_done(pcol, true);
 	atomic_dec(&pcol->sbi->s_curr_pending);
-	kfree(p);
+	kfree(pcol);
 }
 
 static void _unlock_pcol_pages(struct page_collect *pcol, int ret, int rw)
@@ -240,17 +248,13 @@ static void _unlock_pcol_pages(struct page_collect *pcol, int ret, int rw)
 
 		unlock_page(page);
 	}
-	pcol_free(pcol);
 }
 
 static int read_exec(struct page_collect *pcol, bool is_sync)
 {
 	struct exofs_i_info *oi = exofs_i(pcol->inode);
-	struct osd_obj_id obj = {pcol->sbi->s_pid,
-					pcol->inode->i_ino + EXOFS_OBJ_OFF};
-	struct osd_request *or = NULL;
+	struct exofs_io_state *ios = pcol->ios;
 	struct page_collect *pcol_copy = NULL;
-	loff_t i_start = pcol->pg_first << PAGE_CACHE_SHIFT;
 	int ret;
 
 	if (!pcol->bio)
@@ -259,17 +263,13 @@ static int read_exec(struct page_collect *pcol, bool is_sync)
 	/* see comment in _readpage() about sync reads */
 	WARN_ON(is_sync && (pcol->nr_pages != 1));
 
-	or = osd_start_request(pcol->sbi->s_dev, GFP_KERNEL);
-	if (unlikely(!or)) {
-		ret = -ENOMEM;
-		goto err;
-	}
-
-	osd_req_read(or, &obj, i_start, pcol->bio, pcol->length);
+	ios->bio = pcol->bio;
+	ios->length = pcol->length;
+	ios->offset = pcol->pg_first << PAGE_CACHE_SHIFT;
 
 	if (is_sync) {
-		exofs_sync_op(or, pcol->sbi->s_timeout, oi->i_cred);
-		return __readpages_done(or, pcol, false);
+		exofs_oi_read(oi, pcol->ios);
+		return __readpages_done(pcol, false);
 	}
 
 	pcol_copy = kmalloc(sizeof(*pcol_copy), GFP_KERNEL);
@@ -279,14 +279,16 @@ static int read_exec(struct page_collect *pcol, bool is_sync)
 	}
 
 	*pcol_copy = *pcol;
-	ret = exofs_async_op(or, readpages_done, pcol_copy, oi->i_cred);
+	ios->done = readpages_done;
+	ios->private = pcol_copy;
+	ret = exofs_oi_read(oi, ios);
 	if (unlikely(ret))
 		goto err;
 
 	atomic_inc(&pcol->sbi->s_curr_pending);
 
 	EXOFS_DBGMSG("read_exec obj=0x%llx start=0x%llx length=0x%lx\n",
-		     obj.id, _LLU(i_start), pcol->length);
+		     ios->obj.id, _LLU(ios->offset), pcol->length);
 
 	/* pages ownership was passed to pcol_copy */
 	_pcol_reset(pcol);
@@ -295,12 +297,10 @@ static int read_exec(struct page_collect *pcol, bool is_sync)
 err:
 	if (!is_sync)
 		_unlock_pcol_pages(pcol, ret, READ);
-	else /* Pages unlocked by caller in sync mode only free bio */
-		pcol_free(pcol);
+
+	pcol_free(pcol);
 
 	kfree(pcol_copy);
-	if (or)
-		osd_end_request(or);
 	return ret;
 }
 
@@ -421,9 +421,8 @@ static int _readpage(struct page *page, bool is_sync)
 
 	_pcol_init(&pcol, 1, page->mapping->host);
 
-	/* readpage_strip might call read_exec(,async) inside at several places
-	 * but this is safe for is_async=0 since read_exec will not do anything
-	 * when we have a single page.
+	/* readpage_strip might call read_exec(,is_sync==false) at several
+	 * places but not if we have a single page.
 	 */
 	ret = readpage_strip(&pcol, page);
 	if (ret) {
@@ -442,8 +441,8 @@ static int exofs_readpage(struct file *file, struct page *page)
 	return _readpage(page, false);
 }
 
-/* Callback for osd_write. All writes are asynchronouse */
-static void writepages_done(struct osd_request *or, void *p)
+/* Callback for osd_write. All writes are asynchronous */
+static void writepages_done(struct exofs_io_state *ios, void *p)
 {
 	struct page_collect *pcol = p;
 	struct bio_vec *bvec;
@@ -451,16 +450,12 @@ static void writepages_done(struct osd_request *or, void *p)
 	u64 resid;
 	u64 good_bytes;
 	u64 length = 0;
+	int ret = exofs_check_io(ios, &resid);
 
-	int ret = exofs_check_ok_resid(or, NULL, &resid);
-
-	osd_end_request(or);
 	atomic_dec(&pcol->sbi->s_curr_pending);
 
 	if (likely(!ret))
 		good_bytes = pcol->length;
-	else if (!resid)
-		good_bytes = 0;
 	else
 		good_bytes = pcol->length - resid;
 
@@ -498,23 +493,13 @@ static void writepages_done(struct osd_request *or, void *p)
 static int write_exec(struct page_collect *pcol)
 {
 	struct exofs_i_info *oi = exofs_i(pcol->inode);
-	struct osd_obj_id obj = {pcol->sbi->s_pid,
-					pcol->inode->i_ino + EXOFS_OBJ_OFF};
-	struct osd_request *or = NULL;
+	struct exofs_io_state *ios = pcol->ios;
 	struct page_collect *pcol_copy = NULL;
-	loff_t i_start = pcol->pg_first << PAGE_CACHE_SHIFT;
 	int ret;
 
 	if (!pcol->bio)
 		return 0;
 
-	or = osd_start_request(pcol->sbi->s_dev, GFP_KERNEL);
-	if (unlikely(!or)) {
-		EXOFS_ERR("write_exec: Faild to osd_start_request()\n");
-		ret = -ENOMEM;
-		goto err;
-	}
-
 	pcol_copy = kmalloc(sizeof(*pcol_copy), GFP_KERNEL);
 	if (!pcol_copy) {
 		EXOFS_ERR("write_exec: Faild to kmalloc(pcol)\n");
@@ -525,16 +510,22 @@ static int write_exec(struct page_collect *pcol)
 	*pcol_copy = *pcol;
 
 	pcol_copy->bio->bi_rw |= (1 << BIO_RW); /* FIXME: bio_set_dir() */
-	osd_req_write(or, &obj, i_start, pcol_copy->bio, pcol_copy->length);
-	ret = exofs_async_op(or, writepages_done, pcol_copy, oi->i_cred);
+
+	ios->bio = pcol_copy->bio;
+	ios->offset = pcol_copy->pg_first << PAGE_CACHE_SHIFT;
+	ios->length = pcol_copy->length;
+	ios->done = writepages_done;
+	ios->private = pcol_copy;
+
+	ret = exofs_oi_write(oi, ios);
 	if (unlikely(ret)) {
-		EXOFS_ERR("write_exec: exofs_async_op() Faild\n");
+		EXOFS_ERR("write_exec: exofs_oi_write() Faild\n");
 		goto err;
 	}
 
 	atomic_inc(&pcol->sbi->s_curr_pending);
 	EXOFS_DBGMSG("write_exec(0x%lx, 0x%llx) start=0x%llx length=0x%lx\n",
-		  pcol->inode->i_ino, pcol->pg_first, _LLU(i_start),
+		  pcol->inode->i_ino, pcol->pg_first, _LLU(ios->offset),
 		  pcol->length);
 	/* pages ownership was passed to pcol_copy */
 	_pcol_reset(pcol);
@@ -542,9 +533,9 @@ static int write_exec(struct page_collect *pcol)
 
 err:
 	_unlock_pcol_pages(pcol, ret, WRITE);
+	pcol_free(pcol);
 	kfree(pcol_copy);
-	if (or)
-		osd_end_request(or);
+
 	return ret;
 }
 
@@ -588,6 +579,9 @@ static int writepage_strip(struct page *page,
 			if (PageError(page))
 				ClearPageError(page);
 			unlock_page(page);
+			EXOFS_DBGMSG("writepage_strip(0x%lx, 0x%lx) "
+				     "outside the limits\n",
+				     inode->i_ino, page->index);
 			return 0;
 		}
 	}
@@ -602,6 +596,9 @@ try_again:
 		ret = write_exec(pcol);
 		if (unlikely(ret))
 			goto fail;
+
+		EXOFS_DBGMSG("writepage_strip(0x%lx, 0x%lx) Discontinuity\n",
+			     inode->i_ino, page->index);
 		goto try_again;
 	}
 
@@ -636,6 +633,8 @@ try_again:
 	return 0;
 
 fail:
+	EXOFS_DBGMSG("Error: writepage_strip(0x%lx, 0x%lx)=>%d\n",
+		     inode->i_ino, page->index, ret);
 	set_bit(AS_EIO, &page->mapping->flags);
 	unlock_page(page);
 	return ret;
@@ -654,14 +653,17 @@ static int exofs_writepages(struct address_space *mapping,
 		wbc->range_end >> PAGE_CACHE_SHIFT;
 
 	if (start || end)
-		expected_pages = min(end - start + 1, 32L);
+		expected_pages = end - start + 1;
 	else
 		expected_pages = mapping->nrpages;
 
-	EXOFS_DBGMSG("inode(0x%lx) wbc->start=0x%llx wbc->end=0x%llx"
-		     " m->nrpages=%lu start=0x%lx end=0x%lx\n",
+	if (expected_pages < 32L)
+		expected_pages = 32L;
+
+	EXOFS_DBGMSG("inode(0x%lx) wbc->start=0x%llx wbc->end=0x%llx "
+		     "nrpages=%lu start=0x%lx end=0x%lx expected_pages=%ld\n",
 		     mapping->host->i_ino, wbc->range_start, wbc->range_end,
-		     mapping->nrpages, start, end);
+		     mapping->nrpages, start, end, expected_pages);
 
 	_pcol_init(&pcol, expected_pages, mapping->host);
 
@@ -773,19 +775,28 @@ static int exofs_get_block(struct inode *inode, sector_t iblock,
 const struct osd_attr g_attr_logical_length = ATTR_DEF(
 	OSD_APAGE_OBJECT_INFORMATION, OSD_ATTR_OI_LOGICAL_LENGTH, 8);
 
+static int _do_truncate(struct inode *inode)
+{
+	struct exofs_i_info *oi = exofs_i(inode);
+	loff_t isize = i_size_read(inode);
+	int ret;
+
+	inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+
+	nobh_truncate_page(inode->i_mapping, isize, exofs_get_block);
+
+	ret = exofs_oi_truncate(oi, (u64)isize);
+	EXOFS_DBGMSG("(0x%lx) size=0x%llx\n", inode->i_ino, isize);
+	return ret;
+}
+
 /*
  * Truncate a file to the specified size - all we have to do is set the size
  * attribute. We make sure the object exists first.
  */
 void exofs_truncate(struct inode *inode)
 {
-	struct exofs_sb_info *sbi = inode->i_sb->s_fs_info;
 	struct exofs_i_info *oi = exofs_i(inode);
-	struct osd_obj_id obj = {sbi->s_pid, inode->i_ino + EXOFS_OBJ_OFF};
-	struct osd_request *or;
-	struct osd_attr attr;
-	loff_t isize = i_size_read(inode);
-	__be64 newsize;
 	int ret;
 
 	if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode)
@@ -795,22 +806,6 @@ void exofs_truncate(struct inode *inode)
 		return;
 	if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
 		return;
-	inode->i_mtime = inode->i_ctime = CURRENT_TIME;
-
-	nobh_truncate_page(inode->i_mapping, isize, exofs_get_block);
-
-	or = osd_start_request(sbi->s_dev, GFP_KERNEL);
-	if (unlikely(!or)) {
-		EXOFS_ERR("ERROR: exofs_truncate: osd_start_request failed\n");
-		goto fail;
-	}
-
-	osd_req_set_attributes(or, &obj);
-
-	newsize = cpu_to_be64((u64)isize);
-	attr = g_attr_logical_length;
-	attr.val_ptr = &newsize;
-	osd_req_add_set_attr_list(or, &attr, 1);
 
 	/* if we are about to truncate an object, and it hasn't been
 	 * created yet, wait
@@ -818,8 +813,7 @@ void exofs_truncate(struct inode *inode)
 	if (unlikely(wait_obj_created(oi)))
 		goto fail;
 
-	ret = exofs_sync_op(or, sbi->s_timeout, oi->i_cred);
-	osd_end_request(or);
+	ret = _do_truncate(inode);
 	if (ret)
 		goto fail;
 
@@ -849,66 +843,57 @@ int exofs_setattr(struct dentry *dentry, struct iattr *iattr)
 
 /*
  * Read an inode from the OSD, and return it as is. We also return the size
- * attribute in the 'sanity' argument if we got compiled with debugging turned
- * on.
+ * attribute in the 'obj_size' argument.
 */
 static int exofs_get_inode(struct super_block *sb, struct exofs_i_info *oi,
-		    struct exofs_fcb *inode, uint64_t *sanity)
+		    struct exofs_fcb *inode, uint64_t *obj_size)
 {
 	struct exofs_sb_info *sbi = sb->s_fs_info;
-	struct osd_request *or;
-	struct osd_attr attr;
-	struct osd_obj_id obj = {sbi->s_pid,
-				 oi->vfs_inode.i_ino + EXOFS_OBJ_OFF};
+	struct osd_attr attrs[2];
+	struct exofs_io_state *ios;
 	int ret;
 
-	exofs_make_credential(oi->i_cred, &obj);
-
-	or = osd_start_request(sbi->s_dev, GFP_KERNEL);
-	if (unlikely(!or)) {
-		EXOFS_ERR("exofs_get_inode: osd_start_request failed.\n");
-		return -ENOMEM;
+	*obj_size = ~0;
+	ret = exofs_get_io_state(sbi, &ios);
+	if (unlikely(ret)) {
+		EXOFS_ERR("%s: exofs_get_io_state failed.\n", __func__);
+		return ret;
 	}
-	osd_req_get_attributes(or, &obj);
 
-	/* we need the inode attribute */
-	osd_req_add_get_attr_list(or, &g_attr_inode_data, 1);
+	ios->obj.id = exofs_oi_objno(oi);
+	exofs_make_credential(oi->i_cred, &ios->obj);
+	ios->cred = oi->i_cred;
 
-#ifdef EXOFS_DEBUG_OBJ_ISIZE
-	/* we get the size attributes to do a sanity check */
-	osd_req_add_get_attr_list(or, &g_attr_logical_length, 1);
-#endif
+	attrs[0] = g_attr_inode_data;
+	attrs[1] = g_attr_logical_length;
+	ios->in_attr = attrs;
+	ios->in_attr_len = ARRAY_SIZE(attrs);
 
-	ret = exofs_sync_op(or, sbi->s_timeout, oi->i_cred);
+	ret = exofs_sbi_read(ios);
 	if (ret)
 		goto out;
 
-	attr = g_attr_inode_data;
-	ret = extract_attr_from_req(or, &attr);
+	ret = extract_attr_from_ios(ios, &attrs[0]);
 	if (ret) {
-		EXOFS_ERR("exofs_get_inode: extract_attr_from_req failed\n");
+		EXOFS_ERR("%s: extract_attr of inode_data failed\n", __func__);
 		goto out;
 	}
+	WARN_ON(attrs[0].len != EXOFS_INO_ATTR_SIZE);
+	memcpy(inode, attrs[0].val_ptr, EXOFS_INO_ATTR_SIZE);
 
-	WARN_ON(attr.len != EXOFS_INO_ATTR_SIZE);
-	memcpy(inode, attr.val_ptr, EXOFS_INO_ATTR_SIZE);
-
-#ifdef EXOFS_DEBUG_OBJ_ISIZE
-	attr = g_attr_logical_length;
-	ret = extract_attr_from_req(or, &attr);
+	ret = extract_attr_from_ios(ios, &attrs[1]);
 	if (ret) {
-		EXOFS_ERR("ERROR: extract attr from or failed\n");
+		EXOFS_ERR("%s: extract_attr of logical_length failed\n",
+			  __func__);
		goto out;
 	}
-	*sanity = get_unaligned_be64(attr.val_ptr);
-#endif
+	*obj_size = get_unaligned_be64(attrs[1].val_ptr);
 
 out:
-	osd_end_request(or);
+	exofs_put_io_state(ios);
 	return ret;
 }
 
-
 static void __oi_init(struct exofs_i_info *oi)
 {
 	init_waitqueue_head(&oi->i_wq);
@@ -922,7 +907,7 @@ struct inode *exofs_iget(struct super_block *sb, unsigned long ino)
 	struct exofs_i_info *oi;
 	struct exofs_fcb fcb;
 	struct inode *inode;
-	uint64_t uninitialized_var(sanity);
+	uint64_t obj_size;
 	int ret;
 
 	inode = iget_locked(sb, ino);
@@ -934,7 +919,7 @@ struct inode *exofs_iget(struct super_block *sb, unsigned long ino)
 	__oi_init(oi);
 
 	/* read the inode from the osd */
-	ret = exofs_get_inode(sb, oi, &fcb, &sanity);
+	ret = exofs_get_inode(sb, oi, &fcb, &obj_size);
 	if (ret)
 		goto bad_inode;
 
@@ -955,13 +940,12 @@ struct inode *exofs_iget(struct super_block *sb, unsigned long ino)
 	inode->i_blkbits = EXOFS_BLKSHIFT;
 	inode->i_generation = le32_to_cpu(fcb.i_generation);
 
-#ifdef EXOFS_DEBUG_OBJ_ISIZE
-	if ((inode->i_size != sanity) &&
+	if ((inode->i_size != obj_size) &&
 	    (!exofs_inode_is_fast_symlink(inode))) {
 		EXOFS_ERR("WARNING: Size of inode=%llu != object=%llu\n",
-			  inode->i_size, _LLU(sanity));
+			  inode->i_size, _LLU(obj_size));
+		/* FIXME: call exofs_inode_recovery() */
 	}
-#endif
 
 	oi->i_dir_start_lookup = 0;
 
@@ -1027,23 +1011,30 @@ int __exofs_wait_obj_created(struct exofs_i_info *oi)
  * set the obj_created flag so that other methods know that the object exists on
  * the OSD.
 */
-static void create_done(struct osd_request *or, void *p)
+static void create_done(struct exofs_io_state *ios, void *p)
 {
 	struct inode *inode = p;
 	struct exofs_i_info *oi = exofs_i(inode);
 	struct exofs_sb_info *sbi = inode->i_sb->s_fs_info;
 	int ret;
 
-	ret = exofs_check_ok(or);
-	osd_end_request(or);
+	ret = exofs_check_io(ios, NULL);
+	exofs_put_io_state(ios);
+
 	atomic_dec(&sbi->s_curr_pending);
 
 	if (unlikely(ret)) {
 		EXOFS_ERR("object=0x%llx creation faild in pid=0x%llx",
-			  _LLU(sbi->s_pid), _LLU(inode->i_ino + EXOFS_OBJ_OFF));
-		make_bad_inode(inode);
-	} else
-		set_obj_created(oi);
+			  _LLU(exofs_oi_objno(oi)), _LLU(sbi->s_pid));
+		/*TODO: When FS is corrupted creation can fail, object already
+		 * exist. Get rid of this asynchronous creation, if exist
+		 * increment the obj counter and try the next object. Until we
+		 * succeed. All these dangling objects will be made into lost
+		 * files by chkfs.exofs
+		 */
+	}
+
+	set_obj_created(oi);
 
 	atomic_dec(&inode->i_count);
 	wake_up(&oi->i_wq);
@@ -1058,8 +1049,7 @@ struct inode *exofs_new_inode(struct inode *dir, int mode)
 	struct inode *inode;
 	struct exofs_i_info *oi;
 	struct exofs_sb_info *sbi;
-	struct osd_request *or;
-	struct osd_obj_id obj;
+	struct exofs_io_state *ios;
 	int ret;
 
 	sb = dir->i_sb;
@@ -1096,28 +1086,28 @@ struct inode *exofs_new_inode(struct inode *dir, int mode)
 
 	mark_inode_dirty(inode);
 
-	obj.partition = sbi->s_pid;
-	obj.id = inode->i_ino + EXOFS_OBJ_OFF;
-	exofs_make_credential(oi->i_cred, &obj);
-
-	or = osd_start_request(sbi->s_dev, GFP_KERNEL);
-	if (unlikely(!or)) {
-		EXOFS_ERR("exofs_new_inode: osd_start_request failed\n");
-		return ERR_PTR(-ENOMEM);
+	ret = exofs_get_io_state(sbi, &ios);
+	if (unlikely(ret)) {
+		EXOFS_ERR("exofs_new_inode: exofs_get_io_state failed\n");
+		return ERR_PTR(ret);
 	}
 
-	osd_req_create_object(or, &obj);
+	ios->obj.id = exofs_oi_objno(oi);
+	exofs_make_credential(oi->i_cred, &ios->obj);
 
 	/* increment the refcount so that the inode will still be around when we
	 * reach the callback
	 */
 	atomic_inc(&inode->i_count);
 
-	ret = exofs_async_op(or, create_done, inode, oi->i_cred);
+	ios->done = create_done;
+	ios->private = inode;
+	ios->cred = oi->i_cred;
+	ret = exofs_sbi_create(ios);
 	if (ret) {
 		atomic_dec(&inode->i_count);
-		osd_end_request(or);
-		return ERR_PTR(-EIO);
+		exofs_put_io_state(ios);
+		return ERR_PTR(ret);
 	}
 	atomic_inc(&sbi->s_curr_pending);
 
@@ -1135,11 +1125,11 @@ struct updatei_args {
 /*
 * Callback function from exofs_update_inode().
 */
-static void updatei_done(struct osd_request *or, void *p)
+static void updatei_done(struct exofs_io_state *ios, void *p)
 {
 	struct updatei_args *args = p;
 
-	osd_end_request(or);
+	exofs_put_io_state(ios);
 
 	atomic_dec(&args->sbi->s_curr_pending);
 
@@ -1155,8 +1145,7 @@ static int exofs_update_inode(struct inode *inode, int do_sync)
 	struct exofs_i_info *oi = exofs_i(inode);
 	struct super_block *sb = inode->i_sb;
 	struct exofs_sb_info *sbi = sb->s_fs_info;
-	struct osd_obj_id obj = {sbi->s_pid, inode->i_ino + EXOFS_OBJ_OFF};
-	struct osd_request *or;
+	struct exofs_io_state *ios;
 	struct osd_attr attr;
 	struct exofs_fcb *fcb;
 	struct updatei_args *args;
@@ -1193,18 +1182,16 @@ static int exofs_update_inode(struct inode *inode, int do_sync)
 	} else
 		memcpy(fcb->i_data, oi->i_data, sizeof(fcb->i_data));
 
-	or = osd_start_request(sbi->s_dev, GFP_KERNEL);
-	if (unlikely(!or)) {
-		EXOFS_ERR("exofs_update_inode: osd_start_request failed.\n");
-		ret = -ENOMEM;
+	ret = exofs_get_io_state(sbi, &ios);
+	if (unlikely(ret)) {
+		EXOFS_ERR("%s: exofs_get_io_state failed.\n", __func__);
 		goto free_args;
 	}
 
-	osd_req_set_attributes(or, &obj);
-
 	attr = g_attr_inode_data;
 	attr.val_ptr = fcb;
-	osd_req_add_set_attr_list(or, &attr, 1);
+	ios->out_attr_len = 1;
+	ios->out_attr = &attr;
 
 	if (!obj_created(oi)) {
 		EXOFS_DBGMSG("!obj_created\n");
@@ -1213,22 +1200,19 @@ static int exofs_update_inode(struct inode *inode, int do_sync)
 		EXOFS_DBGMSG("wait_event done\n");
 	}
 
-	if (do_sync) {
-		ret = exofs_sync_op(or, sbi->s_timeout, oi->i_cred);
-		osd_end_request(or);
-		goto free_args;
-	} else {
+	if (!do_sync) {
 		args->sbi = sbi;
+		ios->done = updatei_done;
+		ios->private = args;
+	}
 
-		ret = exofs_async_op(or, updatei_done, args, oi->i_cred);
-		if (ret) {
-			osd_end_request(or);
-			goto free_args;
-		}
+	ret = exofs_oi_write(oi, ios);
+	if (!do_sync && !ret) {
 		atomic_inc(&sbi->s_curr_pending);
 		goto out; /* deallocation in updatei_done */
 	}
 
+	exofs_put_io_state(ios);
 free_args:
 	kfree(args);
 out:
@@ -1245,11 +1229,12 @@ int exofs_write_inode(struct inode *inode, int wait)
 * Callback function from exofs_delete_inode() - don't have much cleaning up to
 * do.
 */
-static void delete_done(struct osd_request *or, void *p)
+static void delete_done(struct exofs_io_state *ios, void *p)
 {
-	struct exofs_sb_info *sbi;
-	osd_end_request(or);
-	sbi = p;
+	struct exofs_sb_info *sbi = p;
+
+	exofs_put_io_state(ios);
+
 	atomic_dec(&sbi->s_curr_pending);
 }
 
@@ -1263,8 +1248,7 @@ void exofs_delete_inode(struct inode *inode)
 	struct exofs_i_info *oi = exofs_i(inode);
 	struct super_block *sb = inode->i_sb;
 	struct exofs_sb_info *sbi = sb->s_fs_info;
-	struct osd_obj_id obj = {sbi->s_pid, inode->i_ino + EXOFS_OBJ_OFF};
-	struct osd_request *or;
+	struct exofs_io_state *ios;
 	int ret;
 
 	truncate_inode_pages(&inode->i_data, 0);
@@ -1281,25 +1265,26 @@ void exofs_delete_inode(struct inode *inode)
 
 	clear_inode(inode);
 
-	or = osd_start_request(sbi->s_dev, GFP_KERNEL);
-	if (unlikely(!or)) {
-		EXOFS_ERR("exofs_delete_inode: osd_start_request failed\n");
+	ret = exofs_get_io_state(sbi, &ios);
+	if (unlikely(ret)) {
+		EXOFS_ERR("%s: exofs_get_io_state failed\n", __func__);
 		return;
 	}
 
-	osd_req_remove_object(or, &obj);
-
 	/* if we are deleting an obj that hasn't been created yet, wait */
 	if (!obj_created(oi)) {
 		BUG_ON(!obj_2bcreated(oi));
 		wait_event(oi->i_wq, obj_created(oi));
 	}
 
-	ret = exofs_async_op(or, delete_done, sbi, oi->i_cred);
+	ios->obj.id = exofs_oi_objno(oi);
+	ios->done = delete_done;
+	ios->private = sbi;
+	ios->cred = oi->i_cred;
+	ret = exofs_sbi_remove(ios);
 	if (ret) {
-		EXOFS_ERR(
-		       "ERROR: @exofs_delete_inode exofs_async_op failed\n");
-		osd_end_request(or);
+		EXOFS_ERR("%s: exofs_sbi_remove failed\n", __func__);
+		exofs_put_io_state(ios);
 		return;
 	}
 	atomic_inc(&sbi->s_curr_pending);