diff options
Diffstat (limited to 'fs/exofs/inode.c')
-rw-r--r-- | fs/exofs/inode.c | 547 |
1 files changed, 300 insertions, 247 deletions
diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c
index 6c10f7476699..76d2a79ef93e 100644
--- a/fs/exofs/inode.c
+++ b/fs/exofs/inode.c
@@ -31,94 +31,117 @@ | |||
31 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | 31 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA |
32 | */ | 32 | */ |
33 | 33 | ||
34 | #include <linux/slab.h> | ||
34 | #include <linux/writeback.h> | 35 | #include <linux/writeback.h> |
35 | #include <linux/buffer_head.h> | 36 | #include <linux/buffer_head.h> |
36 | #include <scsi/scsi_device.h> | 37 | #include <scsi/scsi_device.h> |
37 | 38 | ||
38 | #include "exofs.h" | 39 | #include "exofs.h" |
39 | 40 | ||
40 | #ifdef CONFIG_EXOFS_DEBUG | 41 | #define EXOFS_DBGMSG2(M...) do {} while (0) |
41 | # define EXOFS_DEBUG_OBJ_ISIZE 1 | 42 | |
42 | #endif | 43 | enum { BIO_MAX_PAGES_KMALLOC = |
44 | (PAGE_SIZE - sizeof(struct bio)) / sizeof(struct bio_vec), | ||
45 | MAX_PAGES_KMALLOC = | ||
46 | PAGE_SIZE / sizeof(struct page *), | ||
47 | }; | ||
43 | 48 | ||
44 | struct page_collect { | 49 | struct page_collect { |
45 | struct exofs_sb_info *sbi; | 50 | struct exofs_sb_info *sbi; |
46 | struct request_queue *req_q; | ||
47 | struct inode *inode; | 51 | struct inode *inode; |
48 | unsigned expected_pages; | 52 | unsigned expected_pages; |
53 | struct exofs_io_state *ios; | ||
49 | 54 | ||
50 | struct bio *bio; | 55 | struct page **pages; |
56 | unsigned alloc_pages; | ||
51 | unsigned nr_pages; | 57 | unsigned nr_pages; |
52 | unsigned long length; | 58 | unsigned long length; |
53 | loff_t pg_first; /* keep 64bit also in 32-arches */ | 59 | loff_t pg_first; /* keep 64bit also in 32-arches */ |
54 | }; | 60 | }; |
55 | 61 | ||
56 | static void _pcol_init(struct page_collect *pcol, unsigned expected_pages, | 62 | static void _pcol_init(struct page_collect *pcol, unsigned expected_pages, |
57 | struct inode *inode) | 63 | struct inode *inode) |
58 | { | 64 | { |
59 | struct exofs_sb_info *sbi = inode->i_sb->s_fs_info; | 65 | struct exofs_sb_info *sbi = inode->i_sb->s_fs_info; |
60 | 66 | ||
61 | pcol->sbi = sbi; | 67 | pcol->sbi = sbi; |
62 | pcol->req_q = osd_request_queue(sbi->s_dev); | ||
63 | pcol->inode = inode; | 68 | pcol->inode = inode; |
64 | pcol->expected_pages = expected_pages; | 69 | pcol->expected_pages = expected_pages; |
65 | 70 | ||
66 | pcol->bio = NULL; | 71 | pcol->ios = NULL; |
72 | pcol->pages = NULL; | ||
73 | pcol->alloc_pages = 0; | ||
67 | pcol->nr_pages = 0; | 74 | pcol->nr_pages = 0; |
68 | pcol->length = 0; | 75 | pcol->length = 0; |
69 | pcol->pg_first = -1; | 76 | pcol->pg_first = -1; |
70 | |||
71 | EXOFS_DBGMSG("_pcol_init ino=0x%lx expected_pages=%u\n", inode->i_ino, | ||
72 | expected_pages); | ||
73 | } | 77 | } |
74 | 78 | ||
75 | static void _pcol_reset(struct page_collect *pcol) | 79 | static void _pcol_reset(struct page_collect *pcol) |
76 | { | 80 | { |
77 | pcol->expected_pages -= min(pcol->nr_pages, pcol->expected_pages); | 81 | pcol->expected_pages -= min(pcol->nr_pages, pcol->expected_pages); |
78 | 82 | ||
79 | pcol->bio = NULL; | 83 | pcol->pages = NULL; |
84 | pcol->alloc_pages = 0; | ||
80 | pcol->nr_pages = 0; | 85 | pcol->nr_pages = 0; |
81 | pcol->length = 0; | 86 | pcol->length = 0; |
82 | pcol->pg_first = -1; | 87 | pcol->pg_first = -1; |
83 | EXOFS_DBGMSG("_pcol_reset ino=0x%lx expected_pages=%u\n", | 88 | pcol->ios = NULL; |
84 | pcol->inode->i_ino, pcol->expected_pages); | ||
85 | 89 | ||
86 | /* this is probably the end of the loop but in writes | 90 | /* this is probably the end of the loop but in writes |
87 | * it might not end here. don't be left with nothing | 91 | * it might not end here. don't be left with nothing |
88 | */ | 92 | */ |
89 | if (!pcol->expected_pages) | 93 | if (!pcol->expected_pages) |
90 | pcol->expected_pages = 128; | 94 | pcol->expected_pages = MAX_PAGES_KMALLOC; |
91 | } | 95 | } |
92 | 96 | ||
93 | static int pcol_try_alloc(struct page_collect *pcol) | 97 | static int pcol_try_alloc(struct page_collect *pcol) |
94 | { | 98 | { |
95 | int pages = min_t(unsigned, pcol->expected_pages, BIO_MAX_PAGES); | 99 | unsigned pages = min_t(unsigned, pcol->expected_pages, |
100 | MAX_PAGES_KMALLOC); | ||
101 | |||
102 | if (!pcol->ios) { /* First time allocate io_state */ | ||
103 | int ret = exofs_get_io_state(&pcol->sbi->layout, &pcol->ios); | ||
104 | |||
105 | if (ret) | ||
106 | return ret; | ||
107 | } | ||
108 | |||
109 | /* TODO: easily support bio chaining */ | ||
110 | pages = min_t(unsigned, pages, | ||
111 | pcol->sbi->layout.group_width * BIO_MAX_PAGES_KMALLOC); | ||
96 | 112 | ||
97 | for (; pages; pages >>= 1) { | 113 | for (; pages; pages >>= 1) { |
98 | pcol->bio = bio_alloc(GFP_KERNEL, pages); | 114 | pcol->pages = kmalloc(pages * sizeof(struct page *), |
99 | if (likely(pcol->bio)) | 115 | GFP_KERNEL); |
116 | if (likely(pcol->pages)) { | ||
117 | pcol->alloc_pages = pages; | ||
100 | return 0; | 118 | return 0; |
119 | } | ||
101 | } | 120 | } |
102 | 121 | ||
103 | EXOFS_ERR("Failed to kcalloc expected_pages=%u\n", | 122 | EXOFS_ERR("Failed to kmalloc expected_pages=%u\n", |
104 | pcol->expected_pages); | 123 | pcol->expected_pages); |
105 | return -ENOMEM; | 124 | return -ENOMEM; |
106 | } | 125 | } |
107 | 126 | ||
108 | static void pcol_free(struct page_collect *pcol) | 127 | static void pcol_free(struct page_collect *pcol) |
109 | { | 128 | { |
110 | bio_put(pcol->bio); | 129 | kfree(pcol->pages); |
111 | pcol->bio = NULL; | 130 | pcol->pages = NULL; |
131 | |||
132 | if (pcol->ios) { | ||
133 | exofs_put_io_state(pcol->ios); | ||
134 | pcol->ios = NULL; | ||
135 | } | ||
112 | } | 136 | } |
113 | 137 | ||
114 | static int pcol_add_page(struct page_collect *pcol, struct page *page, | 138 | static int pcol_add_page(struct page_collect *pcol, struct page *page, |
115 | unsigned len) | 139 | unsigned len) |
116 | { | 140 | { |
117 | int added_len = bio_add_pc_page(pcol->req_q, pcol->bio, page, len, 0); | 141 | if (unlikely(pcol->nr_pages >= pcol->alloc_pages)) |
118 | if (unlikely(len != added_len)) | ||
119 | return -ENOMEM; | 142 | return -ENOMEM; |
120 | 143 | ||
121 | ++pcol->nr_pages; | 144 | pcol->pages[pcol->nr_pages++] = page; |
122 | pcol->length += len; | 145 | pcol->length += len; |
123 | return 0; | 146 | return 0; |
124 | } | 147 | } |
@@ -161,32 +184,26 @@ static void update_write_page(struct page *page, int ret) | |||
161 | /* Called at the end of reads, to optionally unlock pages and update their | 184 | /* Called at the end of reads, to optionally unlock pages and update their |
162 | * status. | 185 | * status. |
163 | */ | 186 | */ |
164 | static int __readpages_done(struct osd_request *or, struct page_collect *pcol, | 187 | static int __readpages_done(struct page_collect *pcol, bool do_unlock) |
165 | bool do_unlock) | ||
166 | { | 188 | { |
167 | struct bio_vec *bvec; | ||
168 | int i; | 189 | int i; |
169 | u64 resid; | 190 | u64 resid; |
170 | u64 good_bytes; | 191 | u64 good_bytes; |
171 | u64 length = 0; | 192 | u64 length = 0; |
172 | int ret = exofs_check_ok_resid(or, &resid, NULL); | 193 | int ret = exofs_check_io(pcol->ios, &resid); |
173 | |||
174 | osd_end_request(or); | ||
175 | 194 | ||
176 | if (likely(!ret)) | 195 | if (likely(!ret)) |
177 | good_bytes = pcol->length; | 196 | good_bytes = pcol->length; |
178 | else if (!resid) | ||
179 | good_bytes = 0; | ||
180 | else | 197 | else |
181 | good_bytes = pcol->length - resid; | 198 | good_bytes = pcol->length - resid; |
182 | 199 | ||
183 | EXOFS_DBGMSG("readpages_done(0x%lx) good_bytes=0x%llx" | 200 | EXOFS_DBGMSG2("readpages_done(0x%lx) good_bytes=0x%llx" |
184 | " length=0x%lx nr_pages=%u\n", | 201 | " length=0x%lx nr_pages=%u\n", |
185 | pcol->inode->i_ino, _LLU(good_bytes), pcol->length, | 202 | pcol->inode->i_ino, _LLU(good_bytes), pcol->length, |
186 | pcol->nr_pages); | 203 | pcol->nr_pages); |
187 | 204 | ||
188 | __bio_for_each_segment(bvec, pcol->bio, i, 0) { | 205 | for (i = 0; i < pcol->nr_pages; i++) { |
189 | struct page *page = bvec->bv_page; | 206 | struct page *page = pcol->pages[i]; |
190 | struct inode *inode = page->mapping->host; | 207 | struct inode *inode = page->mapping->host; |
191 | int page_stat; | 208 | int page_stat; |
192 | 209 | ||
@@ -198,38 +215,37 @@ static int __readpages_done(struct osd_request *or, struct page_collect *pcol, | |||
198 | else | 215 | else |
199 | page_stat = ret; | 216 | page_stat = ret; |
200 | 217 | ||
201 | EXOFS_DBGMSG(" readpages_done(0x%lx, 0x%lx) %s\n", | 218 | EXOFS_DBGMSG2(" readpages_done(0x%lx, 0x%lx) %s\n", |
202 | inode->i_ino, page->index, | 219 | inode->i_ino, page->index, |
203 | page_stat ? "bad_bytes" : "good_bytes"); | 220 | page_stat ? "bad_bytes" : "good_bytes"); |
204 | 221 | ||
205 | ret = update_read_page(page, page_stat); | 222 | ret = update_read_page(page, page_stat); |
206 | if (do_unlock) | 223 | if (do_unlock) |
207 | unlock_page(page); | 224 | unlock_page(page); |
208 | length += bvec->bv_len; | 225 | length += PAGE_SIZE; |
209 | } | 226 | } |
210 | 227 | ||
211 | pcol_free(pcol); | 228 | pcol_free(pcol); |
212 | EXOFS_DBGMSG("readpages_done END\n"); | 229 | EXOFS_DBGMSG2("readpages_done END\n"); |
213 | return ret; | 230 | return ret; |
214 | } | 231 | } |
215 | 232 | ||
216 | /* callback of async reads */ | 233 | /* callback of async reads */ |
217 | static void readpages_done(struct osd_request *or, void *p) | 234 | static void readpages_done(struct exofs_io_state *ios, void *p) |
218 | { | 235 | { |
219 | struct page_collect *pcol = p; | 236 | struct page_collect *pcol = p; |
220 | 237 | ||
221 | __readpages_done(or, pcol, true); | 238 | __readpages_done(pcol, true); |
222 | atomic_dec(&pcol->sbi->s_curr_pending); | 239 | atomic_dec(&pcol->sbi->s_curr_pending); |
223 | kfree(p); | 240 | kfree(pcol); |
224 | } | 241 | } |
225 | 242 | ||
226 | static void _unlock_pcol_pages(struct page_collect *pcol, int ret, int rw) | 243 | static void _unlock_pcol_pages(struct page_collect *pcol, int ret, int rw) |
227 | { | 244 | { |
228 | struct bio_vec *bvec; | ||
229 | int i; | 245 | int i; |
230 | 246 | ||
231 | __bio_for_each_segment(bvec, pcol->bio, i, 0) { | 247 | for (i = 0; i < pcol->nr_pages; i++) { |
232 | struct page *page = bvec->bv_page; | 248 | struct page *page = pcol->pages[i]; |
233 | 249 | ||
234 | if (rw == READ) | 250 | if (rw == READ) |
235 | update_read_page(page, ret); | 251 | update_read_page(page, ret); |
@@ -238,36 +254,29 @@ static void _unlock_pcol_pages(struct page_collect *pcol, int ret, int rw) | |||
238 | 254 | ||
239 | unlock_page(page); | 255 | unlock_page(page); |
240 | } | 256 | } |
241 | pcol_free(pcol); | ||
242 | } | 257 | } |
243 | 258 | ||
244 | static int read_exec(struct page_collect *pcol, bool is_sync) | 259 | static int read_exec(struct page_collect *pcol, bool is_sync) |
245 | { | 260 | { |
246 | struct exofs_i_info *oi = exofs_i(pcol->inode); | 261 | struct exofs_i_info *oi = exofs_i(pcol->inode); |
247 | struct osd_obj_id obj = {pcol->sbi->s_pid, | 262 | struct exofs_io_state *ios = pcol->ios; |
248 | pcol->inode->i_ino + EXOFS_OBJ_OFF}; | ||
249 | struct osd_request *or = NULL; | ||
250 | struct page_collect *pcol_copy = NULL; | 263 | struct page_collect *pcol_copy = NULL; |
251 | loff_t i_start = pcol->pg_first << PAGE_CACHE_SHIFT; | ||
252 | int ret; | 264 | int ret; |
253 | 265 | ||
254 | if (!pcol->bio) | 266 | if (!pcol->pages) |
255 | return 0; | 267 | return 0; |
256 | 268 | ||
257 | /* see comment in _readpage() about sync reads */ | 269 | /* see comment in _readpage() about sync reads */ |
258 | WARN_ON(is_sync && (pcol->nr_pages != 1)); | 270 | WARN_ON(is_sync && (pcol->nr_pages != 1)); |
259 | 271 | ||
260 | or = osd_start_request(pcol->sbi->s_dev, GFP_KERNEL); | 272 | ios->pages = pcol->pages; |
261 | if (unlikely(!or)) { | 273 | ios->nr_pages = pcol->nr_pages; |
262 | ret = -ENOMEM; | 274 | ios->length = pcol->length; |
263 | goto err; | 275 | ios->offset = pcol->pg_first << PAGE_CACHE_SHIFT; |
264 | } | ||
265 | |||
266 | osd_req_read(or, &obj, i_start, pcol->bio, pcol->length); | ||
267 | 276 | ||
268 | if (is_sync) { | 277 | if (is_sync) { |
269 | exofs_sync_op(or, pcol->sbi->s_timeout, oi->i_cred); | 278 | exofs_oi_read(oi, pcol->ios); |
270 | return __readpages_done(or, pcol, false); | 279 | return __readpages_done(pcol, false); |
271 | } | 280 | } |
272 | 281 | ||
273 | pcol_copy = kmalloc(sizeof(*pcol_copy), GFP_KERNEL); | 282 | pcol_copy = kmalloc(sizeof(*pcol_copy), GFP_KERNEL); |
@@ -277,14 +286,16 @@ static int read_exec(struct page_collect *pcol, bool is_sync) | |||
277 | } | 286 | } |
278 | 287 | ||
279 | *pcol_copy = *pcol; | 288 | *pcol_copy = *pcol; |
280 | ret = exofs_async_op(or, readpages_done, pcol_copy, oi->i_cred); | 289 | ios->done = readpages_done; |
290 | ios->private = pcol_copy; | ||
291 | ret = exofs_oi_read(oi, ios); | ||
281 | if (unlikely(ret)) | 292 | if (unlikely(ret)) |
282 | goto err; | 293 | goto err; |
283 | 294 | ||
284 | atomic_inc(&pcol->sbi->s_curr_pending); | 295 | atomic_inc(&pcol->sbi->s_curr_pending); |
285 | 296 | ||
286 | EXOFS_DBGMSG("read_exec obj=0x%llx start=0x%llx length=0x%lx\n", | 297 | EXOFS_DBGMSG2("read_exec obj=0x%llx start=0x%llx length=0x%lx\n", |
287 | obj.id, _LLU(i_start), pcol->length); | 298 | ios->obj.id, _LLU(ios->offset), pcol->length); |
288 | 299 | ||
289 | /* pages ownership was passed to pcol_copy */ | 300 | /* pages ownership was passed to pcol_copy */ |
290 | _pcol_reset(pcol); | 301 | _pcol_reset(pcol); |
@@ -293,12 +304,10 @@ static int read_exec(struct page_collect *pcol, bool is_sync) | |||
293 | err: | 304 | err: |
294 | if (!is_sync) | 305 | if (!is_sync) |
295 | _unlock_pcol_pages(pcol, ret, READ); | 306 | _unlock_pcol_pages(pcol, ret, READ); |
296 | else /* Pages unlocked by caller in sync mode only free bio */ | 307 | |
297 | pcol_free(pcol); | 308 | pcol_free(pcol); |
298 | 309 | ||
299 | kfree(pcol_copy); | 310 | kfree(pcol_copy); |
300 | if (or) | ||
301 | osd_end_request(or); | ||
302 | return ret; | 311 | return ret; |
303 | } | 312 | } |
304 | 313 | ||
@@ -361,7 +370,7 @@ try_again: | |||
361 | goto try_again; | 370 | goto try_again; |
362 | } | 371 | } |
363 | 372 | ||
364 | if (!pcol->bio) { | 373 | if (!pcol->pages) { |
365 | ret = pcol_try_alloc(pcol); | 374 | ret = pcol_try_alloc(pcol); |
366 | if (unlikely(ret)) | 375 | if (unlikely(ret)) |
367 | goto fail; | 376 | goto fail; |
@@ -370,12 +379,12 @@ try_again: | |||
370 | if (len != PAGE_CACHE_SIZE) | 379 | if (len != PAGE_CACHE_SIZE) |
371 | zero_user(page, len, PAGE_CACHE_SIZE - len); | 380 | zero_user(page, len, PAGE_CACHE_SIZE - len); |
372 | 381 | ||
373 | EXOFS_DBGMSG(" readpage_strip(0x%lx, 0x%lx) len=0x%zx\n", | 382 | EXOFS_DBGMSG2(" readpage_strip(0x%lx, 0x%lx) len=0x%zx\n", |
374 | inode->i_ino, page->index, len); | 383 | inode->i_ino, page->index, len); |
375 | 384 | ||
376 | ret = pcol_add_page(pcol, page, len); | 385 | ret = pcol_add_page(pcol, page, len); |
377 | if (ret) { | 386 | if (ret) { |
378 | EXOFS_DBGMSG("Failed pcol_add_page pages[i]=%p " | 387 | EXOFS_DBGMSG2("Failed pcol_add_page pages[i]=%p " |
379 | "this_len=0x%zx nr_pages=%u length=0x%lx\n", | 388 | "this_len=0x%zx nr_pages=%u length=0x%lx\n", |
380 | page, len, pcol->nr_pages, pcol->length); | 389 | page, len, pcol->nr_pages, pcol->length); |
381 | 390 | ||
@@ -419,9 +428,8 @@ static int _readpage(struct page *page, bool is_sync) | |||
419 | 428 | ||
420 | _pcol_init(&pcol, 1, page->mapping->host); | 429 | _pcol_init(&pcol, 1, page->mapping->host); |
421 | 430 | ||
422 | /* readpage_strip might call read_exec(,async) inside at several places | 431 | /* readpage_strip might call read_exec(,is_sync==false) at several |
423 | * but this is safe for is_async=0 since read_exec will not do anything | 432 | * places but not if we have a single page. |
424 | * when we have a single page. | ||
425 | */ | 433 | */ |
426 | ret = readpage_strip(&pcol, page); | 434 | ret = readpage_strip(&pcol, page); |
427 | if (ret) { | 435 | if (ret) { |
@@ -440,35 +448,30 @@ static int exofs_readpage(struct file *file, struct page *page) | |||
440 | return _readpage(page, false); | 448 | return _readpage(page, false); |
441 | } | 449 | } |
442 | 450 | ||
443 | /* Callback for osd_write. All writes are asynchronouse */ | 451 | /* Callback for osd_write. All writes are asynchronous */ |
444 | static void writepages_done(struct osd_request *or, void *p) | 452 | static void writepages_done(struct exofs_io_state *ios, void *p) |
445 | { | 453 | { |
446 | struct page_collect *pcol = p; | 454 | struct page_collect *pcol = p; |
447 | struct bio_vec *bvec; | ||
448 | int i; | 455 | int i; |
449 | u64 resid; | 456 | u64 resid; |
450 | u64 good_bytes; | 457 | u64 good_bytes; |
451 | u64 length = 0; | 458 | u64 length = 0; |
459 | int ret = exofs_check_io(ios, &resid); | ||
452 | 460 | ||
453 | int ret = exofs_check_ok_resid(or, NULL, &resid); | ||
454 | |||
455 | osd_end_request(or); | ||
456 | atomic_dec(&pcol->sbi->s_curr_pending); | 461 | atomic_dec(&pcol->sbi->s_curr_pending); |
457 | 462 | ||
458 | if (likely(!ret)) | 463 | if (likely(!ret)) |
459 | good_bytes = pcol->length; | 464 | good_bytes = pcol->length; |
460 | else if (!resid) | ||
461 | good_bytes = 0; | ||
462 | else | 465 | else |
463 | good_bytes = pcol->length - resid; | 466 | good_bytes = pcol->length - resid; |
464 | 467 | ||
465 | EXOFS_DBGMSG("writepages_done(0x%lx) good_bytes=0x%llx" | 468 | EXOFS_DBGMSG2("writepages_done(0x%lx) good_bytes=0x%llx" |
466 | " length=0x%lx nr_pages=%u\n", | 469 | " length=0x%lx nr_pages=%u\n", |
467 | pcol->inode->i_ino, _LLU(good_bytes), pcol->length, | 470 | pcol->inode->i_ino, _LLU(good_bytes), pcol->length, |
468 | pcol->nr_pages); | 471 | pcol->nr_pages); |
469 | 472 | ||
470 | __bio_for_each_segment(bvec, pcol->bio, i, 0) { | 473 | for (i = 0; i < pcol->nr_pages; i++) { |
471 | struct page *page = bvec->bv_page; | 474 | struct page *page = pcol->pages[i]; |
472 | struct inode *inode = page->mapping->host; | 475 | struct inode *inode = page->mapping->host; |
473 | int page_stat; | 476 | int page_stat; |
474 | 477 | ||
@@ -482,37 +485,27 @@ static void writepages_done(struct osd_request *or, void *p) | |||
482 | 485 | ||
483 | update_write_page(page, page_stat); | 486 | update_write_page(page, page_stat); |
484 | unlock_page(page); | 487 | unlock_page(page); |
485 | EXOFS_DBGMSG(" writepages_done(0x%lx, 0x%lx) status=%d\n", | 488 | EXOFS_DBGMSG2(" writepages_done(0x%lx, 0x%lx) status=%d\n", |
486 | inode->i_ino, page->index, page_stat); | 489 | inode->i_ino, page->index, page_stat); |
487 | 490 | ||
488 | length += bvec->bv_len; | 491 | length += PAGE_SIZE; |
489 | } | 492 | } |
490 | 493 | ||
491 | pcol_free(pcol); | 494 | pcol_free(pcol); |
492 | kfree(pcol); | 495 | kfree(pcol); |
493 | EXOFS_DBGMSG("writepages_done END\n"); | 496 | EXOFS_DBGMSG2("writepages_done END\n"); |
494 | } | 497 | } |
495 | 498 | ||
496 | static int write_exec(struct page_collect *pcol) | 499 | static int write_exec(struct page_collect *pcol) |
497 | { | 500 | { |
498 | struct exofs_i_info *oi = exofs_i(pcol->inode); | 501 | struct exofs_i_info *oi = exofs_i(pcol->inode); |
499 | struct osd_obj_id obj = {pcol->sbi->s_pid, | 502 | struct exofs_io_state *ios = pcol->ios; |
500 | pcol->inode->i_ino + EXOFS_OBJ_OFF}; | ||
501 | struct osd_request *or = NULL; | ||
502 | struct page_collect *pcol_copy = NULL; | 503 | struct page_collect *pcol_copy = NULL; |
503 | loff_t i_start = pcol->pg_first << PAGE_CACHE_SHIFT; | ||
504 | int ret; | 504 | int ret; |
505 | 505 | ||
506 | if (!pcol->bio) | 506 | if (!pcol->pages) |
507 | return 0; | 507 | return 0; |
508 | 508 | ||
509 | or = osd_start_request(pcol->sbi->s_dev, GFP_KERNEL); | ||
510 | if (unlikely(!or)) { | ||
511 | EXOFS_ERR("write_exec: Faild to osd_start_request()\n"); | ||
512 | ret = -ENOMEM; | ||
513 | goto err; | ||
514 | } | ||
515 | |||
516 | pcol_copy = kmalloc(sizeof(*pcol_copy), GFP_KERNEL); | 509 | pcol_copy = kmalloc(sizeof(*pcol_copy), GFP_KERNEL); |
517 | if (!pcol_copy) { | 510 | if (!pcol_copy) { |
518 | EXOFS_ERR("write_exec: Faild to kmalloc(pcol)\n"); | 511 | EXOFS_ERR("write_exec: Faild to kmalloc(pcol)\n"); |
@@ -522,17 +515,22 @@ static int write_exec(struct page_collect *pcol) | |||
522 | 515 | ||
523 | *pcol_copy = *pcol; | 516 | *pcol_copy = *pcol; |
524 | 517 | ||
525 | pcol_copy->bio->bi_rw |= (1 << BIO_RW); /* FIXME: bio_set_dir() */ | 518 | ios->pages = pcol_copy->pages; |
526 | osd_req_write(or, &obj, i_start, pcol_copy->bio, pcol_copy->length); | 519 | ios->nr_pages = pcol_copy->nr_pages; |
527 | ret = exofs_async_op(or, writepages_done, pcol_copy, oi->i_cred); | 520 | ios->offset = pcol_copy->pg_first << PAGE_CACHE_SHIFT; |
521 | ios->length = pcol_copy->length; | ||
522 | ios->done = writepages_done; | ||
523 | ios->private = pcol_copy; | ||
524 | |||
525 | ret = exofs_oi_write(oi, ios); | ||
528 | if (unlikely(ret)) { | 526 | if (unlikely(ret)) { |
529 | EXOFS_ERR("write_exec: exofs_async_op() Faild\n"); | 527 | EXOFS_ERR("write_exec: exofs_oi_write() Faild\n"); |
530 | goto err; | 528 | goto err; |
531 | } | 529 | } |
532 | 530 | ||
533 | atomic_inc(&pcol->sbi->s_curr_pending); | 531 | atomic_inc(&pcol->sbi->s_curr_pending); |
534 | EXOFS_DBGMSG("write_exec(0x%lx, 0x%llx) start=0x%llx length=0x%lx\n", | 532 | EXOFS_DBGMSG2("write_exec(0x%lx, 0x%llx) start=0x%llx length=0x%lx\n", |
535 | pcol->inode->i_ino, pcol->pg_first, _LLU(i_start), | 533 | pcol->inode->i_ino, pcol->pg_first, _LLU(ios->offset), |
536 | pcol->length); | 534 | pcol->length); |
537 | /* pages ownership was passed to pcol_copy */ | 535 | /* pages ownership was passed to pcol_copy */ |
538 | _pcol_reset(pcol); | 536 | _pcol_reset(pcol); |
@@ -540,9 +538,9 @@ static int write_exec(struct page_collect *pcol) | |||
540 | 538 | ||
541 | err: | 539 | err: |
542 | _unlock_pcol_pages(pcol, ret, WRITE); | 540 | _unlock_pcol_pages(pcol, ret, WRITE); |
541 | pcol_free(pcol); | ||
543 | kfree(pcol_copy); | 542 | kfree(pcol_copy); |
544 | if (or) | 543 | |
545 | osd_end_request(or); | ||
546 | return ret; | 544 | return ret; |
547 | } | 545 | } |
548 | 546 | ||
@@ -586,6 +584,9 @@ static int writepage_strip(struct page *page, | |||
586 | if (PageError(page)) | 584 | if (PageError(page)) |
587 | ClearPageError(page); | 585 | ClearPageError(page); |
588 | unlock_page(page); | 586 | unlock_page(page); |
587 | EXOFS_DBGMSG("writepage_strip(0x%lx, 0x%lx) " | ||
588 | "outside the limits\n", | ||
589 | inode->i_ino, page->index); | ||
589 | return 0; | 590 | return 0; |
590 | } | 591 | } |
591 | } | 592 | } |
@@ -600,21 +601,24 @@ try_again: | |||
600 | ret = write_exec(pcol); | 601 | ret = write_exec(pcol); |
601 | if (unlikely(ret)) | 602 | if (unlikely(ret)) |
602 | goto fail; | 603 | goto fail; |
604 | |||
605 | EXOFS_DBGMSG("writepage_strip(0x%lx, 0x%lx) Discontinuity\n", | ||
606 | inode->i_ino, page->index); | ||
603 | goto try_again; | 607 | goto try_again; |
604 | } | 608 | } |
605 | 609 | ||
606 | if (!pcol->bio) { | 610 | if (!pcol->pages) { |
607 | ret = pcol_try_alloc(pcol); | 611 | ret = pcol_try_alloc(pcol); |
608 | if (unlikely(ret)) | 612 | if (unlikely(ret)) |
609 | goto fail; | 613 | goto fail; |
610 | } | 614 | } |
611 | 615 | ||
612 | EXOFS_DBGMSG(" writepage_strip(0x%lx, 0x%lx) len=0x%zx\n", | 616 | EXOFS_DBGMSG2(" writepage_strip(0x%lx, 0x%lx) len=0x%zx\n", |
613 | inode->i_ino, page->index, len); | 617 | inode->i_ino, page->index, len); |
614 | 618 | ||
615 | ret = pcol_add_page(pcol, page, len); | 619 | ret = pcol_add_page(pcol, page, len); |
616 | if (unlikely(ret)) { | 620 | if (unlikely(ret)) { |
617 | EXOFS_DBGMSG("Failed pcol_add_page " | 621 | EXOFS_DBGMSG2("Failed pcol_add_page " |
618 | "nr_pages=%u total_length=0x%lx\n", | 622 | "nr_pages=%u total_length=0x%lx\n", |
619 | pcol->nr_pages, pcol->length); | 623 | pcol->nr_pages, pcol->length); |
620 | 624 | ||
@@ -634,6 +638,8 @@ try_again: | |||
634 | return 0; | 638 | return 0; |
635 | 639 | ||
636 | fail: | 640 | fail: |
641 | EXOFS_DBGMSG("Error: writepage_strip(0x%lx, 0x%lx)=>%d\n", | ||
642 | inode->i_ino, page->index, ret); | ||
637 | set_bit(AS_EIO, &page->mapping->flags); | 643 | set_bit(AS_EIO, &page->mapping->flags); |
638 | unlock_page(page); | 644 | unlock_page(page); |
639 | return ret; | 645 | return ret; |
@@ -652,14 +658,17 @@ static int exofs_writepages(struct address_space *mapping, | |||
652 | wbc->range_end >> PAGE_CACHE_SHIFT; | 658 | wbc->range_end >> PAGE_CACHE_SHIFT; |
653 | 659 | ||
654 | if (start || end) | 660 | if (start || end) |
655 | expected_pages = min(end - start + 1, 32L); | 661 | expected_pages = end - start + 1; |
656 | else | 662 | else |
657 | expected_pages = mapping->nrpages; | 663 | expected_pages = mapping->nrpages; |
658 | 664 | ||
659 | EXOFS_DBGMSG("inode(0x%lx) wbc->start=0x%llx wbc->end=0x%llx" | 665 | if (expected_pages < 32L) |
660 | " m->nrpages=%lu start=0x%lx end=0x%lx\n", | 666 | expected_pages = 32L; |
667 | |||
668 | EXOFS_DBGMSG2("inode(0x%lx) wbc->start=0x%llx wbc->end=0x%llx " | ||
669 | "nrpages=%lu start=0x%lx end=0x%lx expected_pages=%ld\n", | ||
661 | mapping->host->i_ino, wbc->range_start, wbc->range_end, | 670 | mapping->host->i_ino, wbc->range_start, wbc->range_end, |
662 | mapping->nrpages, start, end); | 671 | mapping->nrpages, start, end, expected_pages); |
663 | 672 | ||
664 | _pcol_init(&pcol, expected_pages, mapping->host); | 673 | _pcol_init(&pcol, expected_pages, mapping->host); |
665 | 674 | ||
@@ -731,13 +740,28 @@ static int exofs_write_begin_export(struct file *file, | |||
731 | fsdata); | 740 | fsdata); |
732 | } | 741 | } |
733 | 742 | ||
743 | static int exofs_write_end(struct file *file, struct address_space *mapping, | ||
744 | loff_t pos, unsigned len, unsigned copied, | ||
745 | struct page *page, void *fsdata) | ||
746 | { | ||
747 | struct inode *inode = mapping->host; | ||
748 | /* According to comment in simple_write_end i_mutex is held */ | ||
749 | loff_t i_size = inode->i_size; | ||
750 | int ret; | ||
751 | |||
752 | ret = simple_write_end(file, mapping,pos, len, copied, page, fsdata); | ||
753 | if (i_size != inode->i_size) | ||
754 | mark_inode_dirty(inode); | ||
755 | return ret; | ||
756 | } | ||
757 | |||
734 | const struct address_space_operations exofs_aops = { | 758 | const struct address_space_operations exofs_aops = { |
735 | .readpage = exofs_readpage, | 759 | .readpage = exofs_readpage, |
736 | .readpages = exofs_readpages, | 760 | .readpages = exofs_readpages, |
737 | .writepage = exofs_writepage, | 761 | .writepage = exofs_writepage, |
738 | .writepages = exofs_writepages, | 762 | .writepages = exofs_writepages, |
739 | .write_begin = exofs_write_begin_export, | 763 | .write_begin = exofs_write_begin_export, |
740 | .write_end = simple_write_end, | 764 | .write_end = exofs_write_end, |
741 | }; | 765 | }; |
742 | 766 | ||
743 | /****************************************************************************** | 767 | /****************************************************************************** |
@@ -771,19 +795,28 @@ static int exofs_get_block(struct inode *inode, sector_t iblock, | |||
771 | const struct osd_attr g_attr_logical_length = ATTR_DEF( | 795 | const struct osd_attr g_attr_logical_length = ATTR_DEF( |
772 | OSD_APAGE_OBJECT_INFORMATION, OSD_ATTR_OI_LOGICAL_LENGTH, 8); | 796 | OSD_APAGE_OBJECT_INFORMATION, OSD_ATTR_OI_LOGICAL_LENGTH, 8); |
773 | 797 | ||
798 | static int _do_truncate(struct inode *inode) | ||
799 | { | ||
800 | struct exofs_i_info *oi = exofs_i(inode); | ||
801 | loff_t isize = i_size_read(inode); | ||
802 | int ret; | ||
803 | |||
804 | inode->i_mtime = inode->i_ctime = CURRENT_TIME; | ||
805 | |||
806 | nobh_truncate_page(inode->i_mapping, isize, exofs_get_block); | ||
807 | |||
808 | ret = exofs_oi_truncate(oi, (u64)isize); | ||
809 | EXOFS_DBGMSG("(0x%lx) size=0x%llx\n", inode->i_ino, isize); | ||
810 | return ret; | ||
811 | } | ||
812 | |||
774 | /* | 813 | /* |
775 | * Truncate a file to the specified size - all we have to do is set the size | 814 | * Truncate a file to the specified size - all we have to do is set the size |
776 | * attribute. We make sure the object exists first. | 815 | * attribute. We make sure the object exists first. |
777 | */ | 816 | */ |
778 | void exofs_truncate(struct inode *inode) | 817 | void exofs_truncate(struct inode *inode) |
779 | { | 818 | { |
780 | struct exofs_sb_info *sbi = inode->i_sb->s_fs_info; | ||
781 | struct exofs_i_info *oi = exofs_i(inode); | 819 | struct exofs_i_info *oi = exofs_i(inode); |
782 | struct osd_obj_id obj = {sbi->s_pid, inode->i_ino + EXOFS_OBJ_OFF}; | ||
783 | struct osd_request *or; | ||
784 | struct osd_attr attr; | ||
785 | loff_t isize = i_size_read(inode); | ||
786 | __be64 newsize; | ||
787 | int ret; | 820 | int ret; |
788 | 821 | ||
789 | if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) | 822 | if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) |
@@ -793,22 +826,6 @@ void exofs_truncate(struct inode *inode) | |||
793 | return; | 826 | return; |
794 | if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) | 827 | if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) |
795 | return; | 828 | return; |
796 | inode->i_mtime = inode->i_ctime = CURRENT_TIME; | ||
797 | |||
798 | nobh_truncate_page(inode->i_mapping, isize, exofs_get_block); | ||
799 | |||
800 | or = osd_start_request(sbi->s_dev, GFP_KERNEL); | ||
801 | if (unlikely(!or)) { | ||
802 | EXOFS_ERR("ERROR: exofs_truncate: osd_start_request failed\n"); | ||
803 | goto fail; | ||
804 | } | ||
805 | |||
806 | osd_req_set_attributes(or, &obj); | ||
807 | |||
808 | newsize = cpu_to_be64((u64)isize); | ||
809 | attr = g_attr_logical_length; | ||
810 | attr.val_ptr = &newsize; | ||
811 | osd_req_add_set_attr_list(or, &attr, 1); | ||
812 | 829 | ||
813 | /* if we are about to truncate an object, and it hasn't been | 830 | /* if we are about to truncate an object, and it hasn't been |
814 | * created yet, wait | 831 | * created yet, wait |
@@ -816,8 +833,7 @@ void exofs_truncate(struct inode *inode) | |||
816 | if (unlikely(wait_obj_created(oi))) | 833 | if (unlikely(wait_obj_created(oi))) |
817 | goto fail; | 834 | goto fail; |
818 | 835 | ||
819 | ret = exofs_sync_op(or, sbi->s_timeout, oi->i_cred); | 836 | ret = _do_truncate(inode); |
820 | osd_end_request(or); | ||
821 | if (ret) | 837 | if (ret) |
822 | goto fail; | 838 | goto fail; |
823 | 839 | ||
@@ -845,67 +861,110 @@ int exofs_setattr(struct dentry *dentry, struct iattr *iattr) | |||
845 | return error; | 861 | return error; |
846 | } | 862 | } |
847 | 863 | ||
864 | static const struct osd_attr g_attr_inode_file_layout = ATTR_DEF( | ||
865 | EXOFS_APAGE_FS_DATA, | ||
866 | EXOFS_ATTR_INODE_FILE_LAYOUT, | ||
867 | 0); | ||
868 | static const struct osd_attr g_attr_inode_dir_layout = ATTR_DEF( | ||
869 | EXOFS_APAGE_FS_DATA, | ||
870 | EXOFS_ATTR_INODE_DIR_LAYOUT, | ||
871 | 0); | ||
872 | |||
848 | /* | 873 | /* |
849 | * Read an inode from the OSD, and return it as is. We also return the size | 874 | * Read the Linux inode info from the OSD, and return it as is. In exofs the |
850 | * attribute in the 'sanity' argument if we got compiled with debugging turned | 875 | * inode info is in an application specific page/attribute of the osd-object. |
851 | * on. | ||
852 | */ | 876 | */ |
853 | static int exofs_get_inode(struct super_block *sb, struct exofs_i_info *oi, | 877 | static int exofs_get_inode(struct super_block *sb, struct exofs_i_info *oi, |
854 | struct exofs_fcb *inode, uint64_t *sanity) | 878 | struct exofs_fcb *inode) |
855 | { | 879 | { |
856 | struct exofs_sb_info *sbi = sb->s_fs_info; | 880 | struct exofs_sb_info *sbi = sb->s_fs_info; |
857 | struct osd_request *or; | 881 | struct osd_attr attrs[] = { |
858 | struct osd_attr attr; | 882 | [0] = g_attr_inode_data, |
859 | struct osd_obj_id obj = {sbi->s_pid, | 883 | [1] = g_attr_inode_file_layout, |
860 | oi->vfs_inode.i_ino + EXOFS_OBJ_OFF}; | 884 | [2] = g_attr_inode_dir_layout, |
885 | }; | ||
886 | struct exofs_io_state *ios; | ||
887 | struct exofs_on_disk_inode_layout *layout; | ||
861 | int ret; | 888 | int ret; |
862 | 889 | ||
863 | exofs_make_credential(oi->i_cred, &obj); | 890 | ret = exofs_get_io_state(&sbi->layout, &ios); |
864 | 891 | if (unlikely(ret)) { | |
865 | or = osd_start_request(sbi->s_dev, GFP_KERNEL); | 892 | EXOFS_ERR("%s: exofs_get_io_state failed.\n", __func__); |
866 | if (unlikely(!or)) { | 893 | return ret; |
867 | EXOFS_ERR("exofs_get_inode: osd_start_request failed.\n"); | ||
868 | return -ENOMEM; | ||
869 | } | 894 | } |
870 | osd_req_get_attributes(or, &obj); | ||
871 | 895 | ||
872 | /* we need the inode attribute */ | 896 | ios->obj.id = exofs_oi_objno(oi); |
873 | osd_req_add_get_attr_list(or, &g_attr_inode_data, 1); | 897 | exofs_make_credential(oi->i_cred, &ios->obj); |
898 | ios->cred = oi->i_cred; | ||
874 | 899 | ||
875 | #ifdef EXOFS_DEBUG_OBJ_ISIZE | 900 | attrs[1].len = exofs_on_disk_inode_layout_size(sbi->layout.s_numdevs); |
876 | /* we get the size attributes to do a sanity check */ | 901 | attrs[2].len = exofs_on_disk_inode_layout_size(sbi->layout.s_numdevs); |
877 | osd_req_add_get_attr_list(or, &g_attr_logical_length, 1); | ||
878 | #endif | ||
879 | 902 | ||
880 | ret = exofs_sync_op(or, sbi->s_timeout, oi->i_cred); | 903 | ios->in_attr = attrs; |
881 | if (ret) | 904 | ios->in_attr_len = ARRAY_SIZE(attrs); |
905 | |||
906 | ret = exofs_sbi_read(ios); | ||
907 | if (unlikely(ret)) { | ||
908 | EXOFS_ERR("object(0x%llx) corrupted, return empty file=>%d\n", | ||
909 | _LLU(ios->obj.id), ret); | ||
910 | memset(inode, 0, sizeof(*inode)); | ||
911 | inode->i_mode = 0040000 | (0777 & ~022); | ||
912 | /* If object is lost on target we might as well enable it's | ||
913 | * delete. | ||
914 | */ | ||
915 | if ((ret == -ENOENT) || (ret == -EINVAL)) | ||
916 | ret = 0; | ||
882 | goto out; | 917 | goto out; |
918 | } | ||
883 | 919 | ||
884 | attr = g_attr_inode_data; | 920 | ret = extract_attr_from_ios(ios, &attrs[0]); |
885 | ret = extract_attr_from_req(or, &attr); | ||
886 | if (ret) { | 921 | if (ret) { |
887 | EXOFS_ERR("exofs_get_inode: extract_attr_from_req failed\n"); | 922 | EXOFS_ERR("%s: extract_attr of inode_data failed\n", __func__); |
888 | goto out; | 923 | goto out; |
889 | } | 924 | } |
925 | WARN_ON(attrs[0].len != EXOFS_INO_ATTR_SIZE); | ||
926 | memcpy(inode, attrs[0].val_ptr, EXOFS_INO_ATTR_SIZE); | ||
890 | 927 | ||
891 | WARN_ON(attr.len != EXOFS_INO_ATTR_SIZE); | 928 | ret = extract_attr_from_ios(ios, &attrs[1]); |
892 | memcpy(inode, attr.val_ptr, EXOFS_INO_ATTR_SIZE); | 929 | if (ret) { |
930 | EXOFS_ERR("%s: extract_attr of inode_data failed\n", __func__); | ||
931 | goto out; | ||
932 | } | ||
933 | if (attrs[1].len) { | ||
934 | layout = attrs[1].val_ptr; | ||
935 | if (layout->gen_func != cpu_to_le16(LAYOUT_MOVING_WINDOW)) { | ||
936 | EXOFS_ERR("%s: unsupported files layout %d\n", | ||
937 | __func__, layout->gen_func); | ||
938 | ret = -ENOTSUPP; | ||
939 | goto out; | ||
940 | } | ||
941 | } | ||
893 | 942 | ||
894 | #ifdef EXOFS_DEBUG_OBJ_ISIZE | 943 | ret = extract_attr_from_ios(ios, &attrs[2]); |
895 | attr = g_attr_logical_length; | ||
896 | ret = extract_attr_from_req(or, &attr); | ||
897 | if (ret) { | 944 | if (ret) { |
898 | EXOFS_ERR("ERROR: extract attr from or failed\n"); | 945 | EXOFS_ERR("%s: extract_attr of inode_data failed\n", __func__); |
899 | goto out; | 946 | goto out; |
900 | } | 947 | } |
901 | *sanity = get_unaligned_be64(attr.val_ptr); | 948 | if (attrs[2].len) { |
902 | #endif | 949 | layout = attrs[2].val_ptr; |
950 | if (layout->gen_func != cpu_to_le16(LAYOUT_MOVING_WINDOW)) { | ||
951 | EXOFS_ERR("%s: unsupported meta-data layout %d\n", | ||
952 | __func__, layout->gen_func); | ||
953 | ret = -ENOTSUPP; | ||
954 | goto out; | ||
955 | } | ||
956 | } | ||
903 | 957 | ||
904 | out: | 958 | out: |
905 | osd_end_request(or); | 959 | exofs_put_io_state(ios); |
906 | return ret; | 960 | return ret; |
907 | } | 961 | } |
908 | 962 | ||
963 | static void __oi_init(struct exofs_i_info *oi) | ||
964 | { | ||
965 | init_waitqueue_head(&oi->i_wq); | ||
966 | oi->i_flags = 0; | ||
967 | } | ||
909 | /* | 968 | /* |
910 | * Fill in an inode read from the OSD and set it up for use | 969 | * Fill in an inode read from the OSD and set it up for use |
911 | */ | 970 | */ |
@@ -914,7 +973,6 @@ struct inode *exofs_iget(struct super_block *sb, unsigned long ino) | |||
914 | struct exofs_i_info *oi; | 973 | struct exofs_i_info *oi; |
915 | struct exofs_fcb fcb; | 974 | struct exofs_fcb fcb; |
916 | struct inode *inode; | 975 | struct inode *inode; |
917 | uint64_t uninitialized_var(sanity); | ||
918 | int ret; | 976 | int ret; |
919 | 977 | ||
920 | inode = iget_locked(sb, ino); | 978 | inode = iget_locked(sb, ino); |
@@ -923,13 +981,13 @@ struct inode *exofs_iget(struct super_block *sb, unsigned long ino) | |||
923 | if (!(inode->i_state & I_NEW)) | 981 | if (!(inode->i_state & I_NEW)) |
924 | return inode; | 982 | return inode; |
925 | oi = exofs_i(inode); | 983 | oi = exofs_i(inode); |
984 | __oi_init(oi); | ||
926 | 985 | ||
927 | /* read the inode from the osd */ | 986 | /* read the inode from the osd */ |
928 | ret = exofs_get_inode(sb, oi, &fcb, &sanity); | 987 | ret = exofs_get_inode(sb, oi, &fcb); |
929 | if (ret) | 988 | if (ret) |
930 | goto bad_inode; | 989 | goto bad_inode; |
931 | 990 | ||
932 | init_waitqueue_head(&oi->i_wq); | ||
933 | set_obj_created(oi); | 991 | set_obj_created(oi); |
934 | 992 | ||
935 | /* copy stuff from on-disk struct to in-memory struct */ | 993 | /* copy stuff from on-disk struct to in-memory struct */ |
@@ -947,15 +1005,6 @@ struct inode *exofs_iget(struct super_block *sb, unsigned long ino) | |||
947 | inode->i_blkbits = EXOFS_BLKSHIFT; | 1005 | inode->i_blkbits = EXOFS_BLKSHIFT; |
948 | inode->i_generation = le32_to_cpu(fcb.i_generation); | 1006 | inode->i_generation = le32_to_cpu(fcb.i_generation); |
949 | 1007 | ||
950 | #ifdef EXOFS_DEBUG_OBJ_ISIZE | ||
951 | if ((inode->i_size != sanity) && | ||
952 | (!exofs_inode_is_fast_symlink(inode))) { | ||
953 | EXOFS_ERR("WARNING: Size of object from inode and " | ||
954 | "attributes differ (%lld != %llu)\n", | ||
955 | inode->i_size, _LLU(sanity)); | ||
956 | } | ||
957 | #endif | ||
958 | |||
959 | oi->i_dir_start_lookup = 0; | 1008 | oi->i_dir_start_lookup = 0; |
960 | 1009 | ||
961 | if ((inode->i_nlink == 0) && (inode->i_mode == 0)) { | 1010 | if ((inode->i_nlink == 0) && (inode->i_mode == 0)) { |
@@ -1020,23 +1069,30 @@ int __exofs_wait_obj_created(struct exofs_i_info *oi) | |||
1020 | * set the obj_created flag so that other methods know that the object exists on | 1069 | * set the obj_created flag so that other methods know that the object exists on |
1021 | * the OSD. | 1070 | * the OSD. |
1022 | */ | 1071 | */ |
1023 | static void create_done(struct osd_request *or, void *p) | 1072 | static void create_done(struct exofs_io_state *ios, void *p) |
1024 | { | 1073 | { |
1025 | struct inode *inode = p; | 1074 | struct inode *inode = p; |
1026 | struct exofs_i_info *oi = exofs_i(inode); | 1075 | struct exofs_i_info *oi = exofs_i(inode); |
1027 | struct exofs_sb_info *sbi = inode->i_sb->s_fs_info; | 1076 | struct exofs_sb_info *sbi = inode->i_sb->s_fs_info; |
1028 | int ret; | 1077 | int ret; |
1029 | 1078 | ||
1030 | ret = exofs_check_ok(or); | 1079 | ret = exofs_check_io(ios, NULL); |
1031 | osd_end_request(or); | 1080 | exofs_put_io_state(ios); |
1081 | |||
1032 | atomic_dec(&sbi->s_curr_pending); | 1082 | atomic_dec(&sbi->s_curr_pending); |
1033 | 1083 | ||
1034 | if (unlikely(ret)) { | 1084 | if (unlikely(ret)) { |
1035 | EXOFS_ERR("object=0x%llx creation faild in pid=0x%llx", | 1085 | EXOFS_ERR("object=0x%llx creation faild in pid=0x%llx", |
1036 | _LLU(sbi->s_pid), _LLU(inode->i_ino + EXOFS_OBJ_OFF)); | 1086 | _LLU(exofs_oi_objno(oi)), _LLU(sbi->layout.s_pid)); |
1037 | make_bad_inode(inode); | 1087 | /*TODO: When FS is corrupted creation can fail, object already |
1038 | } else | 1088 | * exist. Get rid of this asynchronous creation, if exist |
1039 | set_obj_created(oi); | 1089 | * increment the obj counter and try the next object. Until we |
1090 | * succeed. All these dangling objects will be made into lost | ||
1091 | * files by chkfs.exofs | ||
1092 | */ | ||
1093 | } | ||
1094 | |||
1095 | set_obj_created(oi); | ||
1040 | 1096 | ||
1041 | atomic_dec(&inode->i_count); | 1097 | atomic_dec(&inode->i_count); |
1042 | wake_up(&oi->i_wq); | 1098 | wake_up(&oi->i_wq); |
@@ -1051,8 +1107,7 @@ struct inode *exofs_new_inode(struct inode *dir, int mode) | |||
1051 | struct inode *inode; | 1107 | struct inode *inode; |
1052 | struct exofs_i_info *oi; | 1108 | struct exofs_i_info *oi; |
1053 | struct exofs_sb_info *sbi; | 1109 | struct exofs_sb_info *sbi; |
1054 | struct osd_request *or; | 1110 | struct exofs_io_state *ios; |
1055 | struct osd_obj_id obj; | ||
1056 | int ret; | 1111 | int ret; |
1057 | 1112 | ||
1058 | sb = dir->i_sb; | 1113 | sb = dir->i_sb; |
@@ -1061,8 +1116,8 @@ struct inode *exofs_new_inode(struct inode *dir, int mode) | |||
1061 | return ERR_PTR(-ENOMEM); | 1116 | return ERR_PTR(-ENOMEM); |
1062 | 1117 | ||
1063 | oi = exofs_i(inode); | 1118 | oi = exofs_i(inode); |
1119 | __oi_init(oi); | ||
1064 | 1120 | ||
1065 | init_waitqueue_head(&oi->i_wq); | ||
1066 | set_obj_2bcreated(oi); | 1121 | set_obj_2bcreated(oi); |
1067 | 1122 | ||
1068 | sbi = sb->s_fs_info; | 1123 | sbi = sb->s_fs_info; |
@@ -1089,28 +1144,28 @@ struct inode *exofs_new_inode(struct inode *dir, int mode) | |||
1089 | 1144 | ||
1090 | mark_inode_dirty(inode); | 1145 | mark_inode_dirty(inode); |
1091 | 1146 | ||
1092 | obj.partition = sbi->s_pid; | 1147 | ret = exofs_get_io_state(&sbi->layout, &ios); |
1093 | obj.id = inode->i_ino + EXOFS_OBJ_OFF; | 1148 | if (unlikely(ret)) { |
1094 | exofs_make_credential(oi->i_cred, &obj); | 1149 | EXOFS_ERR("exofs_new_inode: exofs_get_io_state failed\n"); |
1095 | 1150 | return ERR_PTR(ret); | |
1096 | or = osd_start_request(sbi->s_dev, GFP_KERNEL); | ||
1097 | if (unlikely(!or)) { | ||
1098 | EXOFS_ERR("exofs_new_inode: osd_start_request failed\n"); | ||
1099 | return ERR_PTR(-ENOMEM); | ||
1100 | } | 1151 | } |
1101 | 1152 | ||
1102 | osd_req_create_object(or, &obj); | 1153 | ios->obj.id = exofs_oi_objno(oi); |
1154 | exofs_make_credential(oi->i_cred, &ios->obj); | ||
1103 | 1155 | ||
1104 | /* increment the refcount so that the inode will still be around when we | 1156 | /* increment the refcount so that the inode will still be around when we |
1105 | * reach the callback | 1157 | * reach the callback |
1106 | */ | 1158 | */ |
1107 | atomic_inc(&inode->i_count); | 1159 | atomic_inc(&inode->i_count); |
1108 | 1160 | ||
1109 | ret = exofs_async_op(or, create_done, inode, oi->i_cred); | 1161 | ios->done = create_done; |
1162 | ios->private = inode; | ||
1163 | ios->cred = oi->i_cred; | ||
1164 | ret = exofs_sbi_create(ios); | ||
1110 | if (ret) { | 1165 | if (ret) { |
1111 | atomic_dec(&inode->i_count); | 1166 | atomic_dec(&inode->i_count); |
1112 | osd_end_request(or); | 1167 | exofs_put_io_state(ios); |
1113 | return ERR_PTR(-EIO); | 1168 | return ERR_PTR(ret); |
1114 | } | 1169 | } |
1115 | atomic_inc(&sbi->s_curr_pending); | 1170 | atomic_inc(&sbi->s_curr_pending); |
1116 | 1171 | ||
@@ -1128,11 +1183,11 @@ struct updatei_args { | |||
1128 | /* | 1183 | /* |
1129 | * Callback function from exofs_update_inode(). | 1184 | * Callback function from exofs_update_inode(). |
1130 | */ | 1185 | */ |
1131 | static void updatei_done(struct osd_request *or, void *p) | 1186 | static void updatei_done(struct exofs_io_state *ios, void *p) |
1132 | { | 1187 | { |
1133 | struct updatei_args *args = p; | 1188 | struct updatei_args *args = p; |
1134 | 1189 | ||
1135 | osd_end_request(or); | 1190 | exofs_put_io_state(ios); |
1136 | 1191 | ||
1137 | atomic_dec(&args->sbi->s_curr_pending); | 1192 | atomic_dec(&args->sbi->s_curr_pending); |
1138 | 1193 | ||
@@ -1148,16 +1203,17 @@ static int exofs_update_inode(struct inode *inode, int do_sync) | |||
1148 | struct exofs_i_info *oi = exofs_i(inode); | 1203 | struct exofs_i_info *oi = exofs_i(inode); |
1149 | struct super_block *sb = inode->i_sb; | 1204 | struct super_block *sb = inode->i_sb; |
1150 | struct exofs_sb_info *sbi = sb->s_fs_info; | 1205 | struct exofs_sb_info *sbi = sb->s_fs_info; |
1151 | struct osd_obj_id obj = {sbi->s_pid, inode->i_ino + EXOFS_OBJ_OFF}; | 1206 | struct exofs_io_state *ios; |
1152 | struct osd_request *or; | ||
1153 | struct osd_attr attr; | 1207 | struct osd_attr attr; |
1154 | struct exofs_fcb *fcb; | 1208 | struct exofs_fcb *fcb; |
1155 | struct updatei_args *args; | 1209 | struct updatei_args *args; |
1156 | int ret; | 1210 | int ret; |
1157 | 1211 | ||
1158 | args = kzalloc(sizeof(*args), GFP_KERNEL); | 1212 | args = kzalloc(sizeof(*args), GFP_KERNEL); |
1159 | if (!args) | 1213 | if (!args) { |
1214 | EXOFS_DBGMSG("Faild kzalloc of args\n"); | ||
1160 | return -ENOMEM; | 1215 | return -ENOMEM; |
1216 | } | ||
1161 | 1217 | ||
1162 | fcb = &args->fcb; | 1218 | fcb = &args->fcb; |
1163 | 1219 | ||
@@ -1186,18 +1242,16 @@ static int exofs_update_inode(struct inode *inode, int do_sync) | |||
1186 | } else | 1242 | } else |
1187 | memcpy(fcb->i_data, oi->i_data, sizeof(fcb->i_data)); | 1243 | memcpy(fcb->i_data, oi->i_data, sizeof(fcb->i_data)); |
1188 | 1244 | ||
1189 | or = osd_start_request(sbi->s_dev, GFP_KERNEL); | 1245 | ret = exofs_get_io_state(&sbi->layout, &ios); |
1190 | if (unlikely(!or)) { | 1246 | if (unlikely(ret)) { |
1191 | EXOFS_ERR("exofs_update_inode: osd_start_request failed.\n"); | 1247 | EXOFS_ERR("%s: exofs_get_io_state failed.\n", __func__); |
1192 | ret = -ENOMEM; | ||
1193 | goto free_args; | 1248 | goto free_args; |
1194 | } | 1249 | } |
1195 | 1250 | ||
1196 | osd_req_set_attributes(or, &obj); | ||
1197 | |||
1198 | attr = g_attr_inode_data; | 1251 | attr = g_attr_inode_data; |
1199 | attr.val_ptr = fcb; | 1252 | attr.val_ptr = fcb; |
1200 | osd_req_add_set_attr_list(or, &attr, 1); | 1253 | ios->out_attr_len = 1; |
1254 | ios->out_attr = &attr; | ||
1201 | 1255 | ||
1202 | if (!obj_created(oi)) { | 1256 | if (!obj_created(oi)) { |
1203 | EXOFS_DBGMSG("!obj_created\n"); | 1257 | EXOFS_DBGMSG("!obj_created\n"); |
@@ -1206,43 +1260,42 @@ static int exofs_update_inode(struct inode *inode, int do_sync) | |||
1206 | EXOFS_DBGMSG("wait_event done\n"); | 1260 | EXOFS_DBGMSG("wait_event done\n"); |
1207 | } | 1261 | } |
1208 | 1262 | ||
1209 | if (do_sync) { | 1263 | if (!do_sync) { |
1210 | ret = exofs_sync_op(or, sbi->s_timeout, oi->i_cred); | ||
1211 | osd_end_request(or); | ||
1212 | goto free_args; | ||
1213 | } else { | ||
1214 | args->sbi = sbi; | 1264 | args->sbi = sbi; |
1265 | ios->done = updatei_done; | ||
1266 | ios->private = args; | ||
1267 | } | ||
1215 | 1268 | ||
1216 | ret = exofs_async_op(or, updatei_done, args, oi->i_cred); | 1269 | ret = exofs_oi_write(oi, ios); |
1217 | if (ret) { | 1270 | if (!do_sync && !ret) { |
1218 | osd_end_request(or); | ||
1219 | goto free_args; | ||
1220 | } | ||
1221 | atomic_inc(&sbi->s_curr_pending); | 1271 | atomic_inc(&sbi->s_curr_pending); |
1222 | goto out; /* deallocation in updatei_done */ | 1272 | goto out; /* deallocation in updatei_done */ |
1223 | } | 1273 | } |
1224 | 1274 | ||
1275 | exofs_put_io_state(ios); | ||
1225 | free_args: | 1276 | free_args: |
1226 | kfree(args); | 1277 | kfree(args); |
1227 | out: | 1278 | out: |
1228 | EXOFS_DBGMSG("ret=>%d\n", ret); | 1279 | EXOFS_DBGMSG("(0x%lx) do_sync=%d ret=>%d\n", |
1280 | inode->i_ino, do_sync, ret); | ||
1229 | return ret; | 1281 | return ret; |
1230 | } | 1282 | } |
1231 | 1283 | ||
1232 | int exofs_write_inode(struct inode *inode, int wait) | 1284 | int exofs_write_inode(struct inode *inode, struct writeback_control *wbc) |
1233 | { | 1285 | { |
1234 | return exofs_update_inode(inode, wait); | 1286 | return exofs_update_inode(inode, wbc->sync_mode == WB_SYNC_ALL); |
1235 | } | 1287 | } |
1236 | 1288 | ||
1237 | /* | 1289 | /* |
1238 | * Callback function from exofs_delete_inode() - don't have much cleaning up to | 1290 | * Callback function from exofs_delete_inode() - don't have much cleaning up to |
1239 | * do. | 1291 | * do. |
1240 | */ | 1292 | */ |
1241 | static void delete_done(struct osd_request *or, void *p) | 1293 | static void delete_done(struct exofs_io_state *ios, void *p) |
1242 | { | 1294 | { |
1243 | struct exofs_sb_info *sbi; | 1295 | struct exofs_sb_info *sbi = p; |
1244 | osd_end_request(or); | 1296 | |
1245 | sbi = p; | 1297 | exofs_put_io_state(ios); |
1298 | |||
1246 | atomic_dec(&sbi->s_curr_pending); | 1299 | atomic_dec(&sbi->s_curr_pending); |
1247 | } | 1300 | } |
1248 | 1301 | ||
@@ -1256,8 +1309,7 @@ void exofs_delete_inode(struct inode *inode) | |||
1256 | struct exofs_i_info *oi = exofs_i(inode); | 1309 | struct exofs_i_info *oi = exofs_i(inode); |
1257 | struct super_block *sb = inode->i_sb; | 1310 | struct super_block *sb = inode->i_sb; |
1258 | struct exofs_sb_info *sbi = sb->s_fs_info; | 1311 | struct exofs_sb_info *sbi = sb->s_fs_info; |
1259 | struct osd_obj_id obj = {sbi->s_pid, inode->i_ino + EXOFS_OBJ_OFF}; | 1312 | struct exofs_io_state *ios; |
1260 | struct osd_request *or; | ||
1261 | int ret; | 1313 | int ret; |
1262 | 1314 | ||
1263 | truncate_inode_pages(&inode->i_data, 0); | 1315 | truncate_inode_pages(&inode->i_data, 0); |
@@ -1274,25 +1326,26 @@ void exofs_delete_inode(struct inode *inode) | |||
1274 | 1326 | ||
1275 | clear_inode(inode); | 1327 | clear_inode(inode); |
1276 | 1328 | ||
1277 | or = osd_start_request(sbi->s_dev, GFP_KERNEL); | 1329 | ret = exofs_get_io_state(&sbi->layout, &ios); |
1278 | if (unlikely(!or)) { | 1330 | if (unlikely(ret)) { |
1279 | EXOFS_ERR("exofs_delete_inode: osd_start_request failed\n"); | 1331 | EXOFS_ERR("%s: exofs_get_io_state failed\n", __func__); |
1280 | return; | 1332 | return; |
1281 | } | 1333 | } |
1282 | 1334 | ||
1283 | osd_req_remove_object(or, &obj); | ||
1284 | |||
1285 | /* if we are deleting an obj that hasn't been created yet, wait */ | 1335 | /* if we are deleting an obj that hasn't been created yet, wait */ |
1286 | if (!obj_created(oi)) { | 1336 | if (!obj_created(oi)) { |
1287 | BUG_ON(!obj_2bcreated(oi)); | 1337 | BUG_ON(!obj_2bcreated(oi)); |
1288 | wait_event(oi->i_wq, obj_created(oi)); | 1338 | wait_event(oi->i_wq, obj_created(oi)); |
1289 | } | 1339 | } |
1290 | 1340 | ||
1291 | ret = exofs_async_op(or, delete_done, sbi, oi->i_cred); | 1341 | ios->obj.id = exofs_oi_objno(oi); |
1342 | ios->done = delete_done; | ||
1343 | ios->private = sbi; | ||
1344 | ios->cred = oi->i_cred; | ||
1345 | ret = exofs_sbi_remove(ios); | ||
1292 | if (ret) { | 1346 | if (ret) { |
1293 | EXOFS_ERR( | 1347 | EXOFS_ERR("%s: exofs_sbi_remove failed\n", __func__); |
1294 | "ERROR: @exofs_delete_inode exofs_async_op failed\n"); | 1348 | exofs_put_io_state(ios); |
1295 | osd_end_request(or); | ||
1296 | return; | 1349 | return; |
1297 | } | 1350 | } |
1298 | atomic_inc(&sbi->s_curr_pending); | 1351 | atomic_inc(&sbi->s_curr_pending); |