diff options
author | Boaz Harrosh <bharrosh@panasas.com> | 2008-10-27 13:31:34 -0400 |
---|---|---|
committer | Boaz Harrosh <bharrosh@panasas.com> | 2009-03-31 12:44:29 -0400 |
commit | beaec07ba6af35d387643b76a2920a7a6e22207b (patch) | |
tree | 74ffd4738198424f698ae238e4d3164938ef5af7 /fs/exofs | |
parent | 982980d753223fda3864038236b7b94e246895cb (diff) |
exofs: address_space_operations
OK, now we start to read and write from osd-objects. We try to
collect as many contiguous pages as possible in a single write/read.
The index of the first page determines the offset within the object.
TODO:
On 64-bit, a single bio can carry at most 128 pages.
Add support for chaining multiple bios.
Signed-off-by: Boaz Harrosh <bharrosh@panasas.com>
Diffstat (limited to 'fs/exofs')
-rw-r--r-- | fs/exofs/exofs.h | 6 | ||||
-rw-r--r-- | fs/exofs/inode.c | 697 |
2 files changed, 703 insertions, 0 deletions
diff --git a/fs/exofs/exofs.h b/fs/exofs/exofs.h index 825454d76f6c..0276242326ab 100644 --- a/fs/exofs/exofs.h +++ b/fs/exofs/exofs.h | |||
@@ -130,6 +130,9 @@ static inline struct exofs_i_info *exofs_i(struct inode *inode) | |||
130 | /* inode.c */ | 130 | /* inode.c */ |
131 | void exofs_truncate(struct inode *inode); | 131 | void exofs_truncate(struct inode *inode); |
132 | int exofs_setattr(struct dentry *, struct iattr *); | 132 | int exofs_setattr(struct dentry *, struct iattr *); |
133 | int exofs_write_begin(struct file *file, struct address_space *mapping, | ||
134 | loff_t pos, unsigned len, unsigned flags, | ||
135 | struct page **pagep, void **fsdata); | ||
133 | 136 | ||
134 | /********************* | 137 | /********************* |
135 | * operation vectors * | 138 | * operation vectors * |
@@ -138,6 +141,9 @@ int exofs_setattr(struct dentry *, struct iattr *); | |||
138 | extern const struct inode_operations exofs_file_inode_operations; | 141 | extern const struct inode_operations exofs_file_inode_operations; |
139 | extern const struct file_operations exofs_file_operations; | 142 | extern const struct file_operations exofs_file_operations; |
140 | 143 | ||
144 | /* inode.c */ | ||
145 | extern const struct address_space_operations exofs_aops; | ||
146 | |||
141 | /* symlink.c */ | 147 | /* symlink.c */ |
142 | extern const struct inode_operations exofs_symlink_inode_operations; | 148 | extern const struct inode_operations exofs_symlink_inode_operations; |
143 | extern const struct inode_operations exofs_fast_symlink_inode_operations; | 149 | extern const struct inode_operations exofs_fast_symlink_inode_operations; |
diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c index b0bda1e91225..a3691d8bfb98 100644 --- a/fs/exofs/inode.c +++ b/fs/exofs/inode.c | |||
@@ -35,6 +35,7 @@ | |||
35 | 35 | ||
36 | #include <linux/writeback.h> | 36 | #include <linux/writeback.h> |
37 | #include <linux/buffer_head.h> | 37 | #include <linux/buffer_head.h> |
38 | #include <scsi/scsi_device.h> | ||
38 | 39 | ||
39 | #include "exofs.h" | 40 | #include "exofs.h" |
40 | 41 | ||
@@ -42,6 +43,702 @@ | |||
42 | # define EXOFS_DEBUG_OBJ_ISIZE 1 | 43 | # define EXOFS_DEBUG_OBJ_ISIZE 1 |
43 | #endif | 44 | #endif |
44 | 45 | ||
46 | struct page_collect { | ||
47 | struct exofs_sb_info *sbi; | ||
48 | struct request_queue *req_q; | ||
49 | struct inode *inode; | ||
50 | unsigned expected_pages; | ||
51 | |||
52 | struct bio *bio; | ||
53 | unsigned nr_pages; | ||
54 | unsigned long length; | ||
55 | loff_t pg_first; /* keep 64bit also in 32-arches */ | ||
56 | }; | ||
57 | |||
58 | static void _pcol_init(struct page_collect *pcol, unsigned expected_pages, | ||
59 | struct inode *inode) | ||
60 | { | ||
61 | struct exofs_sb_info *sbi = inode->i_sb->s_fs_info; | ||
62 | struct request_queue *req_q = sbi->s_dev->scsi_device->request_queue; | ||
63 | |||
64 | pcol->sbi = sbi; | ||
65 | pcol->req_q = req_q; | ||
66 | pcol->inode = inode; | ||
67 | pcol->expected_pages = expected_pages; | ||
68 | |||
69 | pcol->bio = NULL; | ||
70 | pcol->nr_pages = 0; | ||
71 | pcol->length = 0; | ||
72 | pcol->pg_first = -1; | ||
73 | |||
74 | EXOFS_DBGMSG("_pcol_init ino=0x%lx expected_pages=%u\n", inode->i_ino, | ||
75 | expected_pages); | ||
76 | } | ||
77 | |||
78 | static void _pcol_reset(struct page_collect *pcol) | ||
79 | { | ||
80 | pcol->expected_pages -= min(pcol->nr_pages, pcol->expected_pages); | ||
81 | |||
82 | pcol->bio = NULL; | ||
83 | pcol->nr_pages = 0; | ||
84 | pcol->length = 0; | ||
85 | pcol->pg_first = -1; | ||
86 | EXOFS_DBGMSG("_pcol_reset ino=0x%lx expected_pages=%u\n", | ||
87 | pcol->inode->i_ino, pcol->expected_pages); | ||
88 | |||
89 | /* this is probably the end of the loop but in writes | ||
90 | * it might not end here. don't be left with nothing | ||
91 | */ | ||
92 | if (!pcol->expected_pages) | ||
93 | pcol->expected_pages = 128; | ||
94 | } | ||
95 | |||
96 | static int pcol_try_alloc(struct page_collect *pcol) | ||
97 | { | ||
98 | int pages = min_t(unsigned, pcol->expected_pages, BIO_MAX_PAGES); | ||
99 | |||
100 | for (; pages; pages >>= 1) { | ||
101 | pcol->bio = bio_alloc(GFP_KERNEL, pages); | ||
102 | if (likely(pcol->bio)) | ||
103 | return 0; | ||
104 | } | ||
105 | |||
106 | EXOFS_ERR("Failed to kcalloc expected_pages=%u\n", | ||
107 | pcol->expected_pages); | ||
108 | return -ENOMEM; | ||
109 | } | ||
110 | |||
111 | static void pcol_free(struct page_collect *pcol) | ||
112 | { | ||
113 | bio_put(pcol->bio); | ||
114 | pcol->bio = NULL; | ||
115 | } | ||
116 | |||
117 | static int pcol_add_page(struct page_collect *pcol, struct page *page, | ||
118 | unsigned len) | ||
119 | { | ||
120 | int added_len = bio_add_pc_page(pcol->req_q, pcol->bio, page, len, 0); | ||
121 | if (unlikely(len != added_len)) | ||
122 | return -ENOMEM; | ||
123 | |||
124 | ++pcol->nr_pages; | ||
125 | pcol->length += len; | ||
126 | return 0; | ||
127 | } | ||
128 | |||
129 | static int update_read_page(struct page *page, int ret) | ||
130 | { | ||
131 | if (ret == 0) { | ||
132 | /* Everything is OK */ | ||
133 | SetPageUptodate(page); | ||
134 | if (PageError(page)) | ||
135 | ClearPageError(page); | ||
136 | } else if (ret == -EFAULT) { | ||
137 | /* In this case we were trying to read something that wasn't on | ||
138 | * disk yet - return a page full of zeroes. This should be OK, | ||
139 | * because the object should be empty (if there was a write | ||
140 | * before this read, the read would be waiting with the page | ||
141 | * locked */ | ||
142 | clear_highpage(page); | ||
143 | |||
144 | SetPageUptodate(page); | ||
145 | if (PageError(page)) | ||
146 | ClearPageError(page); | ||
147 | ret = 0; /* recovered error */ | ||
148 | EXOFS_DBGMSG("recovered read error\n"); | ||
149 | } else /* Error */ | ||
150 | SetPageError(page); | ||
151 | |||
152 | return ret; | ||
153 | } | ||
154 | |||
155 | static void update_write_page(struct page *page, int ret) | ||
156 | { | ||
157 | if (ret) { | ||
158 | mapping_set_error(page->mapping, ret); | ||
159 | SetPageError(page); | ||
160 | } | ||
161 | end_page_writeback(page); | ||
162 | } | ||
163 | |||
164 | /* Called at the end of reads, to optionally unlock pages and update their | ||
165 | * status. | ||
166 | */ | ||
167 | static int __readpages_done(struct osd_request *or, struct page_collect *pcol, | ||
168 | bool do_unlock) | ||
169 | { | ||
170 | struct bio_vec *bvec; | ||
171 | int i; | ||
172 | u64 resid; | ||
173 | u64 good_bytes; | ||
174 | u64 length = 0; | ||
175 | int ret = exofs_check_ok_resid(or, &resid, NULL); | ||
176 | |||
177 | osd_end_request(or); | ||
178 | |||
179 | if (likely(!ret)) | ||
180 | good_bytes = pcol->length; | ||
181 | else if (!resid) | ||
182 | good_bytes = 0; | ||
183 | else | ||
184 | good_bytes = pcol->length - resid; | ||
185 | |||
186 | EXOFS_DBGMSG("readpages_done(0x%lx) good_bytes=0x%llx" | ||
187 | " length=0x%lx nr_pages=%u\n", | ||
188 | pcol->inode->i_ino, _LLU(good_bytes), pcol->length, | ||
189 | pcol->nr_pages); | ||
190 | |||
191 | __bio_for_each_segment(bvec, pcol->bio, i, 0) { | ||
192 | struct page *page = bvec->bv_page; | ||
193 | struct inode *inode = page->mapping->host; | ||
194 | int page_stat; | ||
195 | |||
196 | if (inode != pcol->inode) | ||
197 | continue; /* osd might add more pages at end */ | ||
198 | |||
199 | if (likely(length < good_bytes)) | ||
200 | page_stat = 0; | ||
201 | else | ||
202 | page_stat = ret; | ||
203 | |||
204 | EXOFS_DBGMSG(" readpages_done(0x%lx, 0x%lx) %s\n", | ||
205 | inode->i_ino, page->index, | ||
206 | page_stat ? "bad_bytes" : "good_bytes"); | ||
207 | |||
208 | ret = update_read_page(page, page_stat); | ||
209 | if (do_unlock) | ||
210 | unlock_page(page); | ||
211 | length += bvec->bv_len; | ||
212 | } | ||
213 | |||
214 | pcol_free(pcol); | ||
215 | EXOFS_DBGMSG("readpages_done END\n"); | ||
216 | return ret; | ||
217 | } | ||
218 | |||
219 | /* callback of async reads */ | ||
220 | static void readpages_done(struct osd_request *or, void *p) | ||
221 | { | ||
222 | struct page_collect *pcol = p; | ||
223 | |||
224 | __readpages_done(or, pcol, true); | ||
225 | atomic_dec(&pcol->sbi->s_curr_pending); | ||
226 | kfree(p); | ||
227 | } | ||
228 | |||
229 | static void _unlock_pcol_pages(struct page_collect *pcol, int ret, int rw) | ||
230 | { | ||
231 | struct bio_vec *bvec; | ||
232 | int i; | ||
233 | |||
234 | __bio_for_each_segment(bvec, pcol->bio, i, 0) { | ||
235 | struct page *page = bvec->bv_page; | ||
236 | |||
237 | if (rw == READ) | ||
238 | update_read_page(page, ret); | ||
239 | else | ||
240 | update_write_page(page, ret); | ||
241 | |||
242 | unlock_page(page); | ||
243 | } | ||
244 | pcol_free(pcol); | ||
245 | } | ||
246 | |||
247 | static int read_exec(struct page_collect *pcol, bool is_sync) | ||
248 | { | ||
249 | struct exofs_i_info *oi = exofs_i(pcol->inode); | ||
250 | struct osd_obj_id obj = {pcol->sbi->s_pid, | ||
251 | pcol->inode->i_ino + EXOFS_OBJ_OFF}; | ||
252 | struct osd_request *or = NULL; | ||
253 | struct page_collect *pcol_copy = NULL; | ||
254 | loff_t i_start = pcol->pg_first << PAGE_CACHE_SHIFT; | ||
255 | int ret; | ||
256 | |||
257 | if (!pcol->bio) | ||
258 | return 0; | ||
259 | |||
260 | /* see comment in _readpage() about sync reads */ | ||
261 | WARN_ON(is_sync && (pcol->nr_pages != 1)); | ||
262 | |||
263 | or = osd_start_request(pcol->sbi->s_dev, GFP_KERNEL); | ||
264 | if (unlikely(!or)) { | ||
265 | ret = -ENOMEM; | ||
266 | goto err; | ||
267 | } | ||
268 | |||
269 | osd_req_read(or, &obj, pcol->bio, i_start); | ||
270 | |||
271 | if (is_sync) { | ||
272 | exofs_sync_op(or, pcol->sbi->s_timeout, oi->i_cred); | ||
273 | return __readpages_done(or, pcol, false); | ||
274 | } | ||
275 | |||
276 | pcol_copy = kmalloc(sizeof(*pcol_copy), GFP_KERNEL); | ||
277 | if (!pcol_copy) { | ||
278 | ret = -ENOMEM; | ||
279 | goto err; | ||
280 | } | ||
281 | |||
282 | *pcol_copy = *pcol; | ||
283 | ret = exofs_async_op(or, readpages_done, pcol_copy, oi->i_cred); | ||
284 | if (unlikely(ret)) | ||
285 | goto err; | ||
286 | |||
287 | atomic_inc(&pcol->sbi->s_curr_pending); | ||
288 | |||
289 | EXOFS_DBGMSG("read_exec obj=0x%llx start=0x%llx length=0x%lx\n", | ||
290 | obj.id, _LLU(i_start), pcol->length); | ||
291 | |||
292 | /* pages ownership was passed to pcol_copy */ | ||
293 | _pcol_reset(pcol); | ||
294 | return 0; | ||
295 | |||
296 | err: | ||
297 | if (!is_sync) | ||
298 | _unlock_pcol_pages(pcol, ret, READ); | ||
299 | kfree(pcol_copy); | ||
300 | if (or) | ||
301 | osd_end_request(or); | ||
302 | return ret; | ||
303 | } | ||
304 | |||
305 | /* readpage_strip is called either directly from readpage() or by the VFS from | ||
306 | * within read_cache_pages(), to add one more page to be read. It will try to | ||
307 | * collect as many contiguous pages as posible. If a discontinuity is | ||
308 | * encountered, or it runs out of resources, it will submit the previous segment | ||
309 | * and will start a new collection. Eventually caller must submit the last | ||
310 | * segment if present. | ||
311 | */ | ||
312 | static int readpage_strip(void *data, struct page *page) | ||
313 | { | ||
314 | struct page_collect *pcol = data; | ||
315 | struct inode *inode = pcol->inode; | ||
316 | struct exofs_i_info *oi = exofs_i(inode); | ||
317 | loff_t i_size = i_size_read(inode); | ||
318 | pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT; | ||
319 | size_t len; | ||
320 | int ret; | ||
321 | |||
322 | /* FIXME: Just for debugging, will be removed */ | ||
323 | if (PageUptodate(page)) | ||
324 | EXOFS_ERR("PageUptodate(0x%lx, 0x%lx)\n", pcol->inode->i_ino, | ||
325 | page->index); | ||
326 | |||
327 | if (page->index < end_index) | ||
328 | len = PAGE_CACHE_SIZE; | ||
329 | else if (page->index == end_index) | ||
330 | len = i_size & ~PAGE_CACHE_MASK; | ||
331 | else | ||
332 | len = 0; | ||
333 | |||
334 | if (!len || !obj_created(oi)) { | ||
335 | /* this will be out of bounds, or doesn't exist yet. | ||
336 | * Current page is cleared and the request is split | ||
337 | */ | ||
338 | clear_highpage(page); | ||
339 | |||
340 | SetPageUptodate(page); | ||
341 | if (PageError(page)) | ||
342 | ClearPageError(page); | ||
343 | |||
344 | unlock_page(page); | ||
345 | EXOFS_DBGMSG("readpage_strip(0x%lx, 0x%lx) empty page," | ||
346 | " splitting\n", inode->i_ino, page->index); | ||
347 | |||
348 | return read_exec(pcol, false); | ||
349 | } | ||
350 | |||
351 | try_again: | ||
352 | |||
353 | if (unlikely(pcol->pg_first == -1)) { | ||
354 | pcol->pg_first = page->index; | ||
355 | } else if (unlikely((pcol->pg_first + pcol->nr_pages) != | ||
356 | page->index)) { | ||
357 | /* Discontinuity detected, split the request */ | ||
358 | ret = read_exec(pcol, false); | ||
359 | if (unlikely(ret)) | ||
360 | goto fail; | ||
361 | goto try_again; | ||
362 | } | ||
363 | |||
364 | if (!pcol->bio) { | ||
365 | ret = pcol_try_alloc(pcol); | ||
366 | if (unlikely(ret)) | ||
367 | goto fail; | ||
368 | } | ||
369 | |||
370 | if (len != PAGE_CACHE_SIZE) | ||
371 | zero_user(page, len, PAGE_CACHE_SIZE - len); | ||
372 | |||
373 | EXOFS_DBGMSG(" readpage_strip(0x%lx, 0x%lx) len=0x%zx\n", | ||
374 | inode->i_ino, page->index, len); | ||
375 | |||
376 | ret = pcol_add_page(pcol, page, len); | ||
377 | if (ret) { | ||
378 | EXOFS_DBGMSG("Failed pcol_add_page pages[i]=%p " | ||
379 | "this_len=0x%zx nr_pages=%u length=0x%lx\n", | ||
380 | page, len, pcol->nr_pages, pcol->length); | ||
381 | |||
382 | /* split the request, and start again with current page */ | ||
383 | ret = read_exec(pcol, false); | ||
384 | if (unlikely(ret)) | ||
385 | goto fail; | ||
386 | |||
387 | goto try_again; | ||
388 | } | ||
389 | |||
390 | return 0; | ||
391 | |||
392 | fail: | ||
393 | /* SetPageError(page); ??? */ | ||
394 | unlock_page(page); | ||
395 | return ret; | ||
396 | } | ||
397 | |||
398 | static int exofs_readpages(struct file *file, struct address_space *mapping, | ||
399 | struct list_head *pages, unsigned nr_pages) | ||
400 | { | ||
401 | struct page_collect pcol; | ||
402 | int ret; | ||
403 | |||
404 | _pcol_init(&pcol, nr_pages, mapping->host); | ||
405 | |||
406 | ret = read_cache_pages(mapping, pages, readpage_strip, &pcol); | ||
407 | if (ret) { | ||
408 | EXOFS_ERR("read_cache_pages => %d\n", ret); | ||
409 | return ret; | ||
410 | } | ||
411 | |||
412 | return read_exec(&pcol, false); | ||
413 | } | ||
414 | |||
415 | static int _readpage(struct page *page, bool is_sync) | ||
416 | { | ||
417 | struct page_collect pcol; | ||
418 | int ret; | ||
419 | |||
420 | _pcol_init(&pcol, 1, page->mapping->host); | ||
421 | |||
422 | /* readpage_strip might call read_exec(,async) inside at several places | ||
423 | * but this is safe for is_async=0 since read_exec will not do anything | ||
424 | * when we have a single page. | ||
425 | */ | ||
426 | ret = readpage_strip(&pcol, page); | ||
427 | if (ret) { | ||
428 | EXOFS_ERR("_readpage => %d\n", ret); | ||
429 | return ret; | ||
430 | } | ||
431 | |||
432 | return read_exec(&pcol, is_sync); | ||
433 | } | ||
434 | |||
435 | /* | ||
436 | * We don't need the file | ||
437 | */ | ||
438 | static int exofs_readpage(struct file *file, struct page *page) | ||
439 | { | ||
440 | return _readpage(page, false); | ||
441 | } | ||
442 | |||
443 | /* Callback for osd_write. All writes are asynchronouse */ | ||
444 | static void writepages_done(struct osd_request *or, void *p) | ||
445 | { | ||
446 | struct page_collect *pcol = p; | ||
447 | struct bio_vec *bvec; | ||
448 | int i; | ||
449 | u64 resid; | ||
450 | u64 good_bytes; | ||
451 | u64 length = 0; | ||
452 | |||
453 | int ret = exofs_check_ok_resid(or, NULL, &resid); | ||
454 | |||
455 | osd_end_request(or); | ||
456 | atomic_dec(&pcol->sbi->s_curr_pending); | ||
457 | |||
458 | if (likely(!ret)) | ||
459 | good_bytes = pcol->length; | ||
460 | else if (!resid) | ||
461 | good_bytes = 0; | ||
462 | else | ||
463 | good_bytes = pcol->length - resid; | ||
464 | |||
465 | EXOFS_DBGMSG("writepages_done(0x%lx) good_bytes=0x%llx" | ||
466 | " length=0x%lx nr_pages=%u\n", | ||
467 | pcol->inode->i_ino, _LLU(good_bytes), pcol->length, | ||
468 | pcol->nr_pages); | ||
469 | |||
470 | __bio_for_each_segment(bvec, pcol->bio, i, 0) { | ||
471 | struct page *page = bvec->bv_page; | ||
472 | struct inode *inode = page->mapping->host; | ||
473 | int page_stat; | ||
474 | |||
475 | if (inode != pcol->inode) | ||
476 | continue; /* osd might add more pages to a bio */ | ||
477 | |||
478 | if (likely(length < good_bytes)) | ||
479 | page_stat = 0; | ||
480 | else | ||
481 | page_stat = ret; | ||
482 | |||
483 | update_write_page(page, page_stat); | ||
484 | unlock_page(page); | ||
485 | EXOFS_DBGMSG(" writepages_done(0x%lx, 0x%lx) status=%d\n", | ||
486 | inode->i_ino, page->index, page_stat); | ||
487 | |||
488 | length += bvec->bv_len; | ||
489 | } | ||
490 | |||
491 | pcol_free(pcol); | ||
492 | kfree(pcol); | ||
493 | EXOFS_DBGMSG("writepages_done END\n"); | ||
494 | } | ||
495 | |||
496 | static int write_exec(struct page_collect *pcol) | ||
497 | { | ||
498 | struct exofs_i_info *oi = exofs_i(pcol->inode); | ||
499 | struct osd_obj_id obj = {pcol->sbi->s_pid, | ||
500 | pcol->inode->i_ino + EXOFS_OBJ_OFF}; | ||
501 | struct osd_request *or = NULL; | ||
502 | struct page_collect *pcol_copy = NULL; | ||
503 | loff_t i_start = pcol->pg_first << PAGE_CACHE_SHIFT; | ||
504 | int ret; | ||
505 | |||
506 | if (!pcol->bio) | ||
507 | return 0; | ||
508 | |||
509 | or = osd_start_request(pcol->sbi->s_dev, GFP_KERNEL); | ||
510 | if (unlikely(!or)) { | ||
511 | EXOFS_ERR("write_exec: Faild to osd_start_request()\n"); | ||
512 | ret = -ENOMEM; | ||
513 | goto err; | ||
514 | } | ||
515 | |||
516 | pcol_copy = kmalloc(sizeof(*pcol_copy), GFP_KERNEL); | ||
517 | if (!pcol_copy) { | ||
518 | EXOFS_ERR("write_exec: Faild to kmalloc(pcol)\n"); | ||
519 | ret = -ENOMEM; | ||
520 | goto err; | ||
521 | } | ||
522 | |||
523 | *pcol_copy = *pcol; | ||
524 | |||
525 | osd_req_write(or, &obj, pcol_copy->bio, i_start); | ||
526 | ret = exofs_async_op(or, writepages_done, pcol_copy, oi->i_cred); | ||
527 | if (unlikely(ret)) { | ||
528 | EXOFS_ERR("write_exec: exofs_async_op() Faild\n"); | ||
529 | goto err; | ||
530 | } | ||
531 | |||
532 | atomic_inc(&pcol->sbi->s_curr_pending); | ||
533 | EXOFS_DBGMSG("write_exec(0x%lx, 0x%llx) start=0x%llx length=0x%lx\n", | ||
534 | pcol->inode->i_ino, pcol->pg_first, _LLU(i_start), | ||
535 | pcol->length); | ||
536 | /* pages ownership was passed to pcol_copy */ | ||
537 | _pcol_reset(pcol); | ||
538 | return 0; | ||
539 | |||
540 | err: | ||
541 | _unlock_pcol_pages(pcol, ret, WRITE); | ||
542 | kfree(pcol_copy); | ||
543 | if (or) | ||
544 | osd_end_request(or); | ||
545 | return ret; | ||
546 | } | ||
547 | |||
548 | /* writepage_strip is called either directly from writepage() or by the VFS from | ||
549 | * within write_cache_pages(), to add one more page to be written to storage. | ||
550 | * It will try to collect as many contiguous pages as possible. If a | ||
551 | * discontinuity is encountered or it runs out of resources it will submit the | ||
552 | * previous segment and will start a new collection. | ||
553 | * Eventually caller must submit the last segment if present. | ||
554 | */ | ||
555 | static int writepage_strip(struct page *page, | ||
556 | struct writeback_control *wbc_unused, void *data) | ||
557 | { | ||
558 | struct page_collect *pcol = data; | ||
559 | struct inode *inode = pcol->inode; | ||
560 | struct exofs_i_info *oi = exofs_i(inode); | ||
561 | loff_t i_size = i_size_read(inode); | ||
562 | pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT; | ||
563 | size_t len; | ||
564 | int ret; | ||
565 | |||
566 | BUG_ON(!PageLocked(page)); | ||
567 | |||
568 | ret = wait_obj_created(oi); | ||
569 | if (unlikely(ret)) | ||
570 | goto fail; | ||
571 | |||
572 | if (page->index < end_index) | ||
573 | /* in this case, the page is within the limits of the file */ | ||
574 | len = PAGE_CACHE_SIZE; | ||
575 | else { | ||
576 | len = i_size & ~PAGE_CACHE_MASK; | ||
577 | |||
578 | if (page->index > end_index || !len) { | ||
579 | /* in this case, the page is outside the limits | ||
580 | * (truncate in progress) | ||
581 | */ | ||
582 | ret = write_exec(pcol); | ||
583 | if (unlikely(ret)) | ||
584 | goto fail; | ||
585 | if (PageError(page)) | ||
586 | ClearPageError(page); | ||
587 | unlock_page(page); | ||
588 | return 0; | ||
589 | } | ||
590 | } | ||
591 | |||
592 | try_again: | ||
593 | |||
594 | if (unlikely(pcol->pg_first == -1)) { | ||
595 | pcol->pg_first = page->index; | ||
596 | } else if (unlikely((pcol->pg_first + pcol->nr_pages) != | ||
597 | page->index)) { | ||
598 | /* Discontinuity detected, split the request */ | ||
599 | ret = write_exec(pcol); | ||
600 | if (unlikely(ret)) | ||
601 | goto fail; | ||
602 | goto try_again; | ||
603 | } | ||
604 | |||
605 | if (!pcol->bio) { | ||
606 | ret = pcol_try_alloc(pcol); | ||
607 | if (unlikely(ret)) | ||
608 | goto fail; | ||
609 | } | ||
610 | |||
611 | EXOFS_DBGMSG(" writepage_strip(0x%lx, 0x%lx) len=0x%zx\n", | ||
612 | inode->i_ino, page->index, len); | ||
613 | |||
614 | ret = pcol_add_page(pcol, page, len); | ||
615 | if (unlikely(ret)) { | ||
616 | EXOFS_DBGMSG("Failed pcol_add_page " | ||
617 | "nr_pages=%u total_length=0x%lx\n", | ||
618 | pcol->nr_pages, pcol->length); | ||
619 | |||
620 | /* split the request, next loop will start again */ | ||
621 | ret = write_exec(pcol); | ||
622 | if (unlikely(ret)) { | ||
623 | EXOFS_DBGMSG("write_exec faild => %d", ret); | ||
624 | goto fail; | ||
625 | } | ||
626 | |||
627 | goto try_again; | ||
628 | } | ||
629 | |||
630 | BUG_ON(PageWriteback(page)); | ||
631 | set_page_writeback(page); | ||
632 | |||
633 | return 0; | ||
634 | |||
635 | fail: | ||
636 | set_bit(AS_EIO, &page->mapping->flags); | ||
637 | unlock_page(page); | ||
638 | return ret; | ||
639 | } | ||
640 | |||
641 | static int exofs_writepages(struct address_space *mapping, | ||
642 | struct writeback_control *wbc) | ||
643 | { | ||
644 | struct page_collect pcol; | ||
645 | long start, end, expected_pages; | ||
646 | int ret; | ||
647 | |||
648 | start = wbc->range_start >> PAGE_CACHE_SHIFT; | ||
649 | end = (wbc->range_end == LLONG_MAX) ? | ||
650 | start + mapping->nrpages : | ||
651 | wbc->range_end >> PAGE_CACHE_SHIFT; | ||
652 | |||
653 | if (start || end) | ||
654 | expected_pages = min(end - start + 1, 32L); | ||
655 | else | ||
656 | expected_pages = mapping->nrpages; | ||
657 | |||
658 | EXOFS_DBGMSG("inode(0x%lx) wbc->start=0x%llx wbc->end=0x%llx" | ||
659 | " m->nrpages=%lu start=0x%lx end=0x%lx\n", | ||
660 | mapping->host->i_ino, wbc->range_start, wbc->range_end, | ||
661 | mapping->nrpages, start, end); | ||
662 | |||
663 | _pcol_init(&pcol, expected_pages, mapping->host); | ||
664 | |||
665 | ret = write_cache_pages(mapping, wbc, writepage_strip, &pcol); | ||
666 | if (ret) { | ||
667 | EXOFS_ERR("write_cache_pages => %d\n", ret); | ||
668 | return ret; | ||
669 | } | ||
670 | |||
671 | return write_exec(&pcol); | ||
672 | } | ||
673 | |||
674 | static int exofs_writepage(struct page *page, struct writeback_control *wbc) | ||
675 | { | ||
676 | struct page_collect pcol; | ||
677 | int ret; | ||
678 | |||
679 | _pcol_init(&pcol, 1, page->mapping->host); | ||
680 | |||
681 | ret = writepage_strip(page, NULL, &pcol); | ||
682 | if (ret) { | ||
683 | EXOFS_ERR("exofs_writepage => %d\n", ret); | ||
684 | return ret; | ||
685 | } | ||
686 | |||
687 | return write_exec(&pcol); | ||
688 | } | ||
689 | |||
690 | int exofs_write_begin(struct file *file, struct address_space *mapping, | ||
691 | loff_t pos, unsigned len, unsigned flags, | ||
692 | struct page **pagep, void **fsdata) | ||
693 | { | ||
694 | int ret = 0; | ||
695 | struct page *page; | ||
696 | |||
697 | page = *pagep; | ||
698 | if (page == NULL) { | ||
699 | ret = simple_write_begin(file, mapping, pos, len, flags, pagep, | ||
700 | fsdata); | ||
701 | if (ret) { | ||
702 | EXOFS_DBGMSG("simple_write_begin faild\n"); | ||
703 | return ret; | ||
704 | } | ||
705 | |||
706 | page = *pagep; | ||
707 | } | ||
708 | |||
709 | /* read modify write */ | ||
710 | if (!PageUptodate(page) && (len != PAGE_CACHE_SIZE)) { | ||
711 | ret = _readpage(page, true); | ||
712 | if (ret) { | ||
713 | /*SetPageError was done by _readpage. Is it ok?*/ | ||
714 | unlock_page(page); | ||
715 | EXOFS_DBGMSG("__readpage_filler faild\n"); | ||
716 | } | ||
717 | } | ||
718 | |||
719 | return ret; | ||
720 | } | ||
721 | |||
722 | static int exofs_write_begin_export(struct file *file, | ||
723 | struct address_space *mapping, | ||
724 | loff_t pos, unsigned len, unsigned flags, | ||
725 | struct page **pagep, void **fsdata) | ||
726 | { | ||
727 | *pagep = NULL; | ||
728 | |||
729 | return exofs_write_begin(file, mapping, pos, len, flags, pagep, | ||
730 | fsdata); | ||
731 | } | ||
732 | |||
733 | const struct address_space_operations exofs_aops = { | ||
734 | .readpage = exofs_readpage, | ||
735 | .readpages = exofs_readpages, | ||
736 | .writepage = exofs_writepage, | ||
737 | .writepages = exofs_writepages, | ||
738 | .write_begin = exofs_write_begin_export, | ||
739 | .write_end = simple_write_end, | ||
740 | }; | ||
741 | |||
45 | /****************************************************************************** | 742 | /****************************************************************************** |
46 | * INODE OPERATIONS | 743 | * INODE OPERATIONS |
47 | *****************************************************************************/ | 744 | *****************************************************************************/ |