diff options
| author | Boaz Harrosh <bharrosh@panasas.com> | 2008-10-27 13:31:34 -0400 |
|---|---|---|
| committer | Boaz Harrosh <bharrosh@panasas.com> | 2009-03-31 12:44:29 -0400 |
| commit | beaec07ba6af35d387643b76a2920a7a6e22207b (patch) | |
| tree | 74ffd4738198424f698ae238e4d3164938ef5af7 /fs/exofs | |
| parent | 982980d753223fda3864038236b7b94e246895cb (diff) | |
exofs: address_space_operations
OK, now we start to read and write from osd-objects. We try to
collect as many contiguous pages as possible in a single write/read.
The first page index is the object's offset.
TODO:
On 64-bit, a single bio can carry at most 128 pages.
Add support for chaining multiple bios.
Signed-off-by: Boaz Harrosh <bharrosh@panasas.com>
Diffstat (limited to 'fs/exofs')
| -rw-r--r-- | fs/exofs/exofs.h | 6 | ||||
| -rw-r--r-- | fs/exofs/inode.c | 697 |
2 files changed, 703 insertions, 0 deletions
diff --git a/fs/exofs/exofs.h b/fs/exofs/exofs.h index 825454d76f6..0276242326a 100644 --- a/fs/exofs/exofs.h +++ b/fs/exofs/exofs.h | |||
| @@ -130,6 +130,9 @@ static inline struct exofs_i_info *exofs_i(struct inode *inode) | |||
| 130 | /* inode.c */ | 130 | /* inode.c */ |
| 131 | void exofs_truncate(struct inode *inode); | 131 | void exofs_truncate(struct inode *inode); |
| 132 | int exofs_setattr(struct dentry *, struct iattr *); | 132 | int exofs_setattr(struct dentry *, struct iattr *); |
| 133 | int exofs_write_begin(struct file *file, struct address_space *mapping, | ||
| 134 | loff_t pos, unsigned len, unsigned flags, | ||
| 135 | struct page **pagep, void **fsdata); | ||
| 133 | 136 | ||
| 134 | /********************* | 137 | /********************* |
| 135 | * operation vectors * | 138 | * operation vectors * |
| @@ -138,6 +141,9 @@ int exofs_setattr(struct dentry *, struct iattr *); | |||
| 138 | extern const struct inode_operations exofs_file_inode_operations; | 141 | extern const struct inode_operations exofs_file_inode_operations; |
| 139 | extern const struct file_operations exofs_file_operations; | 142 | extern const struct file_operations exofs_file_operations; |
| 140 | 143 | ||
| 144 | /* inode.c */ | ||
| 145 | extern const struct address_space_operations exofs_aops; | ||
| 146 | |||
| 141 | /* symlink.c */ | 147 | /* symlink.c */ |
| 142 | extern const struct inode_operations exofs_symlink_inode_operations; | 148 | extern const struct inode_operations exofs_symlink_inode_operations; |
| 143 | extern const struct inode_operations exofs_fast_symlink_inode_operations; | 149 | extern const struct inode_operations exofs_fast_symlink_inode_operations; |
diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c index b0bda1e9122..a3691d8bfb9 100644 --- a/fs/exofs/inode.c +++ b/fs/exofs/inode.c | |||
| @@ -35,6 +35,7 @@ | |||
| 35 | 35 | ||
| 36 | #include <linux/writeback.h> | 36 | #include <linux/writeback.h> |
| 37 | #include <linux/buffer_head.h> | 37 | #include <linux/buffer_head.h> |
| 38 | #include <scsi/scsi_device.h> | ||
| 38 | 39 | ||
| 39 | #include "exofs.h" | 40 | #include "exofs.h" |
| 40 | 41 | ||
| @@ -42,6 +43,702 @@ | |||
| 42 | # define EXOFS_DEBUG_OBJ_ISIZE 1 | 43 | # define EXOFS_DEBUG_OBJ_ISIZE 1 |
| 43 | #endif | 44 | #endif |
| 44 | 45 | ||
| 46 | struct page_collect { | ||
| 47 | struct exofs_sb_info *sbi; | ||
| 48 | struct request_queue *req_q; | ||
| 49 | struct inode *inode; | ||
| 50 | unsigned expected_pages; | ||
| 51 | |||
| 52 | struct bio *bio; | ||
| 53 | unsigned nr_pages; | ||
| 54 | unsigned long length; | ||
| 55 | loff_t pg_first; /* keep 64bit also in 32-arches */ | ||
| 56 | }; | ||
| 57 | |||
| 58 | static void _pcol_init(struct page_collect *pcol, unsigned expected_pages, | ||
| 59 | struct inode *inode) | ||
| 60 | { | ||
| 61 | struct exofs_sb_info *sbi = inode->i_sb->s_fs_info; | ||
| 62 | struct request_queue *req_q = sbi->s_dev->scsi_device->request_queue; | ||
| 63 | |||
| 64 | pcol->sbi = sbi; | ||
| 65 | pcol->req_q = req_q; | ||
| 66 | pcol->inode = inode; | ||
| 67 | pcol->expected_pages = expected_pages; | ||
| 68 | |||
| 69 | pcol->bio = NULL; | ||
| 70 | pcol->nr_pages = 0; | ||
| 71 | pcol->length = 0; | ||
| 72 | pcol->pg_first = -1; | ||
| 73 | |||
| 74 | EXOFS_DBGMSG("_pcol_init ino=0x%lx expected_pages=%u\n", inode->i_ino, | ||
| 75 | expected_pages); | ||
| 76 | } | ||
| 77 | |||
| 78 | static void _pcol_reset(struct page_collect *pcol) | ||
| 79 | { | ||
| 80 | pcol->expected_pages -= min(pcol->nr_pages, pcol->expected_pages); | ||
| 81 | |||
| 82 | pcol->bio = NULL; | ||
| 83 | pcol->nr_pages = 0; | ||
| 84 | pcol->length = 0; | ||
| 85 | pcol->pg_first = -1; | ||
| 86 | EXOFS_DBGMSG("_pcol_reset ino=0x%lx expected_pages=%u\n", | ||
| 87 | pcol->inode->i_ino, pcol->expected_pages); | ||
| 88 | |||
| 89 | /* this is probably the end of the loop but in writes | ||
| 90 | * it might not end here. don't be left with nothing | ||
| 91 | */ | ||
| 92 | if (!pcol->expected_pages) | ||
| 93 | pcol->expected_pages = 128; | ||
| 94 | } | ||
| 95 | |||
| 96 | static int pcol_try_alloc(struct page_collect *pcol) | ||
| 97 | { | ||
| 98 | int pages = min_t(unsigned, pcol->expected_pages, BIO_MAX_PAGES); | ||
| 99 | |||
| 100 | for (; pages; pages >>= 1) { | ||
| 101 | pcol->bio = bio_alloc(GFP_KERNEL, pages); | ||
| 102 | if (likely(pcol->bio)) | ||
| 103 | return 0; | ||
| 104 | } | ||
| 105 | |||
| 106 | EXOFS_ERR("Failed to kcalloc expected_pages=%u\n", | ||
| 107 | pcol->expected_pages); | ||
| 108 | return -ENOMEM; | ||
| 109 | } | ||
| 110 | |||
| 111 | static void pcol_free(struct page_collect *pcol) | ||
| 112 | { | ||
| 113 | bio_put(pcol->bio); | ||
| 114 | pcol->bio = NULL; | ||
| 115 | } | ||
| 116 | |||
| 117 | static int pcol_add_page(struct page_collect *pcol, struct page *page, | ||
| 118 | unsigned len) | ||
| 119 | { | ||
| 120 | int added_len = bio_add_pc_page(pcol->req_q, pcol->bio, page, len, 0); | ||
| 121 | if (unlikely(len != added_len)) | ||
| 122 | return -ENOMEM; | ||
| 123 | |||
| 124 | ++pcol->nr_pages; | ||
| 125 | pcol->length += len; | ||
| 126 | return 0; | ||
| 127 | } | ||
| 128 | |||
| 129 | static int update_read_page(struct page *page, int ret) | ||
| 130 | { | ||
| 131 | if (ret == 0) { | ||
| 132 | /* Everything is OK */ | ||
| 133 | SetPageUptodate(page); | ||
| 134 | if (PageError(page)) | ||
| 135 | ClearPageError(page); | ||
| 136 | } else if (ret == -EFAULT) { | ||
| 137 | /* In this case we were trying to read something that wasn't on | ||
| 138 | * disk yet - return a page full of zeroes. This should be OK, | ||
| 139 | * because the object should be empty (if there was a write | ||
| 140 | * before this read, the read would be waiting with the page | ||
| 141 | * locked */ | ||
| 142 | clear_highpage(page); | ||
| 143 | |||
| 144 | SetPageUptodate(page); | ||
| 145 | if (PageError(page)) | ||
| 146 | ClearPageError(page); | ||
| 147 | ret = 0; /* recovered error */ | ||
| 148 | EXOFS_DBGMSG("recovered read error\n"); | ||
| 149 | } else /* Error */ | ||
| 150 | SetPageError(page); | ||
| 151 | |||
| 152 | return ret; | ||
| 153 | } | ||
| 154 | |||
| 155 | static void update_write_page(struct page *page, int ret) | ||
| 156 | { | ||
| 157 | if (ret) { | ||
| 158 | mapping_set_error(page->mapping, ret); | ||
| 159 | SetPageError(page); | ||
| 160 | } | ||
| 161 | end_page_writeback(page); | ||
| 162 | } | ||
| 163 | |||
| 164 | /* Called at the end of reads, to optionally unlock pages and update their | ||
| 165 | * status. | ||
| 166 | */ | ||
| 167 | static int __readpages_done(struct osd_request *or, struct page_collect *pcol, | ||
| 168 | bool do_unlock) | ||
| 169 | { | ||
| 170 | struct bio_vec *bvec; | ||
| 171 | int i; | ||
| 172 | u64 resid; | ||
| 173 | u64 good_bytes; | ||
| 174 | u64 length = 0; | ||
| 175 | int ret = exofs_check_ok_resid(or, &resid, NULL); | ||
| 176 | |||
| 177 | osd_end_request(or); | ||
| 178 | |||
| 179 | if (likely(!ret)) | ||
| 180 | good_bytes = pcol->length; | ||
| 181 | else if (!resid) | ||
| 182 | good_bytes = 0; | ||
| 183 | else | ||
| 184 | good_bytes = pcol->length - resid; | ||
| 185 | |||
| 186 | EXOFS_DBGMSG("readpages_done(0x%lx) good_bytes=0x%llx" | ||
| 187 | " length=0x%lx nr_pages=%u\n", | ||
| 188 | pcol->inode->i_ino, _LLU(good_bytes), pcol->length, | ||
| 189 | pcol->nr_pages); | ||
| 190 | |||
| 191 | __bio_for_each_segment(bvec, pcol->bio, i, 0) { | ||
| 192 | struct page *page = bvec->bv_page; | ||
| 193 | struct inode *inode = page->mapping->host; | ||
| 194 | int page_stat; | ||
| 195 | |||
| 196 | if (inode != pcol->inode) | ||
| 197 | continue; /* osd might add more pages at end */ | ||
| 198 | |||
| 199 | if (likely(length < good_bytes)) | ||
| 200 | page_stat = 0; | ||
| 201 | else | ||
| 202 | page_stat = ret; | ||
| 203 | |||
| 204 | EXOFS_DBGMSG(" readpages_done(0x%lx, 0x%lx) %s\n", | ||
| 205 | inode->i_ino, page->index, | ||
| 206 | page_stat ? "bad_bytes" : "good_bytes"); | ||
| 207 | |||
| 208 | ret = update_read_page(page, page_stat); | ||
| 209 | if (do_unlock) | ||
| 210 | unlock_page(page); | ||
| 211 | length += bvec->bv_len; | ||
| 212 | } | ||
| 213 | |||
| 214 | pcol_free(pcol); | ||
| 215 | EXOFS_DBGMSG("readpages_done END\n"); | ||
| 216 | return ret; | ||
| 217 | } | ||
| 218 | |||
| 219 | /* callback of async reads */ | ||
| 220 | static void readpages_done(struct osd_request *or, void *p) | ||
| 221 | { | ||
| 222 | struct page_collect *pcol = p; | ||
| 223 | |||
| 224 | __readpages_done(or, pcol, true); | ||
| 225 | atomic_dec(&pcol->sbi->s_curr_pending); | ||
| 226 | kfree(p); | ||
| 227 | } | ||
| 228 | |||
| 229 | static void _unlock_pcol_pages(struct page_collect *pcol, int ret, int rw) | ||
| 230 | { | ||
| 231 | struct bio_vec *bvec; | ||
| 232 | int i; | ||
| 233 | |||
| 234 | __bio_for_each_segment(bvec, pcol->bio, i, 0) { | ||
| 235 | struct page *page = bvec->bv_page; | ||
| 236 | |||
| 237 | if (rw == READ) | ||
| 238 | update_read_page(page, ret); | ||
| 239 | else | ||
| 240 | update_write_page(page, ret); | ||
| 241 | |||
| 242 | unlock_page(page); | ||
| 243 | } | ||
| 244 | pcol_free(pcol); | ||
| 245 | } | ||
| 246 | |||
| 247 | static int read_exec(struct page_collect *pcol, bool is_sync) | ||
| 248 | { | ||
| 249 | struct exofs_i_info *oi = exofs_i(pcol->inode); | ||
| 250 | struct osd_obj_id obj = {pcol->sbi->s_pid, | ||
| 251 | pcol->inode->i_ino + EXOFS_OBJ_OFF}; | ||
| 252 | struct osd_request *or = NULL; | ||
| 253 | struct page_collect *pcol_copy = NULL; | ||
| 254 | loff_t i_start = pcol->pg_first << PAGE_CACHE_SHIFT; | ||
| 255 | int ret; | ||
| 256 | |||
| 257 | if (!pcol->bio) | ||
| 258 | return 0; | ||
| 259 | |||
| 260 | /* see comment in _readpage() about sync reads */ | ||
| 261 | WARN_ON(is_sync && (pcol->nr_pages != 1)); | ||
| 262 | |||
| 263 | or = osd_start_request(pcol->sbi->s_dev, GFP_KERNEL); | ||
| 264 | if (unlikely(!or)) { | ||
| 265 | ret = -ENOMEM; | ||
| 266 | goto err; | ||
| 267 | } | ||
| 268 | |||
| 269 | osd_req_read(or, &obj, pcol->bio, i_start); | ||
| 270 | |||
| 271 | if (is_sync) { | ||
| 272 | exofs_sync_op(or, pcol->sbi->s_timeout, oi->i_cred); | ||
| 273 | return __readpages_done(or, pcol, false); | ||
| 274 | } | ||
| 275 | |||
| 276 | pcol_copy = kmalloc(sizeof(*pcol_copy), GFP_KERNEL); | ||
| 277 | if (!pcol_copy) { | ||
| 278 | ret = -ENOMEM; | ||
| 279 | goto err; | ||
| 280 | } | ||
| 281 | |||
| 282 | *pcol_copy = *pcol; | ||
| 283 | ret = exofs_async_op(or, readpages_done, pcol_copy, oi->i_cred); | ||
| 284 | if (unlikely(ret)) | ||
| 285 | goto err; | ||
| 286 | |||
| 287 | atomic_inc(&pcol->sbi->s_curr_pending); | ||
| 288 | |||
| 289 | EXOFS_DBGMSG("read_exec obj=0x%llx start=0x%llx length=0x%lx\n", | ||
| 290 | obj.id, _LLU(i_start), pcol->length); | ||
| 291 | |||
| 292 | /* pages ownership was passed to pcol_copy */ | ||
| 293 | _pcol_reset(pcol); | ||
| 294 | return 0; | ||
| 295 | |||
| 296 | err: | ||
| 297 | if (!is_sync) | ||
| 298 | _unlock_pcol_pages(pcol, ret, READ); | ||
| 299 | kfree(pcol_copy); | ||
| 300 | if (or) | ||
| 301 | osd_end_request(or); | ||
| 302 | return ret; | ||
| 303 | } | ||
| 304 | |||
| 305 | /* readpage_strip is called either directly from readpage() or by the VFS from | ||
| 306 | * within read_cache_pages(), to add one more page to be read. It will try to | ||
| 307 | * collect as many contiguous pages as posible. If a discontinuity is | ||
| 308 | * encountered, or it runs out of resources, it will submit the previous segment | ||
| 309 | * and will start a new collection. Eventually caller must submit the last | ||
| 310 | * segment if present. | ||
| 311 | */ | ||
| 312 | static int readpage_strip(void *data, struct page *page) | ||
| 313 | { | ||
| 314 | struct page_collect *pcol = data; | ||
| 315 | struct inode *inode = pcol->inode; | ||
| 316 | struct exofs_i_info *oi = exofs_i(inode); | ||
| 317 | loff_t i_size = i_size_read(inode); | ||
| 318 | pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT; | ||
| 319 | size_t len; | ||
| 320 | int ret; | ||
| 321 | |||
| 322 | /* FIXME: Just for debugging, will be removed */ | ||
| 323 | if (PageUptodate(page)) | ||
| 324 | EXOFS_ERR("PageUptodate(0x%lx, 0x%lx)\n", pcol->inode->i_ino, | ||
| 325 | page->index); | ||
| 326 | |||
| 327 | if (page->index < end_index) | ||
| 328 | len = PAGE_CACHE_SIZE; | ||
| 329 | else if (page->index == end_index) | ||
| 330 | len = i_size & ~PAGE_CACHE_MASK; | ||
| 331 | else | ||
| 332 | len = 0; | ||
| 333 | |||
| 334 | if (!len || !obj_created(oi)) { | ||
| 335 | /* this will be out of bounds, or doesn't exist yet. | ||
| 336 | * Current page is cleared and the request is split | ||
| 337 | */ | ||
| 338 | clear_highpage(page); | ||
| 339 | |||
| 340 | SetPageUptodate(page); | ||
| 341 | if (PageError(page)) | ||
| 342 | ClearPageError(page); | ||
| 343 | |||
| 344 | unlock_page(page); | ||
| 345 | EXOFS_DBGMSG("readpage_strip(0x%lx, 0x%lx) empty page," | ||
| 346 | " splitting\n", inode->i_ino, page->index); | ||
| 347 | |||
| 348 | return read_exec(pcol, false); | ||
| 349 | } | ||
| 350 | |||
| 351 | try_again: | ||
| 352 | |||
| 353 | if (unlikely(pcol->pg_first == -1)) { | ||
| 354 | pcol->pg_first = page->index; | ||
| 355 | } else if (unlikely((pcol->pg_first + pcol->nr_pages) != | ||
| 356 | page->index)) { | ||
| 357 | /* Discontinuity detected, split the request */ | ||
| 358 | ret = read_exec(pcol, false); | ||
| 359 | if (unlikely(ret)) | ||
| 360 | goto fail; | ||
| 361 | goto try_again; | ||
| 362 | } | ||
| 363 | |||
| 364 | if (!pcol->bio) { | ||
| 365 | ret = pcol_try_alloc(pcol); | ||
| 366 | if (unlikely(ret)) | ||
| 367 | goto fail; | ||
| 368 | } | ||
| 369 | |||
| 370 | if (len != PAGE_CACHE_SIZE) | ||
| 371 | zero_user(page, len, PAGE_CACHE_SIZE - len); | ||
| 372 | |||
| 373 | EXOFS_DBGMSG(" readpage_strip(0x%lx, 0x%lx) len=0x%zx\n", | ||
| 374 | inode->i_ino, page->index, len); | ||
| 375 | |||
| 376 | ret = pcol_add_page(pcol, page, len); | ||
| 377 | if (ret) { | ||
| 378 | EXOFS_DBGMSG("Failed pcol_add_page pages[i]=%p " | ||
| 379 | "this_len=0x%zx nr_pages=%u length=0x%lx\n", | ||
| 380 | page, len, pcol->nr_pages, pcol->length); | ||
| 381 | |||
| 382 | /* split the request, and start again with current page */ | ||
| 383 | ret = read_exec(pcol, false); | ||
| 384 | if (unlikely(ret)) | ||
| 385 | goto fail; | ||
| 386 | |||
| 387 | goto try_again; | ||
| 388 | } | ||
| 389 | |||
| 390 | return 0; | ||
| 391 | |||
| 392 | fail: | ||
| 393 | /* SetPageError(page); ??? */ | ||
| 394 | unlock_page(page); | ||
| 395 | return ret; | ||
| 396 | } | ||
| 397 | |||
| 398 | static int exofs_readpages(struct file *file, struct address_space *mapping, | ||
| 399 | struct list_head *pages, unsigned nr_pages) | ||
| 400 | { | ||
| 401 | struct page_collect pcol; | ||
| 402 | int ret; | ||
| 403 | |||
| 404 | _pcol_init(&pcol, nr_pages, mapping->host); | ||
| 405 | |||
| 406 | ret = read_cache_pages(mapping, pages, readpage_strip, &pcol); | ||
| 407 | if (ret) { | ||
| 408 | EXOFS_ERR("read_cache_pages => %d\n", ret); | ||
| 409 | return ret; | ||
| 410 | } | ||
| 411 | |||
| 412 | return read_exec(&pcol, false); | ||
| 413 | } | ||
| 414 | |||
| 415 | static int _readpage(struct page *page, bool is_sync) | ||
| 416 | { | ||
| 417 | struct page_collect pcol; | ||
| 418 | int ret; | ||
| 419 | |||
| 420 | _pcol_init(&pcol, 1, page->mapping->host); | ||
| 421 | |||
| 422 | /* readpage_strip might call read_exec(,async) inside at several places | ||
| 423 | * but this is safe for is_async=0 since read_exec will not do anything | ||
| 424 | * when we have a single page. | ||
| 425 | */ | ||
| 426 | ret = readpage_strip(&pcol, page); | ||
| 427 | if (ret) { | ||
| 428 | EXOFS_ERR("_readpage => %d\n", ret); | ||
| 429 | return ret; | ||
| 430 | } | ||
| 431 | |||
| 432 | return read_exec(&pcol, is_sync); | ||
| 433 | } | ||
| 434 | |||
| 435 | /* | ||
| 436 | * We don't need the file | ||
| 437 | */ | ||
| 438 | static int exofs_readpage(struct file *file, struct page *page) | ||
| 439 | { | ||
| 440 | return _readpage(page, false); | ||
| 441 | } | ||
| 442 | |||
| 443 | /* Callback for osd_write. All writes are asynchronouse */ | ||
| 444 | static void writepages_done(struct osd_request *or, void *p) | ||
| 445 | { | ||
| 446 | struct page_collect *pcol = p; | ||
| 447 | struct bio_vec *bvec; | ||
| 448 | int i; | ||
| 449 | u64 resid; | ||
| 450 | u64 good_bytes; | ||
| 451 | u64 length = 0; | ||
| 452 | |||
| 453 | int ret = exofs_check_ok_resid(or, NULL, &resid); | ||
| 454 | |||
| 455 | osd_end_request(or); | ||
| 456 | atomic_dec(&pcol->sbi->s_curr_pending); | ||
| 457 | |||
| 458 | if (likely(!ret)) | ||
| 459 | good_bytes = pcol->length; | ||
| 460 | else if (!resid) | ||
| 461 | good_bytes = 0; | ||
| 462 | else | ||
| 463 | good_bytes = pcol->length - resid; | ||
| 464 | |||
| 465 | EXOFS_DBGMSG("writepages_done(0x%lx) good_bytes=0x%llx" | ||
| 466 | " length=0x%lx nr_pages=%u\n", | ||
| 467 | pcol->inode->i_ino, _LLU(good_bytes), pcol->length, | ||
| 468 | pcol->nr_pages); | ||
| 469 | |||
| 470 | __bio_for_each_segment(bvec, pcol->bio, i, 0) { | ||
| 471 | struct page *page = bvec->bv_page; | ||
| 472 | struct inode *inode = page->mapping->host; | ||
| 473 | int page_stat; | ||
| 474 | |||
| 475 | if (inode != pcol->inode) | ||
| 476 | continue; /* osd might add more pages to a bio */ | ||
| 477 | |||
| 478 | if (likely(length < good_bytes)) | ||
| 479 | page_stat = 0; | ||
| 480 | else | ||
| 481 | page_stat = ret; | ||
| 482 | |||
| 483 | update_write_page(page, page_stat); | ||
| 484 | unlock_page(page); | ||
| 485 | EXOFS_DBGMSG(" writepages_done(0x%lx, 0x%lx) status=%d\n", | ||
| 486 | inode->i_ino, page->index, page_stat); | ||
| 487 | |||
| 488 | length += bvec->bv_len; | ||
| 489 | } | ||
| 490 | |||
| 491 | pcol_free(pcol); | ||
| 492 | kfree(pcol); | ||
| 493 | EXOFS_DBGMSG("writepages_done END\n"); | ||
| 494 | } | ||
| 495 | |||
| 496 | static int write_exec(struct page_collect *pcol) | ||
| 497 | { | ||
| 498 | struct exofs_i_info *oi = exofs_i(pcol->inode); | ||
| 499 | struct osd_obj_id obj = {pcol->sbi->s_pid, | ||
| 500 | pcol->inode->i_ino + EXOFS_OBJ_OFF}; | ||
| 501 | struct osd_request *or = NULL; | ||
| 502 | struct page_collect *pcol_copy = NULL; | ||
| 503 | loff_t i_start = pcol->pg_first << PAGE_CACHE_SHIFT; | ||
| 504 | int ret; | ||
| 505 | |||
| 506 | if (!pcol->bio) | ||
| 507 | return 0; | ||
| 508 | |||
| 509 | or = osd_start_request(pcol->sbi->s_dev, GFP_KERNEL); | ||
| 510 | if (unlikely(!or)) { | ||
| 511 | EXOFS_ERR("write_exec: Faild to osd_start_request()\n"); | ||
| 512 | ret = -ENOMEM; | ||
| 513 | goto err; | ||
| 514 | } | ||
| 515 | |||
| 516 | pcol_copy = kmalloc(sizeof(*pcol_copy), GFP_KERNEL); | ||
| 517 | if (!pcol_copy) { | ||
| 518 | EXOFS_ERR("write_exec: Faild to kmalloc(pcol)\n"); | ||
| 519 | ret = -ENOMEM; | ||
| 520 | goto err; | ||
| 521 | } | ||
| 522 | |||
| 523 | *pcol_copy = *pcol; | ||
| 524 | |||
| 525 | osd_req_write(or, &obj, pcol_copy->bio, i_start); | ||
| 526 | ret = exofs_async_op(or, writepages_done, pcol_copy, oi->i_cred); | ||
| 527 | if (unlikely(ret)) { | ||
| 528 | EXOFS_ERR("write_exec: exofs_async_op() Faild\n"); | ||
| 529 | goto err; | ||
| 530 | } | ||
| 531 | |||
| 532 | atomic_inc(&pcol->sbi->s_curr_pending); | ||
| 533 | EXOFS_DBGMSG("write_exec(0x%lx, 0x%llx) start=0x%llx length=0x%lx\n", | ||
| 534 | pcol->inode->i_ino, pcol->pg_first, _LLU(i_start), | ||
| 535 | pcol->length); | ||
| 536 | /* pages ownership was passed to pcol_copy */ | ||
| 537 | _pcol_reset(pcol); | ||
| 538 | return 0; | ||
| 539 | |||
| 540 | err: | ||
| 541 | _unlock_pcol_pages(pcol, ret, WRITE); | ||
| 542 | kfree(pcol_copy); | ||
| 543 | if (or) | ||
| 544 | osd_end_request(or); | ||
| 545 | return ret; | ||
| 546 | } | ||
| 547 | |||
| 548 | /* writepage_strip is called either directly from writepage() or by the VFS from | ||
| 549 | * within write_cache_pages(), to add one more page to be written to storage. | ||
| 550 | * It will try to collect as many contiguous pages as possible. If a | ||
| 551 | * discontinuity is encountered or it runs out of resources it will submit the | ||
| 552 | * previous segment and will start a new collection. | ||
| 553 | * Eventually caller must submit the last segment if present. | ||
| 554 | */ | ||
| 555 | static int writepage_strip(struct page *page, | ||
| 556 | struct writeback_control *wbc_unused, void *data) | ||
| 557 | { | ||
| 558 | struct page_collect *pcol = data; | ||
| 559 | struct inode *inode = pcol->inode; | ||
| 560 | struct exofs_i_info *oi = exofs_i(inode); | ||
| 561 | loff_t i_size = i_size_read(inode); | ||
| 562 | pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT; | ||
| 563 | size_t len; | ||
| 564 | int ret; | ||
| 565 | |||
| 566 | BUG_ON(!PageLocked(page)); | ||
| 567 | |||
| 568 | ret = wait_obj_created(oi); | ||
| 569 | if (unlikely(ret)) | ||
| 570 | goto fail; | ||
| 571 | |||
| 572 | if (page->index < end_index) | ||
| 573 | /* in this case, the page is within the limits of the file */ | ||
| 574 | len = PAGE_CACHE_SIZE; | ||
| 575 | else { | ||
| 576 | len = i_size & ~PAGE_CACHE_MASK; | ||
| 577 | |||
| 578 | if (page->index > end_index || !len) { | ||
| 579 | /* in this case, the page is outside the limits | ||
| 580 | * (truncate in progress) | ||
| 581 | */ | ||
| 582 | ret = write_exec(pcol); | ||
| 583 | if (unlikely(ret)) | ||
| 584 | goto fail; | ||
| 585 | if (PageError(page)) | ||
| 586 | ClearPageError(page); | ||
| 587 | unlock_page(page); | ||
| 588 | return 0; | ||
| 589 | } | ||
| 590 | } | ||
| 591 | |||
| 592 | try_again: | ||
| 593 | |||
| 594 | if (unlikely(pcol->pg_first == -1)) { | ||
| 595 | pcol->pg_first = page->index; | ||
| 596 | } else if (unlikely((pcol->pg_first + pcol->nr_pages) != | ||
| 597 | page->index)) { | ||
| 598 | /* Discontinuity detected, split the request */ | ||
| 599 | ret = write_exec(pcol); | ||
| 600 | if (unlikely(ret)) | ||
| 601 | goto fail; | ||
| 602 | goto try_again; | ||
| 603 | } | ||
| 604 | |||
| 605 | if (!pcol->bio) { | ||
| 606 | ret = pcol_try_alloc(pcol); | ||
| 607 | if (unlikely(ret)) | ||
| 608 | goto fail; | ||
| 609 | } | ||
| 610 | |||
| 611 | EXOFS_DBGMSG(" writepage_strip(0x%lx, 0x%lx) len=0x%zx\n", | ||
| 612 | inode->i_ino, page->index, len); | ||
| 613 | |||
| 614 | ret = pcol_add_page(pcol, page, len); | ||
| 615 | if (unlikely(ret)) { | ||
| 616 | EXOFS_DBGMSG("Failed pcol_add_page " | ||
| 617 | "nr_pages=%u total_length=0x%lx\n", | ||
| 618 | pcol->nr_pages, pcol->length); | ||
| 619 | |||
| 620 | /* split the request, next loop will start again */ | ||
| 621 | ret = write_exec(pcol); | ||
| 622 | if (unlikely(ret)) { | ||
| 623 | EXOFS_DBGMSG("write_exec faild => %d", ret); | ||
| 624 | goto fail; | ||
| 625 | } | ||
| 626 | |||
| 627 | goto try_again; | ||
| 628 | } | ||
| 629 | |||
| 630 | BUG_ON(PageWriteback(page)); | ||
| 631 | set_page_writeback(page); | ||
| 632 | |||
| 633 | return 0; | ||
| 634 | |||
| 635 | fail: | ||
| 636 | set_bit(AS_EIO, &page->mapping->flags); | ||
| 637 | unlock_page(page); | ||
| 638 | return ret; | ||
| 639 | } | ||
| 640 | |||
| 641 | static int exofs_writepages(struct address_space *mapping, | ||
| 642 | struct writeback_control *wbc) | ||
| 643 | { | ||
| 644 | struct page_collect pcol; | ||
| 645 | long start, end, expected_pages; | ||
| 646 | int ret; | ||
| 647 | |||
| 648 | start = wbc->range_start >> PAGE_CACHE_SHIFT; | ||
| 649 | end = (wbc->range_end == LLONG_MAX) ? | ||
| 650 | start + mapping->nrpages : | ||
| 651 | wbc->range_end >> PAGE_CACHE_SHIFT; | ||
| 652 | |||
| 653 | if (start || end) | ||
| 654 | expected_pages = min(end - start + 1, 32L); | ||
| 655 | else | ||
| 656 | expected_pages = mapping->nrpages; | ||
| 657 | |||
| 658 | EXOFS_DBGMSG("inode(0x%lx) wbc->start=0x%llx wbc->end=0x%llx" | ||
| 659 | " m->nrpages=%lu start=0x%lx end=0x%lx\n", | ||
| 660 | mapping->host->i_ino, wbc->range_start, wbc->range_end, | ||
| 661 | mapping->nrpages, start, end); | ||
| 662 | |||
| 663 | _pcol_init(&pcol, expected_pages, mapping->host); | ||
| 664 | |||
| 665 | ret = write_cache_pages(mapping, wbc, writepage_strip, &pcol); | ||
| 666 | if (ret) { | ||
| 667 | EXOFS_ERR("write_cache_pages => %d\n", ret); | ||
| 668 | return ret; | ||
| 669 | } | ||
| 670 | |||
| 671 | return write_exec(&pcol); | ||
| 672 | } | ||
| 673 | |||
| 674 | static int exofs_writepage(struct page *page, struct writeback_control *wbc) | ||
| 675 | { | ||
| 676 | struct page_collect pcol; | ||
| 677 | int ret; | ||
| 678 | |||
| 679 | _pcol_init(&pcol, 1, page->mapping->host); | ||
| 680 | |||
| 681 | ret = writepage_strip(page, NULL, &pcol); | ||
| 682 | if (ret) { | ||
| 683 | EXOFS_ERR("exofs_writepage => %d\n", ret); | ||
| 684 | return ret; | ||
| 685 | } | ||
| 686 | |||
| 687 | return write_exec(&pcol); | ||
| 688 | } | ||
| 689 | |||
| 690 | int exofs_write_begin(struct file *file, struct address_space *mapping, | ||
| 691 | loff_t pos, unsigned len, unsigned flags, | ||
| 692 | struct page **pagep, void **fsdata) | ||
| 693 | { | ||
| 694 | int ret = 0; | ||
| 695 | struct page *page; | ||
| 696 | |||
| 697 | page = *pagep; | ||
| 698 | if (page == NULL) { | ||
| 699 | ret = simple_write_begin(file, mapping, pos, len, flags, pagep, | ||
| 700 | fsdata); | ||
| 701 | if (ret) { | ||
| 702 | EXOFS_DBGMSG("simple_write_begin faild\n"); | ||
| 703 | return ret; | ||
| 704 | } | ||
| 705 | |||
| 706 | page = *pagep; | ||
| 707 | } | ||
| 708 | |||
| 709 | /* read modify write */ | ||
| 710 | if (!PageUptodate(page) && (len != PAGE_CACHE_SIZE)) { | ||
| 711 | ret = _readpage(page, true); | ||
| 712 | if (ret) { | ||
| 713 | /*SetPageError was done by _readpage. Is it ok?*/ | ||
| 714 | unlock_page(page); | ||
| 715 | EXOFS_DBGMSG("__readpage_filler faild\n"); | ||
| 716 | } | ||
| 717 | } | ||
| 718 | |||
| 719 | return ret; | ||
| 720 | } | ||
| 721 | |||
| 722 | static int exofs_write_begin_export(struct file *file, | ||
| 723 | struct address_space *mapping, | ||
| 724 | loff_t pos, unsigned len, unsigned flags, | ||
| 725 | struct page **pagep, void **fsdata) | ||
| 726 | { | ||
| 727 | *pagep = NULL; | ||
| 728 | |||
| 729 | return exofs_write_begin(file, mapping, pos, len, flags, pagep, | ||
| 730 | fsdata); | ||
| 731 | } | ||
| 732 | |||
| 733 | const struct address_space_operations exofs_aops = { | ||
| 734 | .readpage = exofs_readpage, | ||
| 735 | .readpages = exofs_readpages, | ||
| 736 | .writepage = exofs_writepage, | ||
| 737 | .writepages = exofs_writepages, | ||
| 738 | .write_begin = exofs_write_begin_export, | ||
| 739 | .write_end = simple_write_end, | ||
| 740 | }; | ||
| 741 | |||
| 45 | /****************************************************************************** | 742 | /****************************************************************************** |
| 46 | * INODE OPERATIONS | 743 | * INODE OPERATIONS |
| 47 | *****************************************************************************/ | 744 | *****************************************************************************/ |
