Diffstat (limited to 'fs/nfs/blocklayout/blocklayout.c')
 fs/nfs/blocklayout/blocklayout.c | 306 ++++++++++++++++++++++++++++++++------
 1 file changed, 267 insertions(+), 39 deletions(-)
diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index dd392ed5f2e2..f1027b06a1a9 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -37,6 +37,7 @@
 #include <linux/bio.h>          /* struct bio */
 #include <linux/buffer_head.h>  /* various write calls */
 #include <linux/prefetch.h>
+#include <linux/pagevec.h>
 
 #include "../pnfs.h"
 #include "../internal.h"
@@ -162,25 +163,39 @@ static struct bio *bl_alloc_init_bio(int npg, sector_t isect,
         return bio;
 }
 
-static struct bio *bl_add_page_to_bio(struct bio *bio, int npg, int rw,
+static struct bio *do_add_page_to_bio(struct bio *bio, int npg, int rw,
                               sector_t isect, struct page *page,
                               struct pnfs_block_extent *be,
                               void (*end_io)(struct bio *, int err),
-                              struct parallel_io *par)
+                              struct parallel_io *par,
+                              unsigned int offset, int len)
 {
+        isect = isect + (offset >> SECTOR_SHIFT);
+        dprintk("%s: npg %d rw %d isect %llu offset %u len %d\n", __func__,
+                npg, rw, (unsigned long long)isect, offset, len);
 retry:
         if (!bio) {
                 bio = bl_alloc_init_bio(npg, isect, be, end_io, par);
                 if (!bio)
                         return ERR_PTR(-ENOMEM);
         }
-        if (bio_add_page(bio, page, PAGE_CACHE_SIZE, 0) < PAGE_CACHE_SIZE) {
+        if (bio_add_page(bio, page, len, offset) < len) {
                 bio = bl_submit_bio(rw, bio);
                 goto retry;
         }
         return bio;
 }
 
+static struct bio *bl_add_page_to_bio(struct bio *bio, int npg, int rw,
+                              sector_t isect, struct page *page,
+                              struct pnfs_block_extent *be,
+                              void (*end_io)(struct bio *, int err),
+                              struct parallel_io *par)
+{
+        return do_add_page_to_bio(bio, npg, rw, isect, page, be,
+                                  end_io, par, 0, PAGE_CACHE_SIZE);
+}
+
 /* This is basically copied from mpage_end_io_read */
 static void bl_end_io_read(struct bio *bio, int err)
 {
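
The hunk above splits the old bl_add_page_to_bio() in two: do_add_page_to_bio() now takes a byte (offset, len) pair so a sub-page range can be queued with bio_add_page(), and bl_add_page_to_bio() survives as a full-page wrapper passing (0, PAGE_CACHE_SIZE). Before queuing, the starting sector is advanced to match the byte offset into the page. A standalone userspace sketch of that sector arithmetic (assuming the kernel's 512-byte sector convention; the values are illustrative):

    #include <stdio.h>
    #include <stdint.h>

    #define SECTOR_SHIFT 9  /* 512-byte sectors, as in the kernel */

    int main(void)
    {
        uint64_t isect = 1024;      /* starting sector of the page */
        unsigned int offset = 1536; /* byte offset into the page */
        unsigned int len = 2048;    /* bytes to transfer */

        /* 1536 bytes = 3 sectors, so the I/O starts 3 sectors in */
        isect += offset >> SECTOR_SHIFT;
        printf("I/O starts at sector %llu, %u bytes (%u sectors)\n",
               (unsigned long long)isect, len, len >> SECTOR_SHIFT);
        return 0;
    }
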
@@ -228,14 +243,6 @@ bl_end_par_io_read(void *data, int unused)
         schedule_work(&rdata->task.u.tk_work);
 }
 
-static bool
-bl_check_alignment(u64 offset, u32 len, unsigned long blkmask)
-{
-        if ((offset & blkmask) || (len & blkmask))
-                return false;
-        return true;
-}
-
 static enum pnfs_try_status
 bl_read_pagelist(struct nfs_read_data *rdata)
 {
@@ -246,15 +253,15 @@ bl_read_pagelist(struct nfs_read_data *rdata)
         sector_t isect, extent_length = 0;
         struct parallel_io *par;
         loff_t f_offset = rdata->args.offset;
+        size_t bytes_left = rdata->args.count;
+        unsigned int pg_offset, pg_len;
         struct page **pages = rdata->args.pages;
         int pg_index = rdata->args.pgbase >> PAGE_CACHE_SHIFT;
+        const bool is_dio = (header->dreq != NULL);
 
         dprintk("%s enter nr_pages %u offset %lld count %u\n", __func__,
                 rdata->pages.npages, f_offset, (unsigned int)rdata->args.count);
 
-        if (!bl_check_alignment(f_offset, rdata->args.count, PAGE_CACHE_MASK))
-                goto use_mds;
-
         par = alloc_parallel(rdata);
         if (!par)
                 goto use_mds;
@@ -284,36 +291,53 @@ bl_read_pagelist(struct nfs_read_data *rdata)
                                 extent_length = min(extent_length, cow_length);
                         }
                 }
+
+                if (is_dio) {
+                        pg_offset = f_offset & ~PAGE_CACHE_MASK;
+                        if (pg_offset + bytes_left > PAGE_CACHE_SIZE)
+                                pg_len = PAGE_CACHE_SIZE - pg_offset;
+                        else
+                                pg_len = bytes_left;
+
+                        f_offset += pg_len;
+                        bytes_left -= pg_len;
+                        isect += (pg_offset >> SECTOR_SHIFT);
+                } else {
+                        pg_offset = 0;
+                        pg_len = PAGE_CACHE_SIZE;
+                }
+
                 hole = is_hole(be, isect);
                 if (hole && !cow_read) {
                         bio = bl_submit_bio(READ, bio);
                         /* Fill hole w/ zeroes w/o accessing device */
                         dprintk("%s Zeroing page for hole\n", __func__);
-                        zero_user_segment(pages[i], 0, PAGE_CACHE_SIZE);
+                        zero_user_segment(pages[i], pg_offset, pg_len);
                         print_page(pages[i]);
                         SetPageUptodate(pages[i]);
                 } else {
                         struct pnfs_block_extent *be_read;
 
                         be_read = (hole && cow_read) ? cow_read : be;
-                        bio = bl_add_page_to_bio(bio, rdata->pages.npages - i,
+                        bio = do_add_page_to_bio(bio, rdata->pages.npages - i,
                                                  READ,
                                                  isect, pages[i], be_read,
-                                                 bl_end_io_read, par);
+                                                 bl_end_io_read, par,
+                                                 pg_offset, pg_len);
                         if (IS_ERR(bio)) {
                                 header->pnfs_error = PTR_ERR(bio);
                                 bio = NULL;
                                 goto out;
                         }
                 }
-                isect += PAGE_CACHE_SECTORS;
+                isect += (pg_len >> SECTOR_SHIFT);
                 extent_length -= PAGE_CACHE_SECTORS;
         }
         if ((isect << SECTOR_SHIFT) >= header->inode->i_size) {
                 rdata->res.eof = 1;
-                rdata->res.count = header->inode->i_size - f_offset;
+                rdata->res.count = header->inode->i_size - rdata->args.offset;
         } else {
-                rdata->res.count = (isect << SECTOR_SHIFT) - f_offset;
+                rdata->res.count = (isect << SECTOR_SHIFT) - rdata->args.offset;
         }
 out:
         bl_put_extent(be);
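
The direct-I/O branch added above walks the request one page at a time: pg_offset is the byte offset into the current page, pg_len the slice of that page actually read, and f_offset, bytes_left and isect all advance by the slice rather than by a whole page. A minimal userspace sketch of the same chunking (PAGE_SIZE standing in for PAGE_CACHE_SIZE; the function name and values are illustrative):

    #include <stdio.h>
    #include <stddef.h>
    #include <stdint.h>

    #define PAGE_SIZE 4096u

    /* Split a possibly misaligned (offset, count) range into per-page
     * (pg_offset, pg_len) slices, as the DIO read path now does. */
    static void walk_dio_range(uint64_t f_offset, size_t bytes_left)
    {
        while (bytes_left > 0) {
            unsigned int pg_offset = f_offset & (PAGE_SIZE - 1);
            unsigned int pg_len = PAGE_SIZE - pg_offset;

            if (pg_len > bytes_left)
                pg_len = bytes_left;

            printf("page %llu: offset %u, len %u\n",
                   (unsigned long long)(f_offset / PAGE_SIZE),
                   pg_offset, pg_len);
            f_offset += pg_len;
            bytes_left -= pg_len;
        }
    }

    int main(void)
    {
        walk_dio_range(512, 6144);  /* misaligned 6 KiB read */
        return 0;
    }
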
@@ -461,6 +485,106 @@ map_block(struct buffer_head *bh, sector_t isect, struct pnfs_block_extent *be)
         return;
 }
 
+static void
+bl_read_single_end_io(struct bio *bio, int error)
+{
+        struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
+        struct page *page = bvec->bv_page;
+
+        /* Only one page in bvec */
+        unlock_page(page);
+}
+
+static int
+bl_do_readpage_sync(struct page *page, struct pnfs_block_extent *be,
+                    unsigned int offset, unsigned int len)
+{
+        struct bio *bio;
+        struct page *shadow_page;
+        sector_t isect;
+        char *kaddr, *kshadow_addr;
+        int ret = 0;
+
+        dprintk("%s: offset %u len %u\n", __func__, offset, len);
+
+        shadow_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
+        if (shadow_page == NULL)
+                return -ENOMEM;
+
+        bio = bio_alloc(GFP_NOIO, 1);
+        if (bio == NULL)
+                return -ENOMEM;
+
+        isect = (page->index << PAGE_CACHE_SECTOR_SHIFT) +
+                (offset / SECTOR_SIZE);
+
+        bio->bi_sector = isect - be->be_f_offset + be->be_v_offset;
+        bio->bi_bdev = be->be_mdev;
+        bio->bi_end_io = bl_read_single_end_io;
+
+        lock_page(shadow_page);
+        if (bio_add_page(bio, shadow_page,
+                         SECTOR_SIZE, round_down(offset, SECTOR_SIZE)) == 0) {
+                unlock_page(shadow_page);
+                bio_put(bio);
+                return -EIO;
+        }
+
+        submit_bio(READ, bio);
+        wait_on_page_locked(shadow_page);
+        if (unlikely(!test_bit(BIO_UPTODATE, &bio->bi_flags))) {
+                ret = -EIO;
+        } else {
+                kaddr = kmap_atomic(page);
+                kshadow_addr = kmap_atomic(shadow_page);
+                memcpy(kaddr + offset, kshadow_addr + offset, len);
+                kunmap_atomic(kshadow_addr);
+                kunmap_atomic(kaddr);
+        }
+        __free_page(shadow_page);
+        bio_put(bio);
+
+        return ret;
+}
+
+static int
+bl_read_partial_page_sync(struct page *page, struct pnfs_block_extent *be,
+                          unsigned int dirty_offset, unsigned int dirty_len,
+                          bool full_page)
+{
+        int ret = 0;
+        unsigned int start, end;
+
+        if (full_page) {
+                start = 0;
+                end = PAGE_CACHE_SIZE;
+        } else {
+                start = round_down(dirty_offset, SECTOR_SIZE);
+                end = round_up(dirty_offset + dirty_len, SECTOR_SIZE);
+        }
+
+        dprintk("%s: offset %u len %d\n", __func__, dirty_offset, dirty_len);
+        if (!be) {
+                zero_user_segments(page, start, dirty_offset,
+                                   dirty_offset + dirty_len, end);
+                if (start == 0 && end == PAGE_CACHE_SIZE &&
+                    trylock_page(page)) {
+                        SetPageUptodate(page);
+                        unlock_page(page);
+                }
+                return ret;
+        }
+
+        if (start != dirty_offset)
+                ret = bl_do_readpage_sync(page, be, start, dirty_offset - start);
+
+        if (!ret && (dirty_offset + dirty_len < end))
+                ret = bl_do_readpage_sync(page, be, dirty_offset + dirty_len,
+                                          end - dirty_offset - dirty_len);
+
+        return ret;
+}
+
 /* Given an unmapped page, zero it or read in page for COW, page is locked
  * by caller.
  */
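
bl_read_partial_page_sync() above is the read half of a sector-granular read-modify-write: for a partial-page write, only the sector-aligned fringes around the dirty range, [start, dirty_offset) and [dirty_offset + dirty_len, end), have to be read back from the device (or zeroed when there is no extent). A sketch of just that boundary math, assuming 512-byte sectors and power-of-two rounding as in the kernel's round_down()/round_up():

    #include <stdio.h>

    #define SECTOR_SIZE 512u

    static unsigned int rnd_down(unsigned int x, unsigned int a)
    {
        return x & ~(a - 1);            /* a must be a power of two */
    }

    static unsigned int rnd_up(unsigned int x, unsigned int a)
    {
        return (x + a - 1) & ~(a - 1);
    }

    int main(void)
    {
        unsigned int dirty_offset = 700, dirty_len = 1000;
        unsigned int start = rnd_down(dirty_offset, SECTOR_SIZE);
        unsigned int end = rnd_up(dirty_offset + dirty_len, SECTOR_SIZE);

        if (start != dirty_offset)
            printf("pre-read  [%u, %u)\n", start, dirty_offset);
        if (dirty_offset + dirty_len < end)
            printf("post-read [%u, %u)\n", dirty_offset + dirty_len, end);
        return 0;
    }
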
@@ -494,7 +618,6 @@ init_page_for_write(struct page *page, struct pnfs_block_extent *cow_read)
         SetPageUptodate(page);
 
 cleanup:
-        bl_put_extent(cow_read);
         if (bh)
                 free_buffer_head(bh);
         if (ret) {
@@ -566,6 +689,7 @@ bl_write_pagelist(struct nfs_write_data *wdata, int sync)
         struct parallel_io *par = NULL;
         loff_t offset = wdata->args.offset;
         size_t count = wdata->args.count;
+        unsigned int pg_offset, pg_len, saved_len;
         struct page **pages = wdata->args.pages;
         struct page *page;
         pgoff_t index;
@@ -574,10 +698,13 @@ bl_write_pagelist(struct nfs_write_data *wdata, int sync)
                 NFS_SERVER(header->inode)->pnfs_blksize >> PAGE_CACHE_SHIFT;
 
         dprintk("%s enter, %Zu@%lld\n", __func__, count, offset);
-        /* Check for alignment first */
-        if (!bl_check_alignment(offset, count, PAGE_CACHE_MASK))
-                goto out_mds;
 
+        if (header->dreq != NULL &&
+            (!IS_ALIGNED(offset, NFS_SERVER(header->inode)->pnfs_blksize) ||
+             !IS_ALIGNED(count, NFS_SERVER(header->inode)->pnfs_blksize))) {
+                dprintk("pnfsblock nonblock aligned DIO writes. Resend MDS\n");
+                goto out_mds;
+        }
         /* At this point, wdata->pages is a (sequential) list of nfs_pages.
          * We want to write each, and if there is an error set pnfs_error
          * to have it redone using nfs.
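
The gate above replaces the old page-mask alignment check: buffered writes no longer need it, while direct-I/O writes go through the block layout only when both offset and count are multiples of the server's pnfs block size, and are otherwise resent through the MDS. A userspace sketch of the predicate (mirroring the kernel's power-of-two IS_ALIGNED(); the block size here is an assumed example value):

    #include <stdbool.h>
    #include <stdio.h>

    /* Power-of-two alignment test, like the kernel's IS_ALIGNED() */
    static bool is_aligned(unsigned long long v, unsigned long a)
    {
        return (v & (a - 1)) == 0;
    }

    int main(void)
    {
        unsigned long blksize = 4096;   /* assumed pnfs_blksize */
        unsigned long long offset = 8192, count = 4096;

        if (!is_aligned(offset, blksize) || !is_aligned(count, blksize))
            printf("non-aligned DIO write: resend through MDS\n");
        else
            printf("aligned: attempt the block layout write\n");
        return 0;
    }
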
@@ -674,10 +801,11 @@ next_page:
         if (!extent_length) {
                 /* We've used up the previous extent */
                 bl_put_extent(be);
+                bl_put_extent(cow_read);
                 bio = bl_submit_bio(WRITE, bio);
                 /* Get the next one */
                 be = bl_find_get_extent(BLK_LSEG2EXT(header->lseg),
-                                        isect, NULL);
+                                        isect, &cow_read);
                 if (!be || !is_writable(be, isect)) {
                         header->pnfs_error = -EINVAL;
                         goto out;
@@ -694,7 +822,26 @@ next_page:
                         extent_length = be->be_length -
                                 (isect - be->be_f_offset);
                 }
-                if (be->be_state == PNFS_BLOCK_INVALID_DATA) {
+
+                dprintk("%s offset %lld count %Zu\n", __func__, offset, count);
+                pg_offset = offset & ~PAGE_CACHE_MASK;
+                if (pg_offset + count > PAGE_CACHE_SIZE)
+                        pg_len = PAGE_CACHE_SIZE - pg_offset;
+                else
+                        pg_len = count;
+
+                saved_len = pg_len;
+                if (be->be_state == PNFS_BLOCK_INVALID_DATA &&
+                    !bl_is_sector_init(be->be_inval, isect)) {
+                        ret = bl_read_partial_page_sync(pages[i], cow_read,
+                                                        pg_offset, pg_len, true);
+                        if (ret) {
+                                dprintk("%s bl_read_partial_page_sync fail %d\n",
+                                        __func__, ret);
+                                header->pnfs_error = ret;
+                                goto out;
+                        }
+
                         ret = bl_mark_sectors_init(be->be_inval, isect,
                                                    PAGE_CACHE_SECTORS);
                         if (unlikely(ret)) {
@@ -703,15 +850,35 @@ next_page:
                                 header->pnfs_error = ret;
                                 goto out;
                         }
+
+                        /* Expand to full page write */
+                        pg_offset = 0;
+                        pg_len = PAGE_CACHE_SIZE;
+                } else if ((pg_offset & (SECTOR_SIZE - 1)) ||
+                                (pg_len & (SECTOR_SIZE - 1))) {
+                        /* ahh, nasty case. We have to do sync full sector
+                         * read-modify-write cycles.
+                         */
+                        unsigned int saved_offset = pg_offset;
+                        ret = bl_read_partial_page_sync(pages[i], be, pg_offset,
+                                                        pg_len, false);
+                        pg_offset = round_down(pg_offset, SECTOR_SIZE);
+                        pg_len = round_up(saved_offset + pg_len, SECTOR_SIZE)
+                                 - pg_offset;
                 }
-                bio = bl_add_page_to_bio(bio, wdata->pages.npages - i, WRITE,
+
+
+                bio = do_add_page_to_bio(bio, wdata->pages.npages - i, WRITE,
                                          isect, pages[i], be,
-                                         bl_end_io_write, par);
+                                         bl_end_io_write, par,
+                                         pg_offset, pg_len);
                 if (IS_ERR(bio)) {
                         header->pnfs_error = PTR_ERR(bio);
                         bio = NULL;
                         goto out;
                 }
+                offset += saved_len;
+                count -= saved_len;
                 isect += PAGE_CACHE_SECTORS;
                 last_isect = isect;
                 extent_length -= PAGE_CACHE_SECTORS;
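
Taken together, the two hunks above leave the write loop with three cases per page: an uninitialized PNFS_BLOCK_INVALID_DATA extent is first filled in via bl_read_partial_page_sync() against cow_read and then expanded to a full-page write; a slice that is not sector-aligned triggers a synchronous read-modify-write and is rounded out to sector boundaries; everything else is written as-is. A compact sketch of that case split (illustrative only, not kernel code):

    #include <stdio.h>

    #define SECTOR_SIZE 512u

    enum wr_case { WR_FULL_PAGE, WR_RMW_SECTORS, WR_AS_IS };

    /* Classify one page slice the way the patched write loop does */
    static enum wr_case classify(int extent_uninitialized,
                                 unsigned int pg_offset, unsigned int pg_len)
    {
        if (extent_uninitialized)
            return WR_FULL_PAGE;    /* read in the rest, write whole page */
        if ((pg_offset & (SECTOR_SIZE - 1)) || (pg_len & (SECTOR_SIZE - 1)))
            return WR_RMW_SECTORS;  /* sync RMW, round out to sectors */
        return WR_AS_IS;
    }

    int main(void)
    {
        /* 700/1000 is not sector-aligned -> WR_RMW_SECTORS (1) */
        printf("%d\n", classify(0, 700, 1000));
        return 0;
    }
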
@@ -729,17 +896,16 @@ next_page:
         }
 
 write_done:
-        wdata->res.count = (last_isect << SECTOR_SHIFT) - (offset);
-        if (count < wdata->res.count) {
-                wdata->res.count = count;
-        }
+        wdata->res.count = wdata->args.count;
 out:
         bl_put_extent(be);
+        bl_put_extent(cow_read);
         bl_submit_bio(WRITE, bio);
         put_parallel(par);
         return PNFS_ATTEMPTED;
 out_mds:
         bl_put_extent(be);
+        bl_put_extent(cow_read);
         kfree(par);
         return PNFS_NOT_ATTEMPTED;
 }
@@ -874,7 +1040,7 @@ static void free_blk_mountid(struct block_mount_id *mid)
         }
 }
 
-/* This is mostly copied from the filelayout's get_device_info function.
+/* This is mostly copied from the filelayout_get_device_info function.
  * It seems much of this should be at the generic pnfs level.
  */
 static struct pnfs_block_dev *
@@ -1011,33 +1177,95 @@ bl_clear_layoutdriver(struct nfs_server *server)
         return 0;
 }
 
+static bool
+is_aligned_req(struct nfs_page *req, unsigned int alignment)
+{
+        return IS_ALIGNED(req->wb_offset, alignment) &&
+               IS_ALIGNED(req->wb_bytes, alignment);
+}
+
 static void
 bl_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
 {
-        if (!bl_check_alignment(req->wb_offset, req->wb_bytes, PAGE_CACHE_MASK))
+        if (pgio->pg_dreq != NULL &&
+            !is_aligned_req(req, SECTOR_SIZE))
                 nfs_pageio_reset_read_mds(pgio);
         else
                 pnfs_generic_pg_init_read(pgio, req);
 }
 
+static bool
+bl_pg_test_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
+                struct nfs_page *req)
+{
+        if (pgio->pg_dreq != NULL &&
+            !is_aligned_req(req, SECTOR_SIZE))
+                return false;
+
+        return pnfs_generic_pg_test(pgio, prev, req);
+}
+
+/*
+ * Return the number of contiguous bytes for a given inode
+ * starting at page frame idx.
+ */
+static u64 pnfs_num_cont_bytes(struct inode *inode, pgoff_t idx)
+{
+        struct address_space *mapping = inode->i_mapping;
+        pgoff_t end;
+
+        /* Optimize common case that writes from 0 to end of file */
+        end = DIV_ROUND_UP(i_size_read(inode), PAGE_CACHE_SIZE);
+        if (end != NFS_I(inode)->npages) {
+                rcu_read_lock();
+                end = radix_tree_next_hole(&mapping->page_tree, idx + 1, ULONG_MAX);
+                rcu_read_unlock();
+        }
+
+        if (!end)
+                return i_size_read(inode) - (idx << PAGE_CACHE_SHIFT);
+        else
+                return (end - idx) << PAGE_CACHE_SHIFT;
+}
+
 static void
 bl_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
 {
-        if (!bl_check_alignment(req->wb_offset, req->wb_bytes, PAGE_CACHE_MASK))
+        if (pgio->pg_dreq != NULL &&
+            !is_aligned_req(req, PAGE_CACHE_SIZE)) {
                 nfs_pageio_reset_write_mds(pgio);
-        else
-                pnfs_generic_pg_init_write(pgio, req);
+        } else {
+                u64 wb_size;
+                if (pgio->pg_dreq == NULL)
+                        wb_size = pnfs_num_cont_bytes(pgio->pg_inode,
+                                                      req->wb_index);
+                else
+                        wb_size = nfs_dreq_bytes_left(pgio->pg_dreq);
+
+                pnfs_generic_pg_init_write(pgio, req, wb_size);
+        }
+}
+
+static bool
+bl_pg_test_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
+                 struct nfs_page *req)
+{
+        if (pgio->pg_dreq != NULL &&
+            !is_aligned_req(req, PAGE_CACHE_SIZE))
+                return false;
+
+        return pnfs_generic_pg_test(pgio, prev, req);
+}
 
 static const struct nfs_pageio_ops bl_pg_read_ops = {
         .pg_init = bl_pg_init_read,
-        .pg_test = pnfs_generic_pg_test,
+        .pg_test = bl_pg_test_read,
         .pg_doio = pnfs_generic_pg_readpages,
 };
 
 static const struct nfs_pageio_ops bl_pg_write_ops = {
         .pg_init = bl_pg_init_write,
-        .pg_test = pnfs_generic_pg_test,
+        .pg_test = bl_pg_test_write,
         .pg_doio = pnfs_generic_pg_writepages,
 };
 
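
Net effect of the pg_init/pg_test changes above: direct I/O must be sector-aligned to be read through the block layout and page-aligned to be written through it; misaligned requests fall back to the MDS, and the new pg_test hooks also refuse to coalesce them; buffered writes instead size their layout request with pnfs_num_cont_bytes(), which probes the page cache for the next hole. A userspace mirror of the two alignment gates (the struct and values are illustrative):

    #include <stdbool.h>
    #include <stdio.h>

    #define SECTOR_SIZE 512u
    #define PAGE_SIZE 4096u

    struct req {
        unsigned long long offset;
        unsigned long bytes;
    };

    /* Same test as is_aligned_req(): offset and length both aligned */
    static bool is_aligned_req(const struct req *r, unsigned int alignment)
    {
        return (r->offset % alignment) == 0 &&
               (r->bytes % alignment) == 0;
    }

    int main(void)
    {
        struct req r = { .offset = 512, .bytes = 1024 };

        printf("DIO read via layout:  %s\n",
               is_aligned_req(&r, SECTOR_SIZE) ? "yes" : "no (use MDS)");
        printf("DIO write via layout: %s\n",
               is_aligned_req(&r, PAGE_SIZE) ? "yes" : "no (use MDS)");
        return 0;
    }
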
