about summary refs log tree commit diff stats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r-- fs/btrfs/backref.c 2
-rw-r--r-- fs/btrfs/ctree.c 17
-rw-r--r-- fs/btrfs/ctree.h 5
-rw-r--r-- fs/btrfs/disk-io.c 147
-rw-r--r-- fs/btrfs/extent-tree.c 119
-rw-r--r-- fs/btrfs/extent_io.c 9
-rw-r--r-- fs/btrfs/extent_io.h 2
-rw-r--r-- fs/btrfs/free-space-cache.c 63
-rw-r--r-- fs/btrfs/inode.c 6
-rw-r--r-- fs/btrfs/ioctl.c 15
-rw-r--r-- fs/btrfs/scrub.c 2
-rw-r--r-- fs/btrfs/transaction.c 8
-rw-r--r-- fs/btrfs/volumes.h 6
-rw-r--r-- fs/ceph/dir.c 2
-rw-r--r-- fs/ceph/inode.c 9
-rw-r--r-- fs/ceph/super.c 6
-rw-r--r-- fs/dcache.c 11
-rw-r--r-- fs/ext4/balloc.c 2
-rw-r--r-- fs/ext4/inode.c 1
-rw-r--r-- fs/ext4/super.c 6
-rw-r--r-- fs/minix/bitmap.c 55
-rw-r--r-- fs/minix/inode.c 25
-rw-r--r-- fs/minix/minix.h 11
-rw-r--r-- fs/namespace.c 6
-rw-r--r-- fs/nfs/dir.c 2
-rw-r--r-- fs/nfs/file.c 91
-rw-r--r-- fs/nfs/inode.c 2
-rw-r--r-- fs/nfs/internal.h 2
-rw-r--r-- fs/nfs/nfs3proc.c 1
-rw-r--r-- fs/nfs/nfs4proc.c 4
-rw-r--r-- fs/nfs/pnfs.c 26
-rw-r--r-- fs/nfs/proc.c 1
-rw-r--r-- fs/nfs/read.c 14
33 files changed, 457 insertions, 221 deletions
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index 8855aad3929c..22c64fff1bd5 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -683,7 +683,7 @@ static int inode_to_path(u64 inum, struct btrfs_inode_ref *iref,
683 return PTR_ERR(fspath); 683 return PTR_ERR(fspath);
684 684
685 if (fspath > fspath_min) { 685 if (fspath > fspath_min) {
686 ipath->fspath->val[i] = (u64)fspath; 686 ipath->fspath->val[i] = (u64)(unsigned long)fspath;
687 ++ipath->fspath->elem_cnt; 687 ++ipath->fspath->elem_cnt;
688 ipath->fspath->bytes_left = fspath - fspath_min; 688 ipath->fspath->bytes_left = fspath - fspath_min;
689 } else { 689 } else {
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 0fe615e4ea38..dede441bdeee 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -514,10 +514,25 @@ static inline int should_cow_block(struct btrfs_trans_handle *trans,
514 struct btrfs_root *root, 514 struct btrfs_root *root,
515 struct extent_buffer *buf) 515 struct extent_buffer *buf)
516{ 516{
517 /* ensure we can see the force_cow */
518 smp_rmb();
519
520 /*
521 * We do not need to cow a block if
522 * 1) this block is not created or changed in this transaction;
523 * 2) this block does not belong to TREE_RELOC tree;
524 * 3) the root is not forced COW.
525 *
526 * What is forced COW:
527 * when we create snapshot during committing the transaction,
528 * after we've finished copying src root, we must COW the shared
529 * block to ensure the metadata consistency.
530 */
517 if (btrfs_header_generation(buf) == trans->transid && 531 if (btrfs_header_generation(buf) == trans->transid &&
518 !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN) && 532 !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN) &&
519 !(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID && 533 !(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID &&
520 btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) 534 btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC)) &&
535 !root->force_cow)
521 return 0; 536 return 0;
522 return 1; 537 return 1;
523} 538}
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index b9ba59ff9292..04a5dfcee5a1 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -848,7 +848,8 @@ struct btrfs_free_cluster {
848enum btrfs_caching_type { 848enum btrfs_caching_type {
849 BTRFS_CACHE_NO = 0, 849 BTRFS_CACHE_NO = 0,
850 BTRFS_CACHE_STARTED = 1, 850 BTRFS_CACHE_STARTED = 1,
851 BTRFS_CACHE_FINISHED = 2, 851 BTRFS_CACHE_FAST = 2,
852 BTRFS_CACHE_FINISHED = 3,
852}; 853};
853 854
854enum btrfs_disk_cache_state { 855enum btrfs_disk_cache_state {
@@ -1271,6 +1272,8 @@ struct btrfs_root {
1271 * for stat. It may be used for more later 1272 * for stat. It may be used for more later
1272 */ 1273 */
1273 dev_t anon_dev; 1274 dev_t anon_dev;
1275
1276 int force_cow;
1274}; 1277};
1275 1278
1276struct btrfs_ioctl_defrag_range_args { 1279struct btrfs_ioctl_defrag_range_args {
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 622654fe051f..b09175901521 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -620,7 +620,7 @@ out:
620 620
621static int btree_io_failed_hook(struct bio *failed_bio, 621static int btree_io_failed_hook(struct bio *failed_bio,
622 struct page *page, u64 start, u64 end, 622 struct page *page, u64 start, u64 end,
623 u64 mirror_num, struct extent_state *state) 623 int mirror_num, struct extent_state *state)
624{ 624{
625 struct extent_io_tree *tree; 625 struct extent_io_tree *tree;
626 unsigned long len; 626 unsigned long len;
@@ -2569,22 +2569,10 @@ static int write_dev_supers(struct btrfs_device *device,
2569 int errors = 0; 2569 int errors = 0;
2570 u32 crc; 2570 u32 crc;
2571 u64 bytenr; 2571 u64 bytenr;
2572 int last_barrier = 0;
2573 2572
2574 if (max_mirrors == 0) 2573 if (max_mirrors == 0)
2575 max_mirrors = BTRFS_SUPER_MIRROR_MAX; 2574 max_mirrors = BTRFS_SUPER_MIRROR_MAX;
2576 2575
2577 /* make sure only the last submit_bh does a barrier */
2578 if (do_barriers) {
2579 for (i = 0; i < max_mirrors; i++) {
2580 bytenr = btrfs_sb_offset(i);
2581 if (bytenr + BTRFS_SUPER_INFO_SIZE >=
2582 device->total_bytes)
2583 break;
2584 last_barrier = i;
2585 }
2586 }
2587
2588 for (i = 0; i < max_mirrors; i++) { 2576 for (i = 0; i < max_mirrors; i++) {
2589 bytenr = btrfs_sb_offset(i); 2577 bytenr = btrfs_sb_offset(i);
2590 if (bytenr + BTRFS_SUPER_INFO_SIZE >= device->total_bytes) 2578 if (bytenr + BTRFS_SUPER_INFO_SIZE >= device->total_bytes)
@@ -2630,17 +2618,136 @@ static int write_dev_supers(struct btrfs_device *device,
2630 bh->b_end_io = btrfs_end_buffer_write_sync; 2618 bh->b_end_io = btrfs_end_buffer_write_sync;
2631 } 2619 }
2632 2620
2633 if (i == last_barrier && do_barriers) 2621 /*
2634 ret = submit_bh(WRITE_FLUSH_FUA, bh); 2622 * we fua the first super. The others we allow
2635 else 2623 * to go down lazy.
2636 ret = submit_bh(WRITE_SYNC, bh); 2624 */
2637 2625 ret = submit_bh(WRITE_FUA, bh);
2638 if (ret) 2626 if (ret)
2639 errors++; 2627 errors++;
2640 } 2628 }
2641 return errors < i ? 0 : -1; 2629 return errors < i ? 0 : -1;
2642} 2630}
2643 2631
2632/*
2633 * endio for the write_dev_flush, this will wake anyone waiting
2634 * for the barrier when it is done
2635 */
2636static void btrfs_end_empty_barrier(struct bio *bio, int err)
2637{
2638 if (err) {
2639 if (err == -EOPNOTSUPP)
2640 set_bit(BIO_EOPNOTSUPP, &bio->bi_flags);
2641 clear_bit(BIO_UPTODATE, &bio->bi_flags);
2642 }
2643 if (bio->bi_private)
2644 complete(bio->bi_private);
2645 bio_put(bio);
2646}
2647
2648/*
2649 * trigger flushes for one of the devices. If you pass wait == 0, the flushes are
2650 * sent down. With wait == 1, it waits for the previous flush.
2651 *
2652 * any device where the flush fails with eopnotsupp is flagged as not-barrier
2653 * capable
2654 */
2655static int write_dev_flush(struct btrfs_device *device, int wait)
2656{
2657 struct bio *bio;
2658 int ret = 0;
2659
2660 if (device->nobarriers)
2661 return 0;
2662
2663 if (wait) {
2664 bio = device->flush_bio;
2665 if (!bio)
2666 return 0;
2667
2668 wait_for_completion(&device->flush_wait);
2669
2670 if (bio_flagged(bio, BIO_EOPNOTSUPP)) {
2671 printk("btrfs: disabling barriers on dev %s\n",
2672 device->name);
2673 device->nobarriers = 1;
2674 }
2675 if (!bio_flagged(bio, BIO_UPTODATE)) {
2676 ret = -EIO;
2677 }
2678
2679 /* drop the reference from the wait == 0 run */
2680 bio_put(bio);
2681 device->flush_bio = NULL;
2682
2683 return ret;
2684 }
2685
2686 /*
2687 * one reference for us, and we leave it for the
2688 * caller
2689 */
2690 device->flush_bio = NULL;;
2691 bio = bio_alloc(GFP_NOFS, 0);
2692 if (!bio)
2693 return -ENOMEM;
2694
2695 bio->bi_end_io = btrfs_end_empty_barrier;
2696 bio->bi_bdev = device->bdev;
2697 init_completion(&device->flush_wait);
2698 bio->bi_private = &device->flush_wait;
2699 device->flush_bio = bio;
2700
2701 bio_get(bio);
2702 submit_bio(WRITE_FLUSH, bio);
2703
2704 return 0;
2705}
2706
2707/*
2708 * send an empty flush down to each device in parallel,
2709 * then wait for them
2710 */
2711static int barrier_all_devices(struct btrfs_fs_info *info)
2712{
2713 struct list_head *head;
2714 struct btrfs_device *dev;
2715 int errors = 0;
2716 int ret;
2717
2718 /* send down all the barriers */
2719 head = &info->fs_devices->devices;
2720 list_for_each_entry_rcu(dev, head, dev_list) {
2721 if (!dev->bdev) {
2722 errors++;
2723 continue;
2724 }
2725 if (!dev->in_fs_metadata || !dev->writeable)
2726 continue;
2727
2728 ret = write_dev_flush(dev, 0);
2729 if (ret)
2730 errors++;
2731 }
2732
2733 /* wait for all the barriers */
2734 list_for_each_entry_rcu(dev, head, dev_list) {
2735 if (!dev->bdev) {
2736 errors++;
2737 continue;
2738 }
2739 if (!dev->in_fs_metadata || !dev->writeable)
2740 continue;
2741
2742 ret = write_dev_flush(dev, 1);
2743 if (ret)
2744 errors++;
2745 }
2746 if (errors)
2747 return -EIO;
2748 return 0;
2749}
2750
2644int write_all_supers(struct btrfs_root *root, int max_mirrors) 2751int write_all_supers(struct btrfs_root *root, int max_mirrors)
2645{ 2752{
2646 struct list_head *head; 2753 struct list_head *head;
@@ -2662,6 +2769,10 @@ int write_all_supers(struct btrfs_root *root, int max_mirrors)
2662 2769
2663 mutex_lock(&root->fs_info->fs_devices->device_list_mutex); 2770 mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
2664 head = &root->fs_info->fs_devices->devices; 2771 head = &root->fs_info->fs_devices->devices;
2772
2773 if (do_barriers)
2774 barrier_all_devices(root->fs_info);
2775
2665 list_for_each_entry_rcu(dev, head, dev_list) { 2776 list_for_each_entry_rcu(dev, head, dev_list) {
2666 if (!dev->bdev) { 2777 if (!dev->bdev) {
2667 total_errors++; 2778 total_errors++;
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index b232150b5b6b..930ae8949737 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -467,13 +467,59 @@ static int cache_block_group(struct btrfs_block_group_cache *cache,
467 struct btrfs_root *root, 467 struct btrfs_root *root,
468 int load_cache_only) 468 int load_cache_only)
469{ 469{
470 DEFINE_WAIT(wait);
470 struct btrfs_fs_info *fs_info = cache->fs_info; 471 struct btrfs_fs_info *fs_info = cache->fs_info;
471 struct btrfs_caching_control *caching_ctl; 472 struct btrfs_caching_control *caching_ctl;
472 int ret = 0; 473 int ret = 0;
473 474
474 smp_mb(); 475 caching_ctl = kzalloc(sizeof(*caching_ctl), GFP_NOFS);
475 if (cache->cached != BTRFS_CACHE_NO) 476 BUG_ON(!caching_ctl);
477
478 INIT_LIST_HEAD(&caching_ctl->list);
479 mutex_init(&caching_ctl->mutex);
480 init_waitqueue_head(&caching_ctl->wait);
481 caching_ctl->block_group = cache;
482 caching_ctl->progress = cache->key.objectid;
483 atomic_set(&caching_ctl->count, 1);
484 caching_ctl->work.func = caching_thread;
485
486 spin_lock(&cache->lock);
487 /*
488 * This should be a rare occasion, but this could happen I think in the
489 * case where one thread starts to load the space cache info, and then
490 * some other thread starts a transaction commit which tries to do an
491 * allocation while the other thread is still loading the space cache
492 * info. The previous loop should have kept us from choosing this block
493 * group, but if we've moved to the state where we will wait on caching
494 * block groups we need to first check if we're doing a fast load here,
495 * so we can wait for it to finish, otherwise we could end up allocating
496 * from a block group whose cache gets evicted for one reason or
497 * another.
498 */
499 while (cache->cached == BTRFS_CACHE_FAST) {
500 struct btrfs_caching_control *ctl;
501
502 ctl = cache->caching_ctl;
503 atomic_inc(&ctl->count);
504 prepare_to_wait(&ctl->wait, &wait, TASK_UNINTERRUPTIBLE);
505 spin_unlock(&cache->lock);
506
507 schedule();
508
509 finish_wait(&ctl->wait, &wait);
510 put_caching_control(ctl);
511 spin_lock(&cache->lock);
512 }
513
514 if (cache->cached != BTRFS_CACHE_NO) {
515 spin_unlock(&cache->lock);
516 kfree(caching_ctl);
476 return 0; 517 return 0;
518 }
519 WARN_ON(cache->caching_ctl);
520 cache->caching_ctl = caching_ctl;
521 cache->cached = BTRFS_CACHE_FAST;
522 spin_unlock(&cache->lock);
477 523
478 /* 524 /*
479 * We can't do the read from on-disk cache during a commit since we need 525 * We can't do the read from on-disk cache during a commit since we need
@@ -484,56 +530,51 @@ static int cache_block_group(struct btrfs_block_group_cache *cache,
484 if (trans && (!trans->transaction->in_commit) && 530 if (trans && (!trans->transaction->in_commit) &&
485 (root && root != root->fs_info->tree_root) && 531 (root && root != root->fs_info->tree_root) &&
486 btrfs_test_opt(root, SPACE_CACHE)) { 532 btrfs_test_opt(root, SPACE_CACHE)) {
487 spin_lock(&cache->lock);
488 if (cache->cached != BTRFS_CACHE_NO) {
489 spin_unlock(&cache->lock);
490 return 0;
491 }
492 cache->cached = BTRFS_CACHE_STARTED;
493 spin_unlock(&cache->lock);
494
495 ret = load_free_space_cache(fs_info, cache); 533 ret = load_free_space_cache(fs_info, cache);
496 534
497 spin_lock(&cache->lock); 535 spin_lock(&cache->lock);
498 if (ret == 1) { 536 if (ret == 1) {
537 cache->caching_ctl = NULL;
499 cache->cached = BTRFS_CACHE_FINISHED; 538 cache->cached = BTRFS_CACHE_FINISHED;
500 cache->last_byte_to_unpin = (u64)-1; 539 cache->last_byte_to_unpin = (u64)-1;
501 } else { 540 } else {
502 cache->cached = BTRFS_CACHE_NO; 541 if (load_cache_only) {
542 cache->caching_ctl = NULL;
543 cache->cached = BTRFS_CACHE_NO;
544 } else {
545 cache->cached = BTRFS_CACHE_STARTED;
546 }
503 } 547 }
504 spin_unlock(&cache->lock); 548 spin_unlock(&cache->lock);
549 wake_up(&caching_ctl->wait);
505 if (ret == 1) { 550 if (ret == 1) {
551 put_caching_control(caching_ctl);
506 free_excluded_extents(fs_info->extent_root, cache); 552 free_excluded_extents(fs_info->extent_root, cache);
507 return 0; 553 return 0;
508 } 554 }
555 } else {
556 /*
557 * We are not going to do the fast caching, set cached to the
558 * appropriate value and wakeup any waiters.
559 */
560 spin_lock(&cache->lock);
561 if (load_cache_only) {
562 cache->caching_ctl = NULL;
563 cache->cached = BTRFS_CACHE_NO;
564 } else {
565 cache->cached = BTRFS_CACHE_STARTED;
566 }
567 spin_unlock(&cache->lock);
568 wake_up(&caching_ctl->wait);
509 } 569 }
510 570
511 if (load_cache_only) 571 if (load_cache_only) {
512 return 0; 572 put_caching_control(caching_ctl);
513
514 caching_ctl = kzalloc(sizeof(*caching_ctl), GFP_NOFS);
515 BUG_ON(!caching_ctl);
516
517 INIT_LIST_HEAD(&caching_ctl->list);
518 mutex_init(&caching_ctl->mutex);
519 init_waitqueue_head(&caching_ctl->wait);
520 caching_ctl->block_group = cache;
521 caching_ctl->progress = cache->key.objectid;
522 /* one for caching kthread, one for caching block group list */
523 atomic_set(&caching_ctl->count, 2);
524 caching_ctl->work.func = caching_thread;
525
526 spin_lock(&cache->lock);
527 if (cache->cached != BTRFS_CACHE_NO) {
528 spin_unlock(&cache->lock);
529 kfree(caching_ctl);
530 return 0; 573 return 0;
531 } 574 }
532 cache->caching_ctl = caching_ctl;
533 cache->cached = BTRFS_CACHE_STARTED;
534 spin_unlock(&cache->lock);
535 575
536 down_write(&fs_info->extent_commit_sem); 576 down_write(&fs_info->extent_commit_sem);
577 atomic_inc(&caching_ctl->count);
537 list_add_tail(&caching_ctl->list, &fs_info->caching_block_groups); 578 list_add_tail(&caching_ctl->list, &fs_info->caching_block_groups);
538 up_write(&fs_info->extent_commit_sem); 579 up_write(&fs_info->extent_commit_sem);
539 580
@@ -5178,13 +5219,15 @@ search:
5178 } 5219 }
5179 5220
5180have_block_group: 5221have_block_group:
5181 if (unlikely(block_group->cached == BTRFS_CACHE_NO)) { 5222 cached = block_group_cache_done(block_group);
5223 if (unlikely(!cached)) {
5182 u64 free_percent; 5224 u64 free_percent;
5183 5225
5226 found_uncached_bg = true;
5184 ret = cache_block_group(block_group, trans, 5227 ret = cache_block_group(block_group, trans,
5185 orig_root, 1); 5228 orig_root, 1);
5186 if (block_group->cached == BTRFS_CACHE_FINISHED) 5229 if (block_group->cached == BTRFS_CACHE_FINISHED)
5187 goto have_block_group; 5230 goto alloc;
5188 5231
5189 free_percent = btrfs_block_group_used(&block_group->item); 5232 free_percent = btrfs_block_group_used(&block_group->item);
5190 free_percent *= 100; 5233 free_percent *= 100;
@@ -5206,7 +5249,6 @@ have_block_group:
5206 orig_root, 0); 5249 orig_root, 0);
5207 BUG_ON(ret); 5250 BUG_ON(ret);
5208 } 5251 }
5209 found_uncached_bg = true;
5210 5252
5211 /* 5253 /*
5212 * If loop is set for cached only, try the next block 5254 * If loop is set for cached only, try the next block
@@ -5216,10 +5258,7 @@ have_block_group:
5216 goto loop; 5258 goto loop;
5217 } 5259 }
5218 5260
5219 cached = block_group_cache_done(block_group); 5261alloc:
5220 if (unlikely(!cached))
5221 found_uncached_bg = true;
5222
5223 if (unlikely(block_group->ro)) 5262 if (unlikely(block_group->ro))
5224 goto loop; 5263 goto loop;
5225 5264
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 1f87c4d0e7a0..9472d3de5e52 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -2285,8 +2285,8 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
2285 clean_io_failure(start, page); 2285 clean_io_failure(start, page);
2286 } 2286 }
2287 if (!uptodate) { 2287 if (!uptodate) {
2288 u64 failed_mirror; 2288 int failed_mirror;
2289 failed_mirror = (u64)bio->bi_bdev; 2289 failed_mirror = (int)(unsigned long)bio->bi_bdev;
2290 if (tree->ops && tree->ops->readpage_io_failed_hook) 2290 if (tree->ops && tree->ops->readpage_io_failed_hook)
2291 ret = tree->ops->readpage_io_failed_hook( 2291 ret = tree->ops->readpage_io_failed_hook(
2292 bio, page, start, end, 2292 bio, page, start, end,
@@ -3366,6 +3366,9 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
3366 return -ENOMEM; 3366 return -ENOMEM;
3367 path->leave_spinning = 1; 3367 path->leave_spinning = 1;
3368 3368
3369 start = ALIGN(start, BTRFS_I(inode)->root->sectorsize);
3370 len = ALIGN(len, BTRFS_I(inode)->root->sectorsize);
3371
3369 /* 3372 /*
3370 * lookup the last file extent. We're not using i_size here 3373 * lookup the last file extent. We're not using i_size here
3371 * because there might be preallocation past i_size 3374 * because there might be preallocation past i_size
@@ -3413,7 +3416,7 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
3413 lock_extent_bits(&BTRFS_I(inode)->io_tree, start, start + len, 0, 3416 lock_extent_bits(&BTRFS_I(inode)->io_tree, start, start + len, 0,
3414 &cached_state, GFP_NOFS); 3417 &cached_state, GFP_NOFS);
3415 3418
3416 em = get_extent_skip_holes(inode, off, last_for_get_extent, 3419 em = get_extent_skip_holes(inode, start, last_for_get_extent,
3417 get_extent); 3420 get_extent);
3418 if (!em) 3421 if (!em)
3419 goto out; 3422 goto out;
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index feb9be0e23bc..7604c3001322 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -70,7 +70,7 @@ struct extent_io_ops {
70 unsigned long bio_flags); 70 unsigned long bio_flags);
71 int (*readpage_io_hook)(struct page *page, u64 start, u64 end); 71 int (*readpage_io_hook)(struct page *page, u64 start, u64 end);
72 int (*readpage_io_failed_hook)(struct bio *bio, struct page *page, 72 int (*readpage_io_failed_hook)(struct bio *bio, struct page *page,
73 u64 start, u64 end, u64 failed_mirror, 73 u64 start, u64 end, int failed_mirror,
74 struct extent_state *state); 74 struct extent_state *state);
75 int (*writepage_io_failed_hook)(struct bio *bio, struct page *page, 75 int (*writepage_io_failed_hook)(struct bio *bio, struct page *page,
76 u64 start, u64 end, 76 u64 start, u64 end,
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 181760f9d2ab..6e5b7e463698 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -351,6 +351,11 @@ static int io_ctl_prepare_pages(struct io_ctl *io_ctl, struct inode *inode,
351 } 351 }
352 } 352 }
353 353
354 for (i = 0; i < io_ctl->num_pages; i++) {
355 clear_page_dirty_for_io(io_ctl->pages[i]);
356 set_page_extent_mapped(io_ctl->pages[i]);
357 }
358
354 return 0; 359 return 0;
355} 360}
356 361
@@ -1844,7 +1849,13 @@ again:
1844 info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset), 1849 info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset),
1845 1, 0); 1850 1, 0);
1846 if (!info) { 1851 if (!info) {
1847 WARN_ON(1); 1852 /* the tree logging code might be calling us before we
1853 * have fully loaded the free space rbtree for this
1854 * block group. So it is possible the entry won't
1855 * be in the rbtree yet at all. The caching code
1856 * will make sure not to put it in the rbtree if
1857 * the logging code has pinned it.
1858 */
1848 goto out_lock; 1859 goto out_lock;
1849 } 1860 }
1850 } 1861 }
@@ -2451,16 +2462,23 @@ setup_cluster_bitmap(struct btrfs_block_group_cache *block_group,
2451{ 2462{
2452 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2463 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2453 struct btrfs_free_space *entry; 2464 struct btrfs_free_space *entry;
2454 struct rb_node *node;
2455 int ret = -ENOSPC; 2465 int ret = -ENOSPC;
2466 u64 bitmap_offset = offset_to_bitmap(ctl, offset);
2456 2467
2457 if (ctl->total_bitmaps == 0) 2468 if (ctl->total_bitmaps == 0)
2458 return -ENOSPC; 2469 return -ENOSPC;
2459 2470
2460 /* 2471 /*
2461 * First check our cached list of bitmaps and see if there is an entry 2472 * The bitmap that covers offset won't be in the list unless offset
2462 * here that will work. 2473 * is just its start offset.
2463 */ 2474 */
2475 entry = list_first_entry(bitmaps, struct btrfs_free_space, list);
2476 if (entry->offset != bitmap_offset) {
2477 entry = tree_search_offset(ctl, bitmap_offset, 1, 0);
2478 if (entry && list_empty(&entry->list))
2479 list_add(&entry->list, bitmaps);
2480 }
2481
2464 list_for_each_entry(entry, bitmaps, list) { 2482 list_for_each_entry(entry, bitmaps, list) {
2465 if (entry->bytes < min_bytes) 2483 if (entry->bytes < min_bytes)
2466 continue; 2484 continue;
@@ -2471,38 +2489,10 @@ setup_cluster_bitmap(struct btrfs_block_group_cache *block_group,
2471 } 2489 }
2472 2490
2473 /* 2491 /*
2474 * If we do have entries on our list and we are here then we didn't find 2492 * The bitmaps list has all the bitmaps that record free space
2475 * anything, so go ahead and get the next entry after the last entry in 2493 * starting after offset, so no more search is required.
2476 * this list and start the search from there.
2477 */ 2494 */
2478 if (!list_empty(bitmaps)) { 2495 return -ENOSPC;
2479 entry = list_entry(bitmaps->prev, struct btrfs_free_space,
2480 list);
2481 node = rb_next(&entry->offset_index);
2482 if (!node)
2483 return -ENOSPC;
2484 entry = rb_entry(node, struct btrfs_free_space, offset_index);
2485 goto search;
2486 }
2487
2488 entry = tree_search_offset(ctl, offset_to_bitmap(ctl, offset), 0, 1);
2489 if (!entry)
2490 return -ENOSPC;
2491
2492search:
2493 node = &entry->offset_index;
2494 do {
2495 entry = rb_entry(node, struct btrfs_free_space, offset_index);
2496 node = rb_next(&entry->offset_index);
2497 if (!entry->bitmap)
2498 continue;
2499 if (entry->bytes < min_bytes)
2500 continue;
2501 ret = btrfs_bitmap_cluster(block_group, entry, cluster, offset,
2502 bytes, min_bytes);
2503 } while (ret && node);
2504
2505 return ret;
2506} 2496}
2507 2497
2508/* 2498/*
@@ -2520,8 +2510,8 @@ int btrfs_find_space_cluster(struct btrfs_trans_handle *trans,
2520 u64 offset, u64 bytes, u64 empty_size) 2510 u64 offset, u64 bytes, u64 empty_size)
2521{ 2511{
2522 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2512 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2523 struct list_head bitmaps;
2524 struct btrfs_free_space *entry, *tmp; 2513 struct btrfs_free_space *entry, *tmp;
2514 LIST_HEAD(bitmaps);
2525 u64 min_bytes; 2515 u64 min_bytes;
2526 int ret; 2516 int ret;
2527 2517
@@ -2560,7 +2550,6 @@ int btrfs_find_space_cluster(struct btrfs_trans_handle *trans,
2560 goto out; 2550 goto out;
2561 } 2551 }
2562 2552
2563 INIT_LIST_HEAD(&bitmaps);
2564 ret = setup_cluster_no_bitmap(block_group, cluster, &bitmaps, offset, 2553 ret = setup_cluster_no_bitmap(block_group, cluster, &bitmaps, offset,
2565 bytes, min_bytes); 2554 bytes, min_bytes);
2566 if (ret) 2555 if (ret)
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 116ab67a06df..526dd51a1966 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -6794,11 +6794,13 @@ static int btrfs_getattr(struct vfsmount *mnt,
6794 struct dentry *dentry, struct kstat *stat) 6794 struct dentry *dentry, struct kstat *stat)
6795{ 6795{
6796 struct inode *inode = dentry->d_inode; 6796 struct inode *inode = dentry->d_inode;
6797 u32 blocksize = inode->i_sb->s_blocksize;
6798
6797 generic_fillattr(inode, stat); 6799 generic_fillattr(inode, stat);
6798 stat->dev = BTRFS_I(inode)->root->anon_dev; 6800 stat->dev = BTRFS_I(inode)->root->anon_dev;
6799 stat->blksize = PAGE_CACHE_SIZE; 6801 stat->blksize = PAGE_CACHE_SIZE;
6800 stat->blocks = (inode_get_bytes(inode) + 6802 stat->blocks = (ALIGN(inode_get_bytes(inode), blocksize) +
6801 BTRFS_I(inode)->delalloc_bytes) >> 9; 6803 ALIGN(BTRFS_I(inode)->delalloc_bytes, blocksize)) >> 9;
6802 return 0; 6804 return 0;
6803} 6805}
6804 6806
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 4a34c472f126..a90e749ed6d2 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -1216,12 +1216,12 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root,
1216 *devstr = '\0'; 1216 *devstr = '\0';
1217 devstr = vol_args->name; 1217 devstr = vol_args->name;
1218 devid = simple_strtoull(devstr, &end, 10); 1218 devid = simple_strtoull(devstr, &end, 10);
1219 printk(KERN_INFO "resizing devid %llu\n", 1219 printk(KERN_INFO "btrfs: resizing devid %llu\n",
1220 (unsigned long long)devid); 1220 (unsigned long long)devid);
1221 } 1221 }
1222 device = btrfs_find_device(root, devid, NULL, NULL); 1222 device = btrfs_find_device(root, devid, NULL, NULL);
1223 if (!device) { 1223 if (!device) {
1224 printk(KERN_INFO "resizer unable to find device %llu\n", 1224 printk(KERN_INFO "btrfs: resizer unable to find device %llu\n",
1225 (unsigned long long)devid); 1225 (unsigned long long)devid);
1226 ret = -EINVAL; 1226 ret = -EINVAL;
1227 goto out_unlock; 1227 goto out_unlock;
@@ -1267,7 +1267,7 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root,
1267 do_div(new_size, root->sectorsize); 1267 do_div(new_size, root->sectorsize);
1268 new_size *= root->sectorsize; 1268 new_size *= root->sectorsize;
1269 1269
1270 printk(KERN_INFO "new size for %s is %llu\n", 1270 printk(KERN_INFO "btrfs: new size for %s is %llu\n",
1271 device->name, (unsigned long long)new_size); 1271 device->name, (unsigned long long)new_size);
1272 1272
1273 if (new_size > old_size) { 1273 if (new_size > old_size) {
@@ -2930,11 +2930,13 @@ static long btrfs_ioctl_ino_to_path(struct btrfs_root *root, void __user *arg)
2930 goto out; 2930 goto out;
2931 2931
2932 for (i = 0; i < ipath->fspath->elem_cnt; ++i) { 2932 for (i = 0; i < ipath->fspath->elem_cnt; ++i) {
2933 rel_ptr = ipath->fspath->val[i] - (u64)ipath->fspath->val; 2933 rel_ptr = ipath->fspath->val[i] -
2934 (u64)(unsigned long)ipath->fspath->val;
2934 ipath->fspath->val[i] = rel_ptr; 2935 ipath->fspath->val[i] = rel_ptr;
2935 } 2936 }
2936 2937
2937 ret = copy_to_user((void *)ipa->fspath, (void *)ipath->fspath, size); 2938 ret = copy_to_user((void *)(unsigned long)ipa->fspath,
2939 (void *)(unsigned long)ipath->fspath, size);
2938 if (ret) { 2940 if (ret) {
2939 ret = -EFAULT; 2941 ret = -EFAULT;
2940 goto out; 2942 goto out;
@@ -3017,7 +3019,8 @@ static long btrfs_ioctl_logical_to_ino(struct btrfs_root *root,
3017 if (ret < 0) 3019 if (ret < 0)
3018 goto out; 3020 goto out;
3019 3021
3020 ret = copy_to_user((void *)loi->inodes, (void *)inodes, size); 3022 ret = copy_to_user((void *)(unsigned long)loi->inodes,
3023 (void *)(unsigned long)inodes, size);
3021 if (ret) 3024 if (ret)
3022 ret = -EFAULT; 3025 ret = -EFAULT;
3023 3026
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index f4190f22edfb..fab420db5121 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -272,7 +272,7 @@ static int scrub_print_warning_inode(u64 inum, u64 offset, u64 root, void *ctx)
272 swarn->logical, swarn->dev->name, 272 swarn->logical, swarn->dev->name,
273 (unsigned long long)swarn->sector, root, inum, offset, 273 (unsigned long long)swarn->sector, root, inum, offset,
274 min(isize - offset, (u64)PAGE_SIZE), nlink, 274 min(isize - offset, (u64)PAGE_SIZE), nlink,
275 (char *)ipath->fspath->val[i]); 275 (char *)(unsigned long)ipath->fspath->val[i]);
276 276
277 free_ipath(ipath); 277 free_ipath(ipath);
278 return 0; 278 return 0;
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 6a0574e923bc..81376d94cd3c 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -785,6 +785,10 @@ static noinline int commit_fs_roots(struct btrfs_trans_handle *trans,
785 785
786 btrfs_save_ino_cache(root, trans); 786 btrfs_save_ino_cache(root, trans);
787 787
788 /* see comments in should_cow_block() */
789 root->force_cow = 0;
790 smp_wmb();
791
788 if (root->commit_root != root->node) { 792 if (root->commit_root != root->node) {
789 mutex_lock(&root->fs_commit_mutex); 793 mutex_lock(&root->fs_commit_mutex);
790 switch_commit_root(root); 794 switch_commit_root(root);
@@ -947,6 +951,10 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
947 btrfs_tree_unlock(old); 951 btrfs_tree_unlock(old);
948 free_extent_buffer(old); 952 free_extent_buffer(old);
949 953
954 /* see comments in should_cow_block() */
955 root->force_cow = 1;
956 smp_wmb();
957
950 btrfs_set_root_node(new_root_item, tmp); 958 btrfs_set_root_node(new_root_item, tmp);
951 /* record when the snapshot was created in key.offset */ 959 /* record when the snapshot was created in key.offset */
952 key.offset = trans->transid; 960 key.offset = trans->transid;
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index ab5b1c49f352..78f2d4d4f37f 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -100,6 +100,12 @@ struct btrfs_device {
100 struct reada_zone *reada_curr_zone; 100 struct reada_zone *reada_curr_zone;
101 struct radix_tree_root reada_zones; 101 struct radix_tree_root reada_zones;
102 struct radix_tree_root reada_extents; 102 struct radix_tree_root reada_extents;
103
104 /* for sending down flush barriers */
105 struct bio *flush_bio;
106 struct completion flush_wait;
107 int nobarriers;
108
103}; 109};
104 110
105struct btrfs_fs_devices { 111struct btrfs_fs_devices {
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 2abd0dfad7f8..bca3948e9dbf 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -1143,7 +1143,7 @@ static void ceph_d_prune(struct dentry *dentry)
1143{ 1143{
1144 struct ceph_dentry_info *di; 1144 struct ceph_dentry_info *di;
1145 1145
1146 dout("d_release %p\n", dentry); 1146 dout("ceph_d_prune %p\n", dentry);
1147 1147
1148 /* do we have a valid parent? */ 1148 /* do we have a valid parent? */
1149 if (!dentry->d_parent || IS_ROOT(dentry)) 1149 if (!dentry->d_parent || IS_ROOT(dentry))
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index e392bfce84a3..116f36502f17 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -1328,12 +1328,13 @@ int ceph_inode_set_size(struct inode *inode, loff_t size)
1328 */ 1328 */
1329void ceph_queue_writeback(struct inode *inode) 1329void ceph_queue_writeback(struct inode *inode)
1330{ 1330{
1331 ihold(inode);
1331 if (queue_work(ceph_inode_to_client(inode)->wb_wq, 1332 if (queue_work(ceph_inode_to_client(inode)->wb_wq,
1332 &ceph_inode(inode)->i_wb_work)) { 1333 &ceph_inode(inode)->i_wb_work)) {
1333 dout("ceph_queue_writeback %p\n", inode); 1334 dout("ceph_queue_writeback %p\n", inode);
1334 ihold(inode);
1335 } else { 1335 } else {
1336 dout("ceph_queue_writeback %p failed\n", inode); 1336 dout("ceph_queue_writeback %p failed\n", inode);
1337 iput(inode);
1337 } 1338 }
1338} 1339}
1339 1340
@@ -1353,12 +1354,13 @@ static void ceph_writeback_work(struct work_struct *work)
1353 */ 1354 */
1354void ceph_queue_invalidate(struct inode *inode) 1355void ceph_queue_invalidate(struct inode *inode)
1355{ 1356{
1357 ihold(inode);
1356 if (queue_work(ceph_inode_to_client(inode)->pg_inv_wq, 1358 if (queue_work(ceph_inode_to_client(inode)->pg_inv_wq,
1357 &ceph_inode(inode)->i_pg_inv_work)) { 1359 &ceph_inode(inode)->i_pg_inv_work)) {
1358 dout("ceph_queue_invalidate %p\n", inode); 1360 dout("ceph_queue_invalidate %p\n", inode);
1359 ihold(inode);
1360 } else { 1361 } else {
1361 dout("ceph_queue_invalidate %p failed\n", inode); 1362 dout("ceph_queue_invalidate %p failed\n", inode);
1363 iput(inode);
1362 } 1364 }
1363} 1365}
1364 1366
@@ -1434,13 +1436,14 @@ void ceph_queue_vmtruncate(struct inode *inode)
1434{ 1436{
1435 struct ceph_inode_info *ci = ceph_inode(inode); 1437 struct ceph_inode_info *ci = ceph_inode(inode);
1436 1438
1439 ihold(inode);
1437 if (queue_work(ceph_sb_to_client(inode->i_sb)->trunc_wq, 1440 if (queue_work(ceph_sb_to_client(inode->i_sb)->trunc_wq,
1438 &ci->i_vmtruncate_work)) { 1441 &ci->i_vmtruncate_work)) {
1439 dout("ceph_queue_vmtruncate %p\n", inode); 1442 dout("ceph_queue_vmtruncate %p\n", inode);
1440 ihold(inode);
1441 } else { 1443 } else {
1442 dout("ceph_queue_vmtruncate %p failed, pending=%d\n", 1444 dout("ceph_queue_vmtruncate %p failed, pending=%d\n",
1443 inode, ci->i_truncate_pending); 1445 inode, ci->i_truncate_pending);
1446 iput(inode);
1444 } 1447 }
1445} 1448}
1446 1449
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index a90846fac759..8dc73a594a90 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -638,10 +638,12 @@ static struct dentry *open_root_dentry(struct ceph_fs_client *fsc,
638 if (err == 0) { 638 if (err == 0) {
639 dout("open_root_inode success\n"); 639 dout("open_root_inode success\n");
640 if (ceph_ino(req->r_target_inode) == CEPH_INO_ROOT && 640 if (ceph_ino(req->r_target_inode) == CEPH_INO_ROOT &&
641 fsc->sb->s_root == NULL) 641 fsc->sb->s_root == NULL) {
642 root = d_alloc_root(req->r_target_inode); 642 root = d_alloc_root(req->r_target_inode);
643 else 643 ceph_init_dentry(root);
644 } else {
644 root = d_obtain_alias(req->r_target_inode); 645 root = d_obtain_alias(req->r_target_inode);
646 }
645 req->r_target_inode = NULL; 647 req->r_target_inode = NULL;
646 dout("open_root_inode success, root dentry is %p\n", root); 648 dout("open_root_inode success, root dentry is %p\n", root);
647 } else { 649 } else {
diff --git a/fs/dcache.c b/fs/dcache.c
index a901c6901bce..10ba92def3f6 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -36,6 +36,7 @@
36#include <linux/bit_spinlock.h> 36#include <linux/bit_spinlock.h>
37#include <linux/rculist_bl.h> 37#include <linux/rculist_bl.h>
38#include <linux/prefetch.h> 38#include <linux/prefetch.h>
39#include <linux/ratelimit.h>
39#include "internal.h" 40#include "internal.h"
40 41
41/* 42/*
@@ -2383,8 +2384,16 @@ struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode)
2383 actual = __d_unalias(inode, dentry, alias); 2384 actual = __d_unalias(inode, dentry, alias);
2384 } 2385 }
2385 write_sequnlock(&rename_lock); 2386 write_sequnlock(&rename_lock);
2386 if (IS_ERR(actual)) 2387 if (IS_ERR(actual)) {
2388 if (PTR_ERR(actual) == -ELOOP)
2389 pr_warn_ratelimited(
2390 "VFS: Lookup of '%s' in %s %s"
2391 " would have caused loop\n",
2392 dentry->d_name.name,
2393 inode->i_sb->s_type->name,
2394 inode->i_sb->s_id);
2387 dput(alias); 2395 dput(alias);
2396 }
2388 goto out_nolock; 2397 goto out_nolock;
2389 } 2398 }
2390 } 2399 }
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index f6dba4505f1c..12ccacda44e0 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -565,7 +565,7 @@ ext4_fsblk_t ext4_count_free_clusters(struct super_block *sb)
565 brelse(bitmap_bh); 565 brelse(bitmap_bh);
566 printk(KERN_DEBUG "ext4_count_free_clusters: stored = %llu" 566 printk(KERN_DEBUG "ext4_count_free_clusters: stored = %llu"
567 ", computed = %llu, %llu\n", 567 ", computed = %llu, %llu\n",
568 EXT4_B2C(sbi, ext4_free_blocks_count(es)), 568 EXT4_B2C(EXT4_SB(sb), ext4_free_blocks_count(es)),
569 desc_count, bitmap_count); 569 desc_count, bitmap_count);
570 return bitmap_count; 570 return bitmap_count;
571#else 571#else
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 240f6e2dc7ee..fffec40d5996 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -2270,6 +2270,7 @@ retry:
2270 ext4_msg(inode->i_sb, KERN_CRIT, "%s: jbd2_start: " 2270 ext4_msg(inode->i_sb, KERN_CRIT, "%s: jbd2_start: "
2271 "%ld pages, ino %lu; err %d", __func__, 2271 "%ld pages, ino %lu; err %d", __func__,
2272 wbc->nr_to_write, inode->i_ino, ret); 2272 wbc->nr_to_write, inode->i_ino, ret);
2273 blk_finish_plug(&plug);
2273 goto out_writepages; 2274 goto out_writepages;
2274 } 2275 }
2275 2276
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 877350ef0253..1c7bbd00e7e5 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1683,7 +1683,9 @@ static int parse_options(char *options, struct super_block *sb,
1683 data_opt = EXT4_MOUNT_WRITEBACK_DATA; 1683 data_opt = EXT4_MOUNT_WRITEBACK_DATA;
1684 datacheck: 1684 datacheck:
1685 if (is_remount) { 1685 if (is_remount) {
1686 if (test_opt(sb, DATA_FLAGS) != data_opt) { 1686 if (!sbi->s_journal)
1687 ext4_msg(sb, KERN_WARNING, "Remounting file system with no journal so ignoring journalled data option");
1688 else if (test_opt(sb, DATA_FLAGS) != data_opt) {
1687 ext4_msg(sb, KERN_ERR, 1689 ext4_msg(sb, KERN_ERR,
1688 "Cannot change data mode on remount"); 1690 "Cannot change data mode on remount");
1689 return 0; 1691 return 0;
@@ -3098,8 +3100,6 @@ static void ext4_destroy_lazyinit_thread(void)
3098} 3100}
3099 3101
3100static int ext4_fill_super(struct super_block *sb, void *data, int silent) 3102static int ext4_fill_super(struct super_block *sb, void *data, int silent)
3101 __releases(kernel_lock)
3102 __acquires(kernel_lock)
3103{ 3103{
3104 char *orig_data = kstrdup(data, GFP_KERNEL); 3104 char *orig_data = kstrdup(data, GFP_KERNEL);
3105 struct buffer_head *bh; 3105 struct buffer_head *bh;
diff --git a/fs/minix/bitmap.c b/fs/minix/bitmap.c
index 3f32bcb0d9bd..ef175cb8cfd8 100644
--- a/fs/minix/bitmap.c
+++ b/fs/minix/bitmap.c
@@ -16,38 +16,26 @@
16#include <linux/bitops.h> 16#include <linux/bitops.h>
17#include <linux/sched.h> 17#include <linux/sched.h>
18 18
19static const int nibblemap[] = { 4,3,3,2,3,2,2,1,3,2,2,1,2,1,1,0 };
20
21static DEFINE_SPINLOCK(bitmap_lock); 19static DEFINE_SPINLOCK(bitmap_lock);
22 20
23static unsigned long count_free(struct buffer_head *map[], unsigned numblocks, __u32 numbits) 21/*
22 * bitmap consists of blocks filled with 16bit words
23 * bit set == busy, bit clear == free
24 * endianness is a mess, but for counting zero bits it really doesn't matter...
25 */
26static __u32 count_free(struct buffer_head *map[], unsigned blocksize, __u32 numbits)
24{ 27{
25 unsigned i, j, sum = 0; 28 __u32 sum = 0;
26 struct buffer_head *bh; 29 unsigned blocks = DIV_ROUND_UP(numbits, blocksize * 8);
27
28 for (i=0; i<numblocks-1; i++) {
29 if (!(bh=map[i]))
30 return(0);
31 for (j=0; j<bh->b_size; j++)
32 sum += nibblemap[bh->b_data[j] & 0xf]
33 + nibblemap[(bh->b_data[j]>>4) & 0xf];
34 }
35 30
36 if (numblocks==0 || !(bh=map[numblocks-1])) 31 while (blocks--) {
37 return(0); 32 unsigned words = blocksize / 2;
38 i = ((numbits - (numblocks-1) * bh->b_size * 8) / 16) * 2; 33 __u16 *p = (__u16 *)(*map++)->b_data;
39 for (j=0; j<i; j++) { 34 while (words--)
40 sum += nibblemap[bh->b_data[j] & 0xf] 35 sum += 16 - hweight16(*p++);
41 + nibblemap[(bh->b_data[j]>>4) & 0xf];
42 } 36 }
43 37
44 i = numbits%16; 38 return sum;
45 if (i!=0) {
46 i = *(__u16 *)(&bh->b_data[j]) | ~((1<<i) - 1);
47 sum += nibblemap[i & 0xf] + nibblemap[(i>>4) & 0xf];
48 sum += nibblemap[(i>>8) & 0xf] + nibblemap[(i>>12) & 0xf];
49 }
50 return(sum);
51} 39}
52 40
53void minix_free_block(struct inode *inode, unsigned long block) 41void minix_free_block(struct inode *inode, unsigned long block)
@@ -105,10 +93,12 @@ int minix_new_block(struct inode * inode)
105 return 0; 93 return 0;
106} 94}
107 95
108unsigned long minix_count_free_blocks(struct minix_sb_info *sbi) 96unsigned long minix_count_free_blocks(struct super_block *sb)
109{ 97{
110 return (count_free(sbi->s_zmap, sbi->s_zmap_blocks, 98 struct minix_sb_info *sbi = minix_sb(sb);
111 sbi->s_nzones - sbi->s_firstdatazone + 1) 99 u32 bits = sbi->s_nzones - (sbi->s_firstdatazone + 1);
100
101 return (count_free(sbi->s_zmap, sb->s_blocksize, bits)
112 << sbi->s_log_zone_size); 102 << sbi->s_log_zone_size);
113} 103}
114 104
@@ -273,7 +263,10 @@ struct inode *minix_new_inode(const struct inode *dir, int mode, int *error)
273 return inode; 263 return inode;
274} 264}
275 265
276unsigned long minix_count_free_inodes(struct minix_sb_info *sbi) 266unsigned long minix_count_free_inodes(struct super_block *sb)
277{ 267{
278 return count_free(sbi->s_imap, sbi->s_imap_blocks, sbi->s_ninodes + 1); 268 struct minix_sb_info *sbi = minix_sb(sb);
269 u32 bits = sbi->s_ninodes + 1;
270
271 return count_free(sbi->s_imap, sb->s_blocksize, bits);
279} 272}
diff --git a/fs/minix/inode.c b/fs/minix/inode.c
index 64cdcd662ffc..1d9e33966db0 100644
--- a/fs/minix/inode.c
+++ b/fs/minix/inode.c
@@ -279,6 +279,27 @@ static int minix_fill_super(struct super_block *s, void *data, int silent)
279 else if (sbi->s_mount_state & MINIX_ERROR_FS) 279 else if (sbi->s_mount_state & MINIX_ERROR_FS)
280 printk("MINIX-fs: mounting file system with errors, " 280 printk("MINIX-fs: mounting file system with errors, "
281 "running fsck is recommended\n"); 281 "running fsck is recommended\n");
282
283 /* Apparently minix can create filesystems that allocate more blocks for
284 * the bitmaps than needed. We simply ignore that, but verify it didn't
285 * create one with not enough blocks and bail out if so.
286 */
287 block = minix_blocks_needed(sbi->s_ninodes, s->s_blocksize);
288 if (sbi->s_imap_blocks < block) {
289 printk("MINIX-fs: file system does not have enough "
290 "imap blocks allocated. Refusing to mount\n");
291 goto out_iput;
292 }
293
294 block = minix_blocks_needed(
295 (sbi->s_nzones - (sbi->s_firstdatazone + 1)),
296 s->s_blocksize);
297 if (sbi->s_zmap_blocks < block) {
298 printk("MINIX-fs: file system does not have enough "
299 "zmap blocks allocated. Refusing to mount.\n");
300 goto out_iput;
301 }
302
282 return 0; 303 return 0;
283 304
284out_iput: 305out_iput:
@@ -339,10 +360,10 @@ static int minix_statfs(struct dentry *dentry, struct kstatfs *buf)
339 buf->f_type = sb->s_magic; 360 buf->f_type = sb->s_magic;
340 buf->f_bsize = sb->s_blocksize; 361 buf->f_bsize = sb->s_blocksize;
341 buf->f_blocks = (sbi->s_nzones - sbi->s_firstdatazone) << sbi->s_log_zone_size; 362 buf->f_blocks = (sbi->s_nzones - sbi->s_firstdatazone) << sbi->s_log_zone_size;
342 buf->f_bfree = minix_count_free_blocks(sbi); 363 buf->f_bfree = minix_count_free_blocks(sb);
343 buf->f_bavail = buf->f_bfree; 364 buf->f_bavail = buf->f_bfree;
344 buf->f_files = sbi->s_ninodes; 365 buf->f_files = sbi->s_ninodes;
345 buf->f_ffree = minix_count_free_inodes(sbi); 366 buf->f_ffree = minix_count_free_inodes(sb);
346 buf->f_namelen = sbi->s_namelen; 367 buf->f_namelen = sbi->s_namelen;
347 buf->f_fsid.val[0] = (u32)id; 368 buf->f_fsid.val[0] = (u32)id;
348 buf->f_fsid.val[1] = (u32)(id >> 32); 369 buf->f_fsid.val[1] = (u32)(id >> 32);
diff --git a/fs/minix/minix.h b/fs/minix/minix.h
index 341e2122879a..26bbd55e82ea 100644
--- a/fs/minix/minix.h
+++ b/fs/minix/minix.h
@@ -48,10 +48,10 @@ extern struct minix_inode * minix_V1_raw_inode(struct super_block *, ino_t, stru
48extern struct minix2_inode * minix_V2_raw_inode(struct super_block *, ino_t, struct buffer_head **); 48extern struct minix2_inode * minix_V2_raw_inode(struct super_block *, ino_t, struct buffer_head **);
49extern struct inode * minix_new_inode(const struct inode *, int, int *); 49extern struct inode * minix_new_inode(const struct inode *, int, int *);
50extern void minix_free_inode(struct inode * inode); 50extern void minix_free_inode(struct inode * inode);
51extern unsigned long minix_count_free_inodes(struct minix_sb_info *sbi); 51extern unsigned long minix_count_free_inodes(struct super_block *sb);
52extern int minix_new_block(struct inode * inode); 52extern int minix_new_block(struct inode * inode);
53extern void minix_free_block(struct inode *inode, unsigned long block); 53extern void minix_free_block(struct inode *inode, unsigned long block);
54extern unsigned long minix_count_free_blocks(struct minix_sb_info *sbi); 54extern unsigned long minix_count_free_blocks(struct super_block *sb);
55extern int minix_getattr(struct vfsmount *, struct dentry *, struct kstat *); 55extern int minix_getattr(struct vfsmount *, struct dentry *, struct kstat *);
56extern int minix_prepare_chunk(struct page *page, loff_t pos, unsigned len); 56extern int minix_prepare_chunk(struct page *page, loff_t pos, unsigned len);
57 57
@@ -88,6 +88,11 @@ static inline struct minix_inode_info *minix_i(struct inode *inode)
88 return list_entry(inode, struct minix_inode_info, vfs_inode); 88 return list_entry(inode, struct minix_inode_info, vfs_inode);
89} 89}
90 90
91static inline unsigned minix_blocks_needed(unsigned bits, unsigned blocksize)
92{
93 return DIV_ROUND_UP(bits, blocksize * 8);
94}
95
91#if defined(CONFIG_MINIX_FS_NATIVE_ENDIAN) && \ 96#if defined(CONFIG_MINIX_FS_NATIVE_ENDIAN) && \
92 defined(CONFIG_MINIX_FS_BIG_ENDIAN_16BIT_INDEXED) 97 defined(CONFIG_MINIX_FS_BIG_ENDIAN_16BIT_INDEXED)
93 98
@@ -125,7 +130,7 @@ static inline int minix_find_first_zero_bit(const void *vaddr, unsigned size)
125 if (!size) 130 if (!size)
126 return 0; 131 return 0;
127 132
128 size = (size >> 4) + ((size & 15) > 0); 133 size >>= 4;
129 while (*p++ == 0xffff) { 134 while (*p++ == 0xffff) {
130 if (--size == 0) 135 if (--size == 0)
131 return (p - addr) << 4; 136 return (p - addr) << 4;
diff --git a/fs/namespace.c b/fs/namespace.c
index 50ee30345b4f..6d3a1963879b 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -2493,6 +2493,7 @@ EXPORT_SYMBOL(create_mnt_ns);
2493struct dentry *mount_subtree(struct vfsmount *mnt, const char *name) 2493struct dentry *mount_subtree(struct vfsmount *mnt, const char *name)
2494{ 2494{
2495 struct mnt_namespace *ns; 2495 struct mnt_namespace *ns;
2496 struct super_block *s;
2496 struct path path; 2497 struct path path;
2497 int err; 2498 int err;
2498 2499
@@ -2509,10 +2510,11 @@ struct dentry *mount_subtree(struct vfsmount *mnt, const char *name)
2509 return ERR_PTR(err); 2510 return ERR_PTR(err);
2510 2511
2511 /* trade a vfsmount reference for active sb one */ 2512 /* trade a vfsmount reference for active sb one */
2512 atomic_inc(&path.mnt->mnt_sb->s_active); 2513 s = path.mnt->mnt_sb;
2514 atomic_inc(&s->s_active);
2513 mntput(path.mnt); 2515 mntput(path.mnt);
2514 /* lock the sucker */ 2516 /* lock the sucker */
2515 down_write(&path.mnt->mnt_sb->s_umount); 2517 down_write(&s->s_umount);
2516 /* ... and return the root of (sub)tree on it */ 2518 /* ... and return the root of (sub)tree on it */
2517 return path.dentry; 2519 return path.dentry;
2518} 2520}
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index b238d95ac48c..ac2899098147 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1468,12 +1468,12 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry
1468 res = NULL; 1468 res = NULL;
1469 goto out; 1469 goto out;
1470 /* This turned out not to be a regular file */ 1470 /* This turned out not to be a regular file */
1471 case -EISDIR:
1471 case -ENOTDIR: 1472 case -ENOTDIR:
1472 goto no_open; 1473 goto no_open;
1473 case -ELOOP: 1474 case -ELOOP:
1474 if (!(nd->intent.open.flags & O_NOFOLLOW)) 1475 if (!(nd->intent.open.flags & O_NOFOLLOW))
1475 goto no_open; 1476 goto no_open;
1476 /* case -EISDIR: */
1477 /* case -EINVAL: */ 1477 /* case -EINVAL: */
1478 default: 1478 default:
1479 res = ERR_CAST(inode); 1479 res = ERR_CAST(inode);
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 0a1f8312b4dc..eca56d4b39c0 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -40,48 +40,8 @@
40 40
41#define NFSDBG_FACILITY NFSDBG_FILE 41#define NFSDBG_FACILITY NFSDBG_FILE
42 42
43static int nfs_file_open(struct inode *, struct file *);
44static int nfs_file_release(struct inode *, struct file *);
45static loff_t nfs_file_llseek(struct file *file, loff_t offset, int origin);
46static int nfs_file_mmap(struct file *, struct vm_area_struct *);
47static ssize_t nfs_file_splice_read(struct file *filp, loff_t *ppos,
48 struct pipe_inode_info *pipe,
49 size_t count, unsigned int flags);
50static ssize_t nfs_file_read(struct kiocb *, const struct iovec *iov,
51 unsigned long nr_segs, loff_t pos);
52static ssize_t nfs_file_splice_write(struct pipe_inode_info *pipe,
53 struct file *filp, loff_t *ppos,
54 size_t count, unsigned int flags);
55static ssize_t nfs_file_write(struct kiocb *, const struct iovec *iov,
56 unsigned long nr_segs, loff_t pos);
57static int nfs_file_flush(struct file *, fl_owner_t id);
58static int nfs_file_fsync(struct file *, loff_t, loff_t, int datasync);
59static int nfs_check_flags(int flags);
60static int nfs_lock(struct file *filp, int cmd, struct file_lock *fl);
61static int nfs_flock(struct file *filp, int cmd, struct file_lock *fl);
62static int nfs_setlease(struct file *file, long arg, struct file_lock **fl);
63
64static const struct vm_operations_struct nfs_file_vm_ops; 43static const struct vm_operations_struct nfs_file_vm_ops;
65 44
66const struct file_operations nfs_file_operations = {
67 .llseek = nfs_file_llseek,
68 .read = do_sync_read,
69 .write = do_sync_write,
70 .aio_read = nfs_file_read,
71 .aio_write = nfs_file_write,
72 .mmap = nfs_file_mmap,
73 .open = nfs_file_open,
74 .flush = nfs_file_flush,
75 .release = nfs_file_release,
76 .fsync = nfs_file_fsync,
77 .lock = nfs_lock,
78 .flock = nfs_flock,
79 .splice_read = nfs_file_splice_read,
80 .splice_write = nfs_file_splice_write,
81 .check_flags = nfs_check_flags,
82 .setlease = nfs_setlease,
83};
84
85const struct inode_operations nfs_file_inode_operations = { 45const struct inode_operations nfs_file_inode_operations = {
86 .permission = nfs_permission, 46 .permission = nfs_permission,
87 .getattr = nfs_getattr, 47 .getattr = nfs_getattr,
@@ -886,3 +846,54 @@ static int nfs_setlease(struct file *file, long arg, struct file_lock **fl)
886 file->f_path.dentry->d_name.name, arg); 846 file->f_path.dentry->d_name.name, arg);
887 return -EINVAL; 847 return -EINVAL;
888} 848}
849
850const struct file_operations nfs_file_operations = {
851 .llseek = nfs_file_llseek,
852 .read = do_sync_read,
853 .write = do_sync_write,
854 .aio_read = nfs_file_read,
855 .aio_write = nfs_file_write,
856 .mmap = nfs_file_mmap,
857 .open = nfs_file_open,
858 .flush = nfs_file_flush,
859 .release = nfs_file_release,
860 .fsync = nfs_file_fsync,
861 .lock = nfs_lock,
862 .flock = nfs_flock,
863 .splice_read = nfs_file_splice_read,
864 .splice_write = nfs_file_splice_write,
865 .check_flags = nfs_check_flags,
866 .setlease = nfs_setlease,
867};
868
869#ifdef CONFIG_NFS_V4
870static int
871nfs4_file_open(struct inode *inode, struct file *filp)
872{
873 /*
874 * NFSv4 opens are handled in d_lookup and d_revalidate. If we get to
875 * this point, then something is very wrong
876 */
877 dprintk("NFS: %s called! inode=%p filp=%p\n", __func__, inode, filp);
878 return -ENOTDIR;
879}
880
881const struct file_operations nfs4_file_operations = {
882 .llseek = nfs_file_llseek,
883 .read = do_sync_read,
884 .write = do_sync_write,
885 .aio_read = nfs_file_read,
886 .aio_write = nfs_file_write,
887 .mmap = nfs_file_mmap,
888 .open = nfs4_file_open,
889 .flush = nfs_file_flush,
890 .release = nfs_file_release,
891 .fsync = nfs_file_fsync,
892 .lock = nfs_lock,
893 .flock = nfs_flock,
894 .splice_read = nfs_file_splice_read,
895 .splice_write = nfs_file_splice_write,
896 .check_flags = nfs_check_flags,
897 .setlease = nfs_setlease,
898};
899#endif /* CONFIG_NFS_V4 */
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index c07a55aec838..50a15fa8cf98 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -291,7 +291,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
291 */ 291 */
292 inode->i_op = NFS_SB(sb)->nfs_client->rpc_ops->file_inode_ops; 292 inode->i_op = NFS_SB(sb)->nfs_client->rpc_ops->file_inode_ops;
293 if (S_ISREG(inode->i_mode)) { 293 if (S_ISREG(inode->i_mode)) {
294 inode->i_fop = &nfs_file_operations; 294 inode->i_fop = NFS_SB(sb)->nfs_client->rpc_ops->file_ops;
295 inode->i_data.a_ops = &nfs_file_aops; 295 inode->i_data.a_ops = &nfs_file_aops;
296 inode->i_data.backing_dev_info = &NFS_SB(sb)->backing_dev_info; 296 inode->i_data.backing_dev_info = &NFS_SB(sb)->backing_dev_info;
297 } else if (S_ISDIR(inode->i_mode)) { 297 } else if (S_ISDIR(inode->i_mode)) {
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index c1a1bd8ddf1c..3f4d95751d52 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -299,6 +299,8 @@ extern void nfs_read_prepare(struct rpc_task *task, void *calldata);
299extern int nfs_generic_pagein(struct nfs_pageio_descriptor *desc, 299extern int nfs_generic_pagein(struct nfs_pageio_descriptor *desc,
300 struct list_head *head); 300 struct list_head *head);
301 301
302extern void nfs_pageio_init_read_mds(struct nfs_pageio_descriptor *pgio,
303 struct inode *inode);
302extern void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio); 304extern void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio);
303extern void nfs_readdata_release(struct nfs_read_data *rdata); 305extern void nfs_readdata_release(struct nfs_read_data *rdata);
304 306
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index 85f1690ca08c..d4bc9ed91748 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -853,6 +853,7 @@ const struct nfs_rpc_ops nfs_v3_clientops = {
853 .dentry_ops = &nfs_dentry_operations, 853 .dentry_ops = &nfs_dentry_operations,
854 .dir_inode_ops = &nfs3_dir_inode_operations, 854 .dir_inode_ops = &nfs3_dir_inode_operations,
855 .file_inode_ops = &nfs3_file_inode_operations, 855 .file_inode_ops = &nfs3_file_inode_operations,
856 .file_ops = &nfs_file_operations,
856 .getroot = nfs3_proc_get_root, 857 .getroot = nfs3_proc_get_root,
857 .getattr = nfs3_proc_getattr, 858 .getattr = nfs3_proc_getattr,
858 .setattr = nfs3_proc_setattr, 859 .setattr = nfs3_proc_setattr,
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index b60fddf606f7..be2bbac13817 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -2464,8 +2464,7 @@ static int nfs4_proc_lookup(struct rpc_clnt *clnt, struct inode *dir, struct qst
2464 case -NFS4ERR_BADNAME: 2464 case -NFS4ERR_BADNAME:
2465 return -ENOENT; 2465 return -ENOENT;
2466 case -NFS4ERR_MOVED: 2466 case -NFS4ERR_MOVED:
2467 err = nfs4_get_referral(dir, name, fattr, fhandle); 2467 return nfs4_get_referral(dir, name, fattr, fhandle);
2468 break;
2469 case -NFS4ERR_WRONGSEC: 2468 case -NFS4ERR_WRONGSEC:
2470 nfs_fixup_secinfo_attributes(fattr, fhandle); 2469 nfs_fixup_secinfo_attributes(fattr, fhandle);
2471 } 2470 }
@@ -6253,6 +6252,7 @@ const struct nfs_rpc_ops nfs_v4_clientops = {
6253 .dentry_ops = &nfs4_dentry_operations, 6252 .dentry_ops = &nfs4_dentry_operations,
6254 .dir_inode_ops = &nfs4_dir_inode_operations, 6253 .dir_inode_ops = &nfs4_dir_inode_operations,
6255 .file_inode_ops = &nfs4_file_inode_operations, 6254 .file_inode_ops = &nfs4_file_inode_operations,
6255 .file_ops = &nfs4_file_operations,
6256 .getroot = nfs4_proc_get_root, 6256 .getroot = nfs4_proc_get_root,
6257 .getattr = nfs4_proc_getattr, 6257 .getattr = nfs4_proc_getattr,
6258 .setattr = nfs4_proc_setattr, 6258 .setattr = nfs4_proc_setattr,
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index baf73536bc04..8e672a2b2d69 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -1260,6 +1260,25 @@ pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc)
1260} 1260}
1261EXPORT_SYMBOL_GPL(pnfs_generic_pg_writepages); 1261EXPORT_SYMBOL_GPL(pnfs_generic_pg_writepages);
1262 1262
1263static void pnfs_ld_handle_read_error(struct nfs_read_data *data)
1264{
1265 struct nfs_pageio_descriptor pgio;
1266
1267 put_lseg(data->lseg);
1268 data->lseg = NULL;
1269 dprintk("pnfs write error = %d\n", data->pnfs_error);
1270
1271 nfs_pageio_init_read_mds(&pgio, data->inode);
1272
1273 while (!list_empty(&data->pages)) {
1274 struct nfs_page *req = nfs_list_entry(data->pages.next);
1275
1276 nfs_list_remove_request(req);
1277 nfs_pageio_add_request(&pgio, req);
1278 }
1279 nfs_pageio_complete(&pgio);
1280}
1281
1263/* 1282/*
1264 * Called by non rpc-based layout drivers 1283 * Called by non rpc-based layout drivers
1265 */ 1284 */
@@ -1268,11 +1287,8 @@ void pnfs_ld_read_done(struct nfs_read_data *data)
1268 if (likely(!data->pnfs_error)) { 1287 if (likely(!data->pnfs_error)) {
1269 __nfs4_read_done_cb(data); 1288 __nfs4_read_done_cb(data);
1270 data->mds_ops->rpc_call_done(&data->task, data); 1289 data->mds_ops->rpc_call_done(&data->task, data);
1271 } else { 1290 } else
1272 put_lseg(data->lseg); 1291 pnfs_ld_handle_read_error(data);
1273 data->lseg = NULL;
1274 dprintk("pnfs write error = %d\n", data->pnfs_error);
1275 }
1276 data->mds_ops->rpc_release(data); 1292 data->mds_ops->rpc_release(data);
1277} 1293}
1278EXPORT_SYMBOL_GPL(pnfs_ld_read_done); 1294EXPORT_SYMBOL_GPL(pnfs_ld_read_done);
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index ac40b8535d7e..f48125da198a 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -710,6 +710,7 @@ const struct nfs_rpc_ops nfs_v2_clientops = {
710 .dentry_ops = &nfs_dentry_operations, 710 .dentry_ops = &nfs_dentry_operations,
711 .dir_inode_ops = &nfs_dir_inode_operations, 711 .dir_inode_ops = &nfs_dir_inode_operations,
712 .file_inode_ops = &nfs_file_inode_operations, 712 .file_inode_ops = &nfs_file_inode_operations,
713 .file_ops = &nfs_file_operations,
713 .getroot = nfs_proc_get_root, 714 .getroot = nfs_proc_get_root,
714 .getattr = nfs_proc_getattr, 715 .getattr = nfs_proc_getattr,
715 .setattr = nfs_proc_setattr, 716 .setattr = nfs_proc_setattr,
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 8b48ec63f722..cfa175c223dc 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -109,7 +109,7 @@ static void nfs_readpage_truncate_uninitialised_page(struct nfs_read_data *data)
109 } 109 }
110} 110}
111 111
112static void nfs_pageio_init_read_mds(struct nfs_pageio_descriptor *pgio, 112void nfs_pageio_init_read_mds(struct nfs_pageio_descriptor *pgio,
113 struct inode *inode) 113 struct inode *inode)
114{ 114{
115 nfs_pageio_init(pgio, inode, &nfs_pageio_read_ops, 115 nfs_pageio_init(pgio, inode, &nfs_pageio_read_ops,
@@ -534,23 +534,13 @@ static void nfs_readpage_result_full(struct rpc_task *task, void *calldata)
534static void nfs_readpage_release_full(void *calldata) 534static void nfs_readpage_release_full(void *calldata)
535{ 535{
536 struct nfs_read_data *data = calldata; 536 struct nfs_read_data *data = calldata;
537 struct nfs_pageio_descriptor pgio;
538 537
539 if (data->pnfs_error) {
540 nfs_pageio_init_read_mds(&pgio, data->inode);
541 pgio.pg_recoalesce = 1;
542 }
543 while (!list_empty(&data->pages)) { 538 while (!list_empty(&data->pages)) {
544 struct nfs_page *req = nfs_list_entry(data->pages.next); 539 struct nfs_page *req = nfs_list_entry(data->pages.next);
545 540
546 nfs_list_remove_request(req); 541 nfs_list_remove_request(req);
547 if (!data->pnfs_error) 542 nfs_readpage_release(req);
548 nfs_readpage_release(req);
549 else
550 nfs_pageio_add_request(&pgio, req);
551 } 543 }
552 if (data->pnfs_error)
553 nfs_pageio_complete(&pgio);
554 nfs_readdata_release(calldata); 544 nfs_readdata_release(calldata);
555} 545}
556 546