author     Dave Airlie <airlied@redhat.com>	2011-12-20 09:43:53 -0500
committer  Dave Airlie <airlied@redhat.com>	2011-12-20 09:43:53 -0500
commit     1fbe6f625f69e48c4001051dc1431afc704acfaa (patch)
tree       826b741201a2e09a627ed350c6ff36935f5cff79 /fs/btrfs
parent     0cecdd818cd79d092e36e70dfe3a71f2878d6b96 (diff)
parent     384703b8e6cd4c8ef08512e596024e028c91c339 (diff)
Merge tag 'v3.2-rc6' of /home/airlied/devel/kernel/linux-2.6 into drm-core-next
Merge in the upstream tree to bring in the mainline fixes.
Conflicts:
drivers/gpu/drm/exynos/exynos_drm_fbdev.c
drivers/gpu/drm/nouveau/nouveau_sgdma.c
Diffstat (limited to 'fs/btrfs')
-rw-r--r--  fs/btrfs/async-thread.c      | 117
-rw-r--r--  fs/btrfs/async-thread.h      |   4
-rw-r--r--  fs/btrfs/backref.c           |   2
-rw-r--r--  fs/btrfs/btrfs_inode.h       |   4
-rw-r--r--  fs/btrfs/ctree.c             |  17
-rw-r--r--  fs/btrfs/ctree.h             |  11
-rw-r--r--  fs/btrfs/delayed-inode.c     |  62
-rw-r--r--  fs/btrfs/disk-io.c           | 223
-rw-r--r--  fs/btrfs/extent-tree.c       | 336
-rw-r--r--  fs/btrfs/extent_io.c         |  60
-rw-r--r--  fs/btrfs/extent_io.h         |   2
-rw-r--r--  fs/btrfs/file.c              |   8
-rw-r--r--  fs/btrfs/free-space-cache.c  |  82
-rw-r--r--  fs/btrfs/inode-map.c         |  28
-rw-r--r--  fs/btrfs/inode.c             | 272
-rw-r--r--  fs/btrfs/ioctl.c             |  23
-rw-r--r--  fs/btrfs/relocation.c        |   4
-rw-r--r--  fs/btrfs/scrub.c             |  79
-rw-r--r--  fs/btrfs/super.c             | 125
-rw-r--r--  fs/btrfs/transaction.c       |  12
-rw-r--r--  fs/btrfs/volumes.c           |  15
-rw-r--r--  fs/btrfs/volumes.h           |   6
22 files changed, 987 insertions(+), 505 deletions(-)
diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c
index 7ec14097fef1..cb97174e2366 100644
--- a/fs/btrfs/async-thread.c
+++ b/fs/btrfs/async-thread.c
@@ -64,6 +64,8 @@ struct btrfs_worker_thread {
 	int idle;
 };
 
+static int __btrfs_start_workers(struct btrfs_workers *workers);
+
 /*
  * btrfs_start_workers uses kthread_run, which can block waiting for memory
  * for a very long time. It will actually throttle on page writeback,
@@ -88,27 +90,10 @@ static void start_new_worker_func(struct btrfs_work *work)
 {
 	struct worker_start *start;
 	start = container_of(work, struct worker_start, work);
-	btrfs_start_workers(start->queue, 1);
+	__btrfs_start_workers(start->queue);
 	kfree(start);
 }
 
-static int start_new_worker(struct btrfs_workers *queue)
-{
-	struct worker_start *start;
-	int ret;
-
-	start = kzalloc(sizeof(*start), GFP_NOFS);
-	if (!start)
-		return -ENOMEM;
-
-	start->work.func = start_new_worker_func;
-	start->queue = queue;
-	ret = btrfs_queue_worker(queue->atomic_worker_start, &start->work);
-	if (ret)
-		kfree(start);
-	return ret;
-}
-
 /*
  * helper function to move a thread onto the idle list after it
  * has finished some requests.
@@ -153,12 +138,20 @@ static void check_busy_worker(struct btrfs_worker_thread *worker)
 static void check_pending_worker_creates(struct btrfs_worker_thread *worker)
 {
 	struct btrfs_workers *workers = worker->workers;
+	struct worker_start *start;
 	unsigned long flags;
 
 	rmb();
 	if (!workers->atomic_start_pending)
 		return;
 
+	start = kzalloc(sizeof(*start), GFP_NOFS);
+	if (!start)
+		return;
+
+	start->work.func = start_new_worker_func;
+	start->queue = workers;
+
 	spin_lock_irqsave(&workers->lock, flags);
 	if (!workers->atomic_start_pending)
 		goto out;
@@ -170,10 +163,11 @@ static void check_pending_worker_creates(struct btrfs_worker_thread *worker)
 
 	workers->num_workers_starting += 1;
 	spin_unlock_irqrestore(&workers->lock, flags);
-	start_new_worker(workers);
+	btrfs_queue_worker(workers->atomic_worker_start, &start->work);
 	return;
 
 out:
+	kfree(start);
 	spin_unlock_irqrestore(&workers->lock, flags);
 }
 
@@ -331,7 +325,7 @@ again:
 		run_ordered_completions(worker->workers, work);
 
 		check_pending_worker_creates(worker);
-
+		cond_resched();
 	}
 
 	spin_lock_irq(&worker->lock);
@@ -462,56 +456,55 @@ void btrfs_init_workers(struct btrfs_workers *workers, char *name, int max,
  * starts new worker threads. This does not enforce the max worker
  * count in case you need to temporarily go past it.
  */
-static int __btrfs_start_workers(struct btrfs_workers *workers,
-				 int num_workers)
+static int __btrfs_start_workers(struct btrfs_workers *workers)
 {
 	struct btrfs_worker_thread *worker;
 	int ret = 0;
-	int i;
 
-	for (i = 0; i < num_workers; i++) {
-		worker = kzalloc(sizeof(*worker), GFP_NOFS);
-		if (!worker) {
-			ret = -ENOMEM;
-			goto fail;
-		}
+	worker = kzalloc(sizeof(*worker), GFP_NOFS);
+	if (!worker) {
+		ret = -ENOMEM;
+		goto fail;
+	}
 
-		INIT_LIST_HEAD(&worker->pending);
-		INIT_LIST_HEAD(&worker->prio_pending);
-		INIT_LIST_HEAD(&worker->worker_list);
-		spin_lock_init(&worker->lock);
+	INIT_LIST_HEAD(&worker->pending);
+	INIT_LIST_HEAD(&worker->prio_pending);
+	INIT_LIST_HEAD(&worker->worker_list);
+	spin_lock_init(&worker->lock);
 
-		atomic_set(&worker->num_pending, 0);
-		atomic_set(&worker->refs, 1);
-		worker->workers = workers;
-		worker->task = kthread_run(worker_loop, worker,
-					   "btrfs-%s-%d", workers->name,
-					   workers->num_workers + i);
-		if (IS_ERR(worker->task)) {
-			ret = PTR_ERR(worker->task);
-			kfree(worker);
-			goto fail;
-		}
-		spin_lock_irq(&workers->lock);
-		list_add_tail(&worker->worker_list, &workers->idle_list);
-		worker->idle = 1;
-		workers->num_workers++;
-		workers->num_workers_starting--;
-		WARN_ON(workers->num_workers_starting < 0);
-		spin_unlock_irq(&workers->lock);
+	atomic_set(&worker->num_pending, 0);
+	atomic_set(&worker->refs, 1);
+	worker->workers = workers;
+	worker->task = kthread_run(worker_loop, worker,
+				   "btrfs-%s-%d", workers->name,
+				   workers->num_workers + 1);
+	if (IS_ERR(worker->task)) {
+		ret = PTR_ERR(worker->task);
+		kfree(worker);
+		goto fail;
 	}
+	spin_lock_irq(&workers->lock);
+	list_add_tail(&worker->worker_list, &workers->idle_list);
+	worker->idle = 1;
+	workers->num_workers++;
+	workers->num_workers_starting--;
+	WARN_ON(workers->num_workers_starting < 0);
+	spin_unlock_irq(&workers->lock);
+
 	return 0;
 fail:
-	btrfs_stop_workers(workers);
+	spin_lock_irq(&workers->lock);
+	workers->num_workers_starting--;
+	spin_unlock_irq(&workers->lock);
 	return ret;
 }
 
-int btrfs_start_workers(struct btrfs_workers *workers, int num_workers)
+int btrfs_start_workers(struct btrfs_workers *workers)
 {
 	spin_lock_irq(&workers->lock);
-	workers->num_workers_starting += num_workers;
+	workers->num_workers_starting++;
 	spin_unlock_irq(&workers->lock);
-	return __btrfs_start_workers(workers, num_workers);
+	return __btrfs_start_workers(workers);
 }
 
 /*
@@ -568,6 +561,7 @@ static struct btrfs_worker_thread *find_worker(struct btrfs_workers *workers)
 	struct btrfs_worker_thread *worker;
 	unsigned long flags;
 	struct list_head *fallback;
+	int ret;
 
 again:
 	spin_lock_irqsave(&workers->lock, flags);
@@ -584,7 +578,9 @@ again:
 			workers->num_workers_starting++;
 			spin_unlock_irqrestore(&workers->lock, flags);
 			/* we're below the limit, start another worker */
-			__btrfs_start_workers(workers, 1);
+			ret = __btrfs_start_workers(workers);
+			if (ret)
+				goto fallback;
 			goto again;
 		}
 	}
@@ -665,7 +661,7 @@ void btrfs_set_work_high_prio(struct btrfs_work *work)
 /*
  * places a struct btrfs_work into the pending queue of one of the kthreads
  */
-int btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work)
+void btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work)
 {
 	struct btrfs_worker_thread *worker;
 	unsigned long flags;
@@ -673,7 +669,7 @@ int btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work)
 
 	/* don't requeue something already on a list */
 	if (test_and_set_bit(WORK_QUEUED_BIT, &work->flags))
-		goto out;
+		return;
 
 	worker = find_worker(workers);
 	if (workers->ordered) {
@@ -712,7 +708,4 @@ int btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work)
 	if (wake)
 		wake_up_process(worker->task);
 	spin_unlock_irqrestore(&worker->lock, flags);
-
-out:
-	return 0;
 }
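The reworked check_pending_worker_creates() above moves the GFP_NOFS allocation in front of the spin-locked section, so the only step that can fail happens before the lock is taken, and the work queued under the lock can no longer fail. A minimal userspace sketch of that allocate-before-lock shape, with pthreads standing in for the kernel primitives (pending_create, queue_lock and friends are illustrative names, not btrfs symbols):

#include <pthread.h>
#include <stdlib.h>

struct pending_create {
	void *queue;
};

static pthread_mutex_t queue_lock = PTHREAD_MUTEX_INITIALIZER;
static int start_pending;	/* models workers->atomic_start_pending */

/* stand-in for btrfs_queue_worker(); in this sketch it just consumes w */
static void queue_work(struct pending_create *w)
{
	free(w);
}

void check_pending_creates(void *queue)
{
	struct pending_create *start;

	if (!start_pending)
		return;

	/* allocate up front so nothing inside the lock can fail */
	start = calloc(1, sizeof(*start));
	if (!start)
		return;
	start->queue = queue;

	pthread_mutex_lock(&queue_lock);
	if (!start_pending) {
		/* raced with someone else: undo the speculative allocation */
		pthread_mutex_unlock(&queue_lock);
		free(start);
		return;
	}
	start_pending = 0;
	pthread_mutex_unlock(&queue_lock);
	queue_work(start);
}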
diff --git a/fs/btrfs/async-thread.h b/fs/btrfs/async-thread.h
index 5077746cf85e..f34cc31fa3c9 100644
--- a/fs/btrfs/async-thread.h
+++ b/fs/btrfs/async-thread.h
@@ -109,8 +109,8 @@ struct btrfs_workers {
 	char *name;
 };
 
-int btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work);
-int btrfs_start_workers(struct btrfs_workers *workers, int num_workers);
+void btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work);
+int btrfs_start_workers(struct btrfs_workers *workers);
 int btrfs_stop_workers(struct btrfs_workers *workers);
 void btrfs_init_workers(struct btrfs_workers *workers, char *name, int max,
 			struct btrfs_workers *async_starter);
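With btrfs_start_workers() now returning an error, callers later in this diff (the open_ctree() hunk in disk-io.c) aggregate thirteen calls with ret |= and then normalize the result to -ENOMEM. The normalization matters: OR-ing two different negative errnos does not yield a valid errno, as this hedged standalone sketch shows:

#include <errno.h>
#include <stdio.h>

int main(void)
{
	int a = -ENOMEM;	/* -12 */
	int b = -EINVAL;	/* -22 */

	/*
	 * On a two's-complement machine this prints -2: the bitwise OR of
	 * the two codes, which is neither of them. Collapsing the result
	 * to one known errno, as open_ctree() does, keeps the error value
	 * meaningful while still catching "any of these failed".
	 */
	printf("%d\n", a | b);
	return 0;
}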
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index 8855aad3929c..22c64fff1bd5 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -683,7 +683,7 @@ static int inode_to_path(u64 inum, struct btrfs_inode_ref *iref,
 		return PTR_ERR(fspath);
 
 	if (fspath > fspath_min) {
-		ipath->fspath->val[i] = (u64)fspath;
+		ipath->fspath->val[i] = (u64)(unsigned long)fspath;
 		++ipath->fspath->elem_cnt;
 		ipath->fspath->bytes_left = fspath - fspath_min;
 	} else {
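The double cast here is the usual kernel idiom for stuffing a pointer into a u64 field: on 32-bit builds a direct (u64) cast of a pointer draws a "cast from pointer to integer of different size" warning, while going through the pointer-sized unsigned long keeps both steps warning-free. A hedged userspace equivalent:

#include <stdint.h>

uint64_t ptr_to_u64(const char *p)
{
	/* (uint64_t)p alone warns on 32-bit targets; this form does not */
	return (uint64_t)(unsigned long)p;
}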
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index 5a5d325a3935..634608d2a6d0 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -147,14 +147,12 @@ struct btrfs_inode {
 	 * the btrfs file release call will add this inode to the
 	 * ordered operations list so that we make sure to flush out any
	 * new data the application may have written before commit.
-	 *
-	 * yes, its silly to have a single bitflag, but we might grow more
-	 * of these.
 	 */
 	unsigned ordered_data_close:1;
 	unsigned orphan_meta_reserved:1;
 	unsigned dummy_inode:1;
 	unsigned in_defrag:1;
+	unsigned delalloc_meta_reserved:1;
 
 	/*
 	 * always compress this one file
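delalloc_meta_reserved joins a pack of single-bit bitfields. Adjacent bitfields share one storage word, so a store to any of them is a read-modify-write of the whole word and is not atomic; that is why the later hunks in this series only read and clear the flag under BTRFS_I(inode)->lock. A hedged sketch of that locking discipline, with a pthread mutex standing in for the spinlock:

#include <pthread.h>

struct inode_flags {
	unsigned ordered_data_close:1;
	unsigned delalloc_meta_reserved:1;	/* shares a word with its neighbours */
};

static pthread_mutex_t ilock = PTHREAD_MUTEX_INITIALIZER;

/* models the test-and-clear done in btrfs_delayed_inode_reserve_metadata() */
int test_and_clear_delalloc_reserved(struct inode_flags *f)
{
	int was_set;

	pthread_mutex_lock(&ilock);	/* spin_lock(&BTRFS_I(inode)->lock) in btrfs */
	was_set = f->delalloc_meta_reserved;
	f->delalloc_meta_reserved = 0;
	pthread_mutex_unlock(&ilock);
	return was_set;
}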
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 0fe615e4ea38..dede441bdeee 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -514,10 +514,25 @@ static inline int should_cow_block(struct btrfs_trans_handle *trans,
 				   struct btrfs_root *root,
 				   struct extent_buffer *buf)
 {
+	/* ensure we can see the force_cow */
+	smp_rmb();
+
+	/*
+	 * We do not need to cow a block if
+	 * 1) this block is not created or changed in this transaction;
+	 * 2) this block does not belong to TREE_RELOC tree;
+	 * 3) the root is not forced COW.
+	 *
+	 * What is forced COW:
+	 *    when we create a snapshot during committing the transaction,
+	 *    after we've finished copying the src root, we must COW the shared
+	 *    block to ensure the metadata consistency.
+	 */
 	if (btrfs_header_generation(buf) == trans->transid &&
 	    !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN) &&
 	    !(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID &&
-	      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC)))
+	      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC)) &&
+	    !root->force_cow)
 		return 0;
 	return 1;
 }
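The new smp_rmb() orders the read of root->force_cow against earlier loads; it presumably pairs with a write-side barrier where the snapshot-commit path sets the flag (transaction.c appears in the diffstat but is not shown here). A hedged C11 model of the same publish/observe pairing:

#include <stdatomic.h>

struct root_model {
	/* ...fields the snapshot path writes before publishing... */
	atomic_int force_cow;
};

/* writer: publish force_cow only after the earlier stores are visible */
void publish_force_cow(struct root_model *root)
{
	atomic_store_explicit(&root->force_cow, 1, memory_order_release);
}

/* reader: should_cow_block()'s smp_rmb() plays the acquire role here */
int force_cow_visible(struct root_model *root)
{
	return atomic_load_explicit(&root->force_cow, memory_order_acquire);
}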
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index b9ba59ff9292..67385033323d 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -848,7 +848,8 @@ struct btrfs_free_cluster {
 enum btrfs_caching_type {
 	BTRFS_CACHE_NO		= 0,
 	BTRFS_CACHE_STARTED	= 1,
-	BTRFS_CACHE_FINISHED	= 2,
+	BTRFS_CACHE_FAST	= 2,
+	BTRFS_CACHE_FINISHED	= 3,
 };
 
 enum btrfs_disk_cache_state {
@@ -1271,6 +1272,8 @@ struct btrfs_root {
 	 * for stat. It may be used for more later
 	 */
 	dev_t anon_dev;
+
+	int force_cow;
 };
 
 struct btrfs_ioctl_defrag_range_args {
@@ -2366,6 +2369,9 @@ int btrfs_block_rsv_check(struct btrfs_root *root,
 int btrfs_block_rsv_refill(struct btrfs_root *root,
 			   struct btrfs_block_rsv *block_rsv,
 			   u64 min_reserved);
+int btrfs_block_rsv_refill_noflush(struct btrfs_root *root,
+				   struct btrfs_block_rsv *block_rsv,
+				   u64 min_reserved);
 int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src_rsv,
 			    struct btrfs_block_rsv *dst_rsv,
 			    u64 num_bytes);
@@ -2686,7 +2692,8 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);
 int btrfs_readpage(struct file *file, struct page *page);
 void btrfs_evict_inode(struct inode *inode);
 int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc);
-void btrfs_dirty_inode(struct inode *inode, int flags);
+int btrfs_dirty_inode(struct inode *inode);
+int btrfs_update_time(struct file *file);
 struct inode *btrfs_alloc_inode(struct super_block *sb);
 void btrfs_destroy_inode(struct inode *inode);
 int btrfs_drop_inode(struct inode *inode);
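btrfs_block_rsv_refill_noflush() exists for callers that must not kick off writeback to satisfy a reservation (flushing can deadlock a caller that already holds transaction or tree locks). As the extent-tree.c hunks below show, it is implemented as a thin wrapper over one flag-parameterized helper. A hedged, self-contained sketch of that shape (refill/__refill and struct rsv are illustrative, not btrfs symbols):

#include <stdint.h>
typedef uint64_t u64;

struct rsv { u64 reserved; };

static int __refill(struct rsv *rsv, u64 min_reserved, int flush)
{
	if (rsv->reserved >= min_reserved)
		return 0;
	if (!flush)
		return -1;	/* caller must cope; models -ENOSPC */
	/* a flushing implementation would write back dirty data here */
	rsv->reserved = min_reserved;
	return 0;
}

int refill(struct rsv *rsv, u64 min_reserved)
{
	return __refill(rsv, min_reserved, 1);
}

int refill_noflush(struct rsv *rsv, u64 min_reserved)
{
	return __refill(rsv, min_reserved, 0);
}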
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index 3a1b939c9ae2..9c1eccc2c503 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -617,12 +617,14 @@ static void btrfs_delayed_item_release_metadata(struct btrfs_root *root,
 static int btrfs_delayed_inode_reserve_metadata(
 					struct btrfs_trans_handle *trans,
 					struct btrfs_root *root,
+					struct inode *inode,
 					struct btrfs_delayed_node *node)
 {
 	struct btrfs_block_rsv *src_rsv;
 	struct btrfs_block_rsv *dst_rsv;
 	u64 num_bytes;
 	int ret;
+	int release = false;
 
 	src_rsv = trans->block_rsv;
 	dst_rsv = &root->fs_info->delayed_block_rsv;
@@ -638,8 +640,8 @@ static int btrfs_delayed_inode_reserve_metadata(
 	 * Now if src_rsv == delalloc_block_rsv we'll let it just steal since
 	 * we're accounted for.
 	 */
-	if (!trans->bytes_reserved &&
-	    src_rsv != &root->fs_info->delalloc_block_rsv) {
+	if (!src_rsv || (!trans->bytes_reserved &&
+			 src_rsv != &root->fs_info->delalloc_block_rsv)) {
 		ret = btrfs_block_rsv_add_noflush(root, dst_rsv, num_bytes);
 		/*
 		 * Since we're under a transaction reserve_metadata_bytes could
@@ -652,12 +654,65 @@ static int btrfs_delayed_inode_reserve_metadata(
 		if (!ret)
 			node->bytes_reserved = num_bytes;
 		return ret;
+	} else if (src_rsv == &root->fs_info->delalloc_block_rsv) {
+		spin_lock(&BTRFS_I(inode)->lock);
+		if (BTRFS_I(inode)->delalloc_meta_reserved) {
+			BTRFS_I(inode)->delalloc_meta_reserved = 0;
+			spin_unlock(&BTRFS_I(inode)->lock);
+			release = true;
+			goto migrate;
+		}
+		spin_unlock(&BTRFS_I(inode)->lock);
+
+		/* Ok, we didn't have space pre-reserved. This shouldn't happen
+		 * too often but it can happen if we do delalloc to an existing
+		 * inode which gets dirtied because of the time update, and then
+		 * isn't touched again until after the transaction commits and
+		 * then we try to write out the data. First try to be nice and
+		 * reserve something strictly for us. If not, be a pain and try
+		 * to steal from the delalloc block rsv.
+		 */
+		ret = btrfs_block_rsv_add_noflush(root, dst_rsv, num_bytes);
+		if (!ret)
+			goto out;
+
+		ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes);
+		if (!ret)
+			goto out;
+
+		/*
+		 * Ok, this is a problem, let's just steal from the global rsv
+		 * since this really shouldn't happen that often.
+		 */
+		WARN_ON(1);
+		ret = btrfs_block_rsv_migrate(&root->fs_info->global_block_rsv,
+					      dst_rsv, num_bytes);
+		goto out;
 	}
 
+migrate:
 	ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes);
+
+out:
+	/*
+	 * Migrate only takes a reservation, it doesn't touch the size of the
+	 * block_rsv. This is to simplify people who don't normally have things
+	 * migrated from their block rsv. If they go to release their
+	 * reservation, that will decrease the size as well, so if migrate
+	 * reduced the size we'd end up with a negative size. But for the
+	 * delalloc_meta_reserved stuff we will only know to drop 1 reservation,
+	 * but we could in fact do this reserve/migrate dance several times
+	 * between the time we did the original reservation and we'd clean it
+	 * up. So to take care of this, release the space for the meta
+	 * reservation here. I think it may be time for a documentation page on
+	 * how block rsvs work.
+	 */
 	if (!ret)
 		node->bytes_reserved = num_bytes;
 
+	if (release)
+		btrfs_block_rsv_release(root, src_rsv, num_bytes);
+
 	return ret;
 }
 
@@ -1708,7 +1763,8 @@ int btrfs_delayed_update_inode(struct btrfs_trans_handle *trans,
 		goto release_node;
 	}
 
-	ret = btrfs_delayed_inode_reserve_metadata(trans, root, delayed_node);
+	ret = btrfs_delayed_inode_reserve_metadata(trans, root, inode,
+						   delayed_node);
 	if (ret)
 		goto release_node;
 
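The reserve path above is easiest to read as a fallback chain: consume the per-inode pre-reservation if delalloc_meta_reserved was set, otherwise try a fresh no-flush reservation, then try migrating from the source rsv, and only as a loud last resort steal from the global reserve. A hedged sketch of that control flow (all helpers are trivial stand-ins so the sketch compiles, not btrfs symbols):

#include <stdio.h>

/* stand-ins for the btrfs helpers; each returns 0 on success */
static int take_prereserved(void)    { return -1; }
static int reserve_noflush(void)     { return -1; }
static int migrate_from_src(void)    { return 0;  }
static int migrate_from_global(void) { return 0;  }
static void warn_once(void)          { fprintf(stderr, "stealing from global rsv\n"); }

int reserve_for_delayed_inode(void)
{
	if (take_prereserved() == 0)	/* fast path: space already ours */
		return 0;

	if (reserve_noflush() == 0)	/* 1. something strictly for us */
		return 0;
	if (migrate_from_src() == 0)	/* 2. steal from the source rsv */
		return 0;

	warn_once();			/* 3. global reserve: should be rare */
	return migrate_from_global();
}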
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 102c176fc29c..f44b3928dc2d 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -620,7 +620,7 @@ out:
 
 static int btree_io_failed_hook(struct bio *failed_bio,
 				struct page *page, u64 start, u64 end,
-				u64 mirror_num, struct extent_state *state)
+				int mirror_num, struct extent_state *state)
 {
 	struct extent_io_tree *tree;
 	unsigned long len;
@@ -1890,31 +1890,32 @@ struct btrfs_root *open_ctree(struct super_block *sb,
 	u64 features;
 	struct btrfs_key location;
 	struct buffer_head *bh;
-	struct btrfs_root *extent_root = kzalloc(sizeof(struct btrfs_root),
-						 GFP_NOFS);
-	struct btrfs_root *csum_root = kzalloc(sizeof(struct btrfs_root),
-					       GFP_NOFS);
+	struct btrfs_super_block *disk_super;
 	struct btrfs_root *tree_root = btrfs_sb(sb);
-	struct btrfs_fs_info *fs_info = NULL;
-	struct btrfs_root *chunk_root = kzalloc(sizeof(struct btrfs_root),
-						GFP_NOFS);
-	struct btrfs_root *dev_root = kzalloc(sizeof(struct btrfs_root),
-					      GFP_NOFS);
+	struct btrfs_fs_info *fs_info = tree_root->fs_info;
+	struct btrfs_root *extent_root;
+	struct btrfs_root *csum_root;
+	struct btrfs_root *chunk_root;
+	struct btrfs_root *dev_root;
 	struct btrfs_root *log_tree_root;
-
 	int ret;
 	int err = -EINVAL;
 	int num_backups_tried = 0;
 	int backup_index = 0;
 
-	struct btrfs_super_block *disk_super;
+	extent_root = fs_info->extent_root =
+			kzalloc(sizeof(struct btrfs_root), GFP_NOFS);
+	csum_root = fs_info->csum_root =
+			kzalloc(sizeof(struct btrfs_root), GFP_NOFS);
+	chunk_root = fs_info->chunk_root =
+			kzalloc(sizeof(struct btrfs_root), GFP_NOFS);
+	dev_root = fs_info->dev_root =
+			kzalloc(sizeof(struct btrfs_root), GFP_NOFS);
 
-	if (!extent_root || !tree_root || !tree_root->fs_info ||
-	    !chunk_root || !dev_root || !csum_root) {
+	if (!extent_root || !csum_root || !chunk_root || !dev_root) {
 		err = -ENOMEM;
 		goto fail;
 	}
-	fs_info = tree_root->fs_info;
 
 	ret = init_srcu_struct(&fs_info->subvol_srcu);
 	if (ret) {
@@ -1954,12 +1955,6 @@ struct btrfs_root *open_ctree(struct super_block *sb,
 	mutex_init(&fs_info->reloc_mutex);
 
 	init_completion(&fs_info->kobj_unregister);
-	fs_info->tree_root = tree_root;
-	fs_info->extent_root = extent_root;
-	fs_info->csum_root = csum_root;
-	fs_info->chunk_root = chunk_root;
-	fs_info->dev_root = dev_root;
-	fs_info->fs_devices = fs_devices;
 	INIT_LIST_HEAD(&fs_info->dirty_cowonly_roots);
 	INIT_LIST_HEAD(&fs_info->space_info);
 	btrfs_mapping_init(&fs_info->mapping_tree);
@@ -2199,19 +2194,27 @@ struct btrfs_root *open_ctree(struct super_block *sb,
 	fs_info->endio_meta_write_workers.idle_thresh = 2;
 	fs_info->readahead_workers.idle_thresh = 2;
 
-	btrfs_start_workers(&fs_info->workers, 1);
-	btrfs_start_workers(&fs_info->generic_worker, 1);
-	btrfs_start_workers(&fs_info->submit_workers, 1);
-	btrfs_start_workers(&fs_info->delalloc_workers, 1);
-	btrfs_start_workers(&fs_info->fixup_workers, 1);
-	btrfs_start_workers(&fs_info->endio_workers, 1);
-	btrfs_start_workers(&fs_info->endio_meta_workers, 1);
-	btrfs_start_workers(&fs_info->endio_meta_write_workers, 1);
-	btrfs_start_workers(&fs_info->endio_write_workers, 1);
-	btrfs_start_workers(&fs_info->endio_freespace_worker, 1);
-	btrfs_start_workers(&fs_info->delayed_workers, 1);
-	btrfs_start_workers(&fs_info->caching_workers, 1);
-	btrfs_start_workers(&fs_info->readahead_workers, 1);
+	/*
+	 * btrfs_start_workers can really only fail because of ENOMEM so just
+	 * return -ENOMEM if any of these fail.
+	 */
+	ret = btrfs_start_workers(&fs_info->workers);
+	ret |= btrfs_start_workers(&fs_info->generic_worker);
+	ret |= btrfs_start_workers(&fs_info->submit_workers);
+	ret |= btrfs_start_workers(&fs_info->delalloc_workers);
+	ret |= btrfs_start_workers(&fs_info->fixup_workers);
+	ret |= btrfs_start_workers(&fs_info->endio_workers);
+	ret |= btrfs_start_workers(&fs_info->endio_meta_workers);
+	ret |= btrfs_start_workers(&fs_info->endio_meta_write_workers);
+	ret |= btrfs_start_workers(&fs_info->endio_write_workers);
+	ret |= btrfs_start_workers(&fs_info->endio_freespace_worker);
+	ret |= btrfs_start_workers(&fs_info->delayed_workers);
+	ret |= btrfs_start_workers(&fs_info->caching_workers);
+	ret |= btrfs_start_workers(&fs_info->readahead_workers);
+	if (ret) {
+		ret = -ENOMEM;
+		goto fail_sb_buffer;
+	}
 
 	fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super);
 	fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages,
@@ -2465,21 +2468,20 @@ fail_sb_buffer:
 	btrfs_stop_workers(&fs_info->caching_workers);
 fail_alloc:
 fail_iput:
+	btrfs_mapping_tree_free(&fs_info->mapping_tree);
+
 	invalidate_inode_pages2(fs_info->btree_inode->i_mapping);
 	iput(fs_info->btree_inode);
-
-	btrfs_close_devices(fs_info->fs_devices);
-	btrfs_mapping_tree_free(&fs_info->mapping_tree);
 fail_bdi:
 	bdi_destroy(&fs_info->bdi);
 fail_srcu:
 	cleanup_srcu_struct(&fs_info->subvol_srcu);
 fail:
+	btrfs_close_devices(fs_info->fs_devices);
 	free_fs_info(fs_info);
 	return ERR_PTR(err);
 
 recovery_tree_root:
-
 	if (!btrfs_test_opt(tree_root, RECOVERY))
 		goto fail_tree_roots;
 
@@ -2579,22 +2581,10 @@ static int write_dev_supers(struct btrfs_device *device,
 	int errors = 0;
 	u32 crc;
 	u64 bytenr;
-	int last_barrier = 0;
 
 	if (max_mirrors == 0)
 		max_mirrors = BTRFS_SUPER_MIRROR_MAX;
 
-	/* make sure only the last submit_bh does a barrier */
-	if (do_barriers) {
-		for (i = 0; i < max_mirrors; i++) {
-			bytenr = btrfs_sb_offset(i);
-			if (bytenr + BTRFS_SUPER_INFO_SIZE >=
-			    device->total_bytes)
-				break;
-			last_barrier = i;
-		}
-	}
-
 	for (i = 0; i < max_mirrors; i++) {
 		bytenr = btrfs_sb_offset(i);
 		if (bytenr + BTRFS_SUPER_INFO_SIZE >= device->total_bytes)
@@ -2640,17 +2630,136 @@ static int write_dev_supers(struct btrfs_device *device,
 			bh->b_end_io = btrfs_end_buffer_write_sync;
 		}
 
-		if (i == last_barrier && do_barriers)
-			ret = submit_bh(WRITE_FLUSH_FUA, bh);
-		else
-			ret = submit_bh(WRITE_SYNC, bh);
-
+		/*
+		 * we fua the first super. The others we allow
+		 * to go down lazy.
+		 */
+		ret = submit_bh(WRITE_FUA, bh);
 		if (ret)
 			errors++;
 	}
 	return errors < i ? 0 : -1;
 }
 
+/*
+ * endio for the write_dev_flush, this will wake anyone waiting
+ * for the barrier when it is done
+ */
+static void btrfs_end_empty_barrier(struct bio *bio, int err)
+{
+	if (err) {
+		if (err == -EOPNOTSUPP)
+			set_bit(BIO_EOPNOTSUPP, &bio->bi_flags);
+		clear_bit(BIO_UPTODATE, &bio->bi_flags);
+	}
+	if (bio->bi_private)
+		complete(bio->bi_private);
+	bio_put(bio);
+}
+
+/*
+ * trigger flushes for one of the devices. If you pass wait == 0, the flushes
+ * are sent down. With wait == 1, it waits for the previous flush.
+ *
+ * any device where the flush fails with eopnotsupp is flagged as not-barrier
+ * capable
+ */
+static int write_dev_flush(struct btrfs_device *device, int wait)
+{
+	struct bio *bio;
+	int ret = 0;
+
+	if (device->nobarriers)
+		return 0;
+
+	if (wait) {
+		bio = device->flush_bio;
+		if (!bio)
+			return 0;
+
+		wait_for_completion(&device->flush_wait);
+
+		if (bio_flagged(bio, BIO_EOPNOTSUPP)) {
+			printk("btrfs: disabling barriers on dev %s\n",
+			       device->name);
+			device->nobarriers = 1;
+		}
+		if (!bio_flagged(bio, BIO_UPTODATE)) {
+			ret = -EIO;
+		}
+
+		/* drop the reference from the wait == 0 run */
+		bio_put(bio);
+		device->flush_bio = NULL;
+
+		return ret;
+	}
+
+	/*
+	 * one reference for us, and we leave it for the
+	 * caller
+	 */
+	device->flush_bio = NULL;
+	bio = bio_alloc(GFP_NOFS, 0);
+	if (!bio)
+		return -ENOMEM;
+
+	bio->bi_end_io = btrfs_end_empty_barrier;
+	bio->bi_bdev = device->bdev;
+	init_completion(&device->flush_wait);
+	bio->bi_private = &device->flush_wait;
+	device->flush_bio = bio;
+
+	bio_get(bio);
+	submit_bio(WRITE_FLUSH, bio);
+
+	return 0;
+}
+
+/*
+ * send an empty flush down to each device in parallel,
+ * then wait for them
+ */
+static int barrier_all_devices(struct btrfs_fs_info *info)
+{
+	struct list_head *head;
+	struct btrfs_device *dev;
+	int errors = 0;
+	int ret;
+
+	/* send down all the barriers */
+	head = &info->fs_devices->devices;
+	list_for_each_entry_rcu(dev, head, dev_list) {
+		if (!dev->bdev) {
+			errors++;
+			continue;
+		}
+		if (!dev->in_fs_metadata || !dev->writeable)
+			continue;
+
+		ret = write_dev_flush(dev, 0);
+		if (ret)
+			errors++;
+	}
+
+	/* wait for all the barriers */
+	list_for_each_entry_rcu(dev, head, dev_list) {
+		if (!dev->bdev) {
+			errors++;
+			continue;
+		}
+		if (!dev->in_fs_metadata || !dev->writeable)
+			continue;
+
+		ret = write_dev_flush(dev, 1);
+		if (ret)
+			errors++;
+	}
+	if (errors)
+		return -EIO;
+	return 0;
+}
+
 int write_all_supers(struct btrfs_root *root, int max_mirrors)
 {
 	struct list_head *head;
@@ -2672,6 +2781,10 @@ int write_all_supers(struct btrfs_root *root, int max_mirrors)
 
 	mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
 	head = &root->fs_info->fs_devices->devices;
+
+	if (do_barriers)
+		barrier_all_devices(root->fs_info);
+
 	list_for_each_entry_rcu(dev, head, dev_list) {
 		if (!dev->bdev) {
 			total_errors++;
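barrier_all_devices() deliberately submits every empty flush before waiting on any of them, so the devices drain their caches in parallel instead of one after another; write_dev_flush() keeps an extra bio reference so the wait == 1 pass can still inspect the flags after completion. A hedged userspace model of the two-phase submit-then-wait shape, with one thread standing in for each in-flight flush:

#include <pthread.h>
#include <unistd.h>

#define NDEV 3

/* stand-in for an empty WRITE_FLUSH bio: pretend the device takes a while */
static void *dev_flush(void *arg)
{
	(void)arg;
	usleep(1000);
	return NULL;
}

int barrier_all(void)
{
	pthread_t tid[NDEV];
	int created[NDEV] = {0};
	int i, errors = 0;

	/* phase 1: send down all the barriers */
	for (i = 0; i < NDEV; i++)
		created[i] = !pthread_create(&tid[i], NULL, dev_flush, NULL);

	/* phase 2: wait for all the barriers */
	for (i = 0; i < NDEV; i++) {
		if (!created[i] || pthread_join(tid[i], NULL))
			errors++;
	}

	return errors ? -1 : 0;	/* models returning -EIO on any failure */
}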
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 9879bd474632..f5fbe576d2ba 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c | |||
@@ -467,13 +467,59 @@ static int cache_block_group(struct btrfs_block_group_cache *cache, | |||
467 | struct btrfs_root *root, | 467 | struct btrfs_root *root, |
468 | int load_cache_only) | 468 | int load_cache_only) |
469 | { | 469 | { |
470 | DEFINE_WAIT(wait); | ||
470 | struct btrfs_fs_info *fs_info = cache->fs_info; | 471 | struct btrfs_fs_info *fs_info = cache->fs_info; |
471 | struct btrfs_caching_control *caching_ctl; | 472 | struct btrfs_caching_control *caching_ctl; |
472 | int ret = 0; | 473 | int ret = 0; |
473 | 474 | ||
474 | smp_mb(); | 475 | caching_ctl = kzalloc(sizeof(*caching_ctl), GFP_NOFS); |
475 | if (cache->cached != BTRFS_CACHE_NO) | 476 | BUG_ON(!caching_ctl); |
477 | |||
478 | INIT_LIST_HEAD(&caching_ctl->list); | ||
479 | mutex_init(&caching_ctl->mutex); | ||
480 | init_waitqueue_head(&caching_ctl->wait); | ||
481 | caching_ctl->block_group = cache; | ||
482 | caching_ctl->progress = cache->key.objectid; | ||
483 | atomic_set(&caching_ctl->count, 1); | ||
484 | caching_ctl->work.func = caching_thread; | ||
485 | |||
486 | spin_lock(&cache->lock); | ||
487 | /* | ||
488 | * This should be a rare occasion, but this could happen I think in the | ||
489 | * case where one thread starts to load the space cache info, and then | ||
490 | * some other thread starts a transaction commit which tries to do an | ||
491 | * allocation while the other thread is still loading the space cache | ||
492 | * info. The previous loop should have kept us from choosing this block | ||
493 | * group, but if we've moved to the state where we will wait on caching | ||
494 | * block groups we need to first check if we're doing a fast load here, | ||
495 | * so we can wait for it to finish, otherwise we could end up allocating | ||
496 | * from a block group who's cache gets evicted for one reason or | ||
497 | * another. | ||
498 | */ | ||
499 | while (cache->cached == BTRFS_CACHE_FAST) { | ||
500 | struct btrfs_caching_control *ctl; | ||
501 | |||
502 | ctl = cache->caching_ctl; | ||
503 | atomic_inc(&ctl->count); | ||
504 | prepare_to_wait(&ctl->wait, &wait, TASK_UNINTERRUPTIBLE); | ||
505 | spin_unlock(&cache->lock); | ||
506 | |||
507 | schedule(); | ||
508 | |||
509 | finish_wait(&ctl->wait, &wait); | ||
510 | put_caching_control(ctl); | ||
511 | spin_lock(&cache->lock); | ||
512 | } | ||
513 | |||
514 | if (cache->cached != BTRFS_CACHE_NO) { | ||
515 | spin_unlock(&cache->lock); | ||
516 | kfree(caching_ctl); | ||
476 | return 0; | 517 | return 0; |
518 | } | ||
519 | WARN_ON(cache->caching_ctl); | ||
520 | cache->caching_ctl = caching_ctl; | ||
521 | cache->cached = BTRFS_CACHE_FAST; | ||
522 | spin_unlock(&cache->lock); | ||
477 | 523 | ||
478 | /* | 524 | /* |
479 | * We can't do the read from on-disk cache during a commit since we need | 525 | * We can't do the read from on-disk cache during a commit since we need |
@@ -484,56 +530,51 @@ static int cache_block_group(struct btrfs_block_group_cache *cache, | |||
484 | if (trans && (!trans->transaction->in_commit) && | 530 | if (trans && (!trans->transaction->in_commit) && |
485 | (root && root != root->fs_info->tree_root) && | 531 | (root && root != root->fs_info->tree_root) && |
486 | btrfs_test_opt(root, SPACE_CACHE)) { | 532 | btrfs_test_opt(root, SPACE_CACHE)) { |
487 | spin_lock(&cache->lock); | ||
488 | if (cache->cached != BTRFS_CACHE_NO) { | ||
489 | spin_unlock(&cache->lock); | ||
490 | return 0; | ||
491 | } | ||
492 | cache->cached = BTRFS_CACHE_STARTED; | ||
493 | spin_unlock(&cache->lock); | ||
494 | |||
495 | ret = load_free_space_cache(fs_info, cache); | 533 | ret = load_free_space_cache(fs_info, cache); |
496 | 534 | ||
497 | spin_lock(&cache->lock); | 535 | spin_lock(&cache->lock); |
498 | if (ret == 1) { | 536 | if (ret == 1) { |
537 | cache->caching_ctl = NULL; | ||
499 | cache->cached = BTRFS_CACHE_FINISHED; | 538 | cache->cached = BTRFS_CACHE_FINISHED; |
500 | cache->last_byte_to_unpin = (u64)-1; | 539 | cache->last_byte_to_unpin = (u64)-1; |
501 | } else { | 540 | } else { |
502 | cache->cached = BTRFS_CACHE_NO; | 541 | if (load_cache_only) { |
542 | cache->caching_ctl = NULL; | ||
543 | cache->cached = BTRFS_CACHE_NO; | ||
544 | } else { | ||
545 | cache->cached = BTRFS_CACHE_STARTED; | ||
546 | } | ||
503 | } | 547 | } |
504 | spin_unlock(&cache->lock); | 548 | spin_unlock(&cache->lock); |
549 | wake_up(&caching_ctl->wait); | ||
505 | if (ret == 1) { | 550 | if (ret == 1) { |
551 | put_caching_control(caching_ctl); | ||
506 | free_excluded_extents(fs_info->extent_root, cache); | 552 | free_excluded_extents(fs_info->extent_root, cache); |
507 | return 0; | 553 | return 0; |
508 | } | 554 | } |
555 | } else { | ||
556 | /* | ||
557 | * We are not going to do the fast caching, set cached to the | ||
558 | * appropriate value and wakeup any waiters. | ||
559 | */ | ||
560 | spin_lock(&cache->lock); | ||
561 | if (load_cache_only) { | ||
562 | cache->caching_ctl = NULL; | ||
563 | cache->cached = BTRFS_CACHE_NO; | ||
564 | } else { | ||
565 | cache->cached = BTRFS_CACHE_STARTED; | ||
566 | } | ||
567 | spin_unlock(&cache->lock); | ||
568 | wake_up(&caching_ctl->wait); | ||
509 | } | 569 | } |
510 | 570 | ||
511 | if (load_cache_only) | 571 | if (load_cache_only) { |
512 | return 0; | 572 | put_caching_control(caching_ctl); |
513 | |||
514 | caching_ctl = kzalloc(sizeof(*caching_ctl), GFP_NOFS); | ||
515 | BUG_ON(!caching_ctl); | ||
516 | |||
517 | INIT_LIST_HEAD(&caching_ctl->list); | ||
518 | mutex_init(&caching_ctl->mutex); | ||
519 | init_waitqueue_head(&caching_ctl->wait); | ||
520 | caching_ctl->block_group = cache; | ||
521 | caching_ctl->progress = cache->key.objectid; | ||
522 | /* one for caching kthread, one for caching block group list */ | ||
523 | atomic_set(&caching_ctl->count, 2); | ||
524 | caching_ctl->work.func = caching_thread; | ||
525 | |||
526 | spin_lock(&cache->lock); | ||
527 | if (cache->cached != BTRFS_CACHE_NO) { | ||
528 | spin_unlock(&cache->lock); | ||
529 | kfree(caching_ctl); | ||
530 | return 0; | 573 | return 0; |
531 | } | 574 | } |
532 | cache->caching_ctl = caching_ctl; | ||
533 | cache->cached = BTRFS_CACHE_STARTED; | ||
534 | spin_unlock(&cache->lock); | ||
535 | 575 | ||
536 | down_write(&fs_info->extent_commit_sem); | 576 | down_write(&fs_info->extent_commit_sem); |
577 | atomic_inc(&caching_ctl->count); | ||
537 | list_add_tail(&caching_ctl->list, &fs_info->caching_block_groups); | 578 | list_add_tail(&caching_ctl->list, &fs_info->caching_block_groups); |
538 | up_write(&fs_info->extent_commit_sem); | 579 | up_write(&fs_info->extent_commit_sem); |
539 | 580 | ||
@@ -2781,7 +2822,7 @@ out_free: | |||
2781 | btrfs_release_path(path); | 2822 | btrfs_release_path(path); |
2782 | out: | 2823 | out: |
2783 | spin_lock(&block_group->lock); | 2824 | spin_lock(&block_group->lock); |
2784 | if (!ret) | 2825 | if (!ret && dcs == BTRFS_DC_SETUP) |
2785 | block_group->cache_generation = trans->transid; | 2826 | block_group->cache_generation = trans->transid; |
2786 | block_group->disk_cache_state = dcs; | 2827 | block_group->disk_cache_state = dcs; |
2787 | spin_unlock(&block_group->lock); | 2828 | spin_unlock(&block_group->lock); |
@@ -3797,16 +3838,16 @@ void btrfs_free_block_rsv(struct btrfs_root *root, | |||
3797 | kfree(rsv); | 3838 | kfree(rsv); |
3798 | } | 3839 | } |
3799 | 3840 | ||
3800 | int btrfs_block_rsv_add(struct btrfs_root *root, | 3841 | static inline int __block_rsv_add(struct btrfs_root *root, |
3801 | struct btrfs_block_rsv *block_rsv, | 3842 | struct btrfs_block_rsv *block_rsv, |
3802 | u64 num_bytes) | 3843 | u64 num_bytes, int flush) |
3803 | { | 3844 | { |
3804 | int ret; | 3845 | int ret; |
3805 | 3846 | ||
3806 | if (num_bytes == 0) | 3847 | if (num_bytes == 0) |
3807 | return 0; | 3848 | return 0; |
3808 | 3849 | ||
3809 | ret = reserve_metadata_bytes(root, block_rsv, num_bytes, 1); | 3850 | ret = reserve_metadata_bytes(root, block_rsv, num_bytes, flush); |
3810 | if (!ret) { | 3851 | if (!ret) { |
3811 | block_rsv_add_bytes(block_rsv, num_bytes, 1); | 3852 | block_rsv_add_bytes(block_rsv, num_bytes, 1); |
3812 | return 0; | 3853 | return 0; |
@@ -3815,22 +3856,18 @@ int btrfs_block_rsv_add(struct btrfs_root *root, | |||
3815 | return ret; | 3856 | return ret; |
3816 | } | 3857 | } |
3817 | 3858 | ||
3859 | int btrfs_block_rsv_add(struct btrfs_root *root, | ||
3860 | struct btrfs_block_rsv *block_rsv, | ||
3861 | u64 num_bytes) | ||
3862 | { | ||
3863 | return __block_rsv_add(root, block_rsv, num_bytes, 1); | ||
3864 | } | ||
3865 | |||
3818 | int btrfs_block_rsv_add_noflush(struct btrfs_root *root, | 3866 | int btrfs_block_rsv_add_noflush(struct btrfs_root *root, |
3819 | struct btrfs_block_rsv *block_rsv, | 3867 | struct btrfs_block_rsv *block_rsv, |
3820 | u64 num_bytes) | 3868 | u64 num_bytes) |
3821 | { | 3869 | { |
3822 | int ret; | 3870 | return __block_rsv_add(root, block_rsv, num_bytes, 0); |
3823 | |||
3824 | if (num_bytes == 0) | ||
3825 | return 0; | ||
3826 | |||
3827 | ret = reserve_metadata_bytes(root, block_rsv, num_bytes, 0); | ||
3828 | if (!ret) { | ||
3829 | block_rsv_add_bytes(block_rsv, num_bytes, 1); | ||
3830 | return 0; | ||
3831 | } | ||
3832 | |||
3833 | return ret; | ||
3834 | } | 3871 | } |
3835 | 3872 | ||
3836 | int btrfs_block_rsv_check(struct btrfs_root *root, | 3873 | int btrfs_block_rsv_check(struct btrfs_root *root, |
@@ -3851,9 +3888,9 @@ int btrfs_block_rsv_check(struct btrfs_root *root, | |||
3851 | return ret; | 3888 | return ret; |
3852 | } | 3889 | } |
3853 | 3890 | ||
3854 | int btrfs_block_rsv_refill(struct btrfs_root *root, | 3891 | static inline int __btrfs_block_rsv_refill(struct btrfs_root *root, |
3855 | struct btrfs_block_rsv *block_rsv, | 3892 | struct btrfs_block_rsv *block_rsv, |
3856 | u64 min_reserved) | 3893 | u64 min_reserved, int flush) |
3857 | { | 3894 | { |
3858 | u64 num_bytes = 0; | 3895 | u64 num_bytes = 0; |
3859 | int ret = -ENOSPC; | 3896 | int ret = -ENOSPC; |
@@ -3872,7 +3909,7 @@ int btrfs_block_rsv_refill(struct btrfs_root *root, | |||
3872 | if (!ret) | 3909 | if (!ret) |
3873 | return 0; | 3910 | return 0; |
3874 | 3911 | ||
3875 | ret = reserve_metadata_bytes(root, block_rsv, num_bytes, 1); | 3912 | ret = reserve_metadata_bytes(root, block_rsv, num_bytes, flush); |
3876 | if (!ret) { | 3913 | if (!ret) { |
3877 | block_rsv_add_bytes(block_rsv, num_bytes, 0); | 3914 | block_rsv_add_bytes(block_rsv, num_bytes, 0); |
3878 | return 0; | 3915 | return 0; |
@@ -3881,6 +3918,20 @@ int btrfs_block_rsv_refill(struct btrfs_root *root, | |||
3881 | return ret; | 3918 | return ret; |
3882 | } | 3919 | } |
3883 | 3920 | ||
3921 | int btrfs_block_rsv_refill(struct btrfs_root *root, | ||
3922 | struct btrfs_block_rsv *block_rsv, | ||
3923 | u64 min_reserved) | ||
3924 | { | ||
3925 | return __btrfs_block_rsv_refill(root, block_rsv, min_reserved, 1); | ||
3926 | } | ||
3927 | |||
3928 | int btrfs_block_rsv_refill_noflush(struct btrfs_root *root, | ||
3929 | struct btrfs_block_rsv *block_rsv, | ||
3930 | u64 min_reserved) | ||
3931 | { | ||
3932 | return __btrfs_block_rsv_refill(root, block_rsv, min_reserved, 0); | ||
3933 | } | ||
3934 | |||
3884 | int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src_rsv, | 3935 | int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src_rsv, |
3885 | struct btrfs_block_rsv *dst_rsv, | 3936 | struct btrfs_block_rsv *dst_rsv, |
3886 | u64 num_bytes) | 3937 | u64 num_bytes) |
@@ -4064,23 +4115,30 @@ int btrfs_snap_reserve_metadata(struct btrfs_trans_handle *trans, | |||
4064 | */ | 4115 | */ |
4065 | static unsigned drop_outstanding_extent(struct inode *inode) | 4116 | static unsigned drop_outstanding_extent(struct inode *inode) |
4066 | { | 4117 | { |
4118 | unsigned drop_inode_space = 0; | ||
4067 | unsigned dropped_extents = 0; | 4119 | unsigned dropped_extents = 0; |
4068 | 4120 | ||
4069 | BUG_ON(!BTRFS_I(inode)->outstanding_extents); | 4121 | BUG_ON(!BTRFS_I(inode)->outstanding_extents); |
4070 | BTRFS_I(inode)->outstanding_extents--; | 4122 | BTRFS_I(inode)->outstanding_extents--; |
4071 | 4123 | ||
4124 | if (BTRFS_I(inode)->outstanding_extents == 0 && | ||
4125 | BTRFS_I(inode)->delalloc_meta_reserved) { | ||
4126 | drop_inode_space = 1; | ||
4127 | BTRFS_I(inode)->delalloc_meta_reserved = 0; | ||
4128 | } | ||
4129 | |||
4072 | /* | 4130 | /* |
4073 | * If we have more or the same amount of outsanding extents than we have | 4131 | * If we have more or the same amount of outsanding extents than we have |
4074 | * reserved then we need to leave the reserved extents count alone. | 4132 | * reserved then we need to leave the reserved extents count alone. |
4075 | */ | 4133 | */ |
4076 | if (BTRFS_I(inode)->outstanding_extents >= | 4134 | if (BTRFS_I(inode)->outstanding_extents >= |
4077 | BTRFS_I(inode)->reserved_extents) | 4135 | BTRFS_I(inode)->reserved_extents) |
4078 | return 0; | 4136 | return drop_inode_space; |
4079 | 4137 | ||
4080 | dropped_extents = BTRFS_I(inode)->reserved_extents - | 4138 | dropped_extents = BTRFS_I(inode)->reserved_extents - |
4081 | BTRFS_I(inode)->outstanding_extents; | 4139 | BTRFS_I(inode)->outstanding_extents; |
4082 | BTRFS_I(inode)->reserved_extents -= dropped_extents; | 4140 | BTRFS_I(inode)->reserved_extents -= dropped_extents; |
4083 | return dropped_extents; | 4141 | return dropped_extents + drop_inode_space; |
4084 | } | 4142 | } |
4085 | 4143 | ||
4086 | /** | 4144 | /** |
@@ -4146,12 +4204,17 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) | |||
4146 | struct btrfs_root *root = BTRFS_I(inode)->root; | 4204 | struct btrfs_root *root = BTRFS_I(inode)->root; |
4147 | struct btrfs_block_rsv *block_rsv = &root->fs_info->delalloc_block_rsv; | 4205 | struct btrfs_block_rsv *block_rsv = &root->fs_info->delalloc_block_rsv; |
4148 | u64 to_reserve = 0; | 4206 | u64 to_reserve = 0; |
4207 | u64 csum_bytes; | ||
4149 | unsigned nr_extents = 0; | 4208 | unsigned nr_extents = 0; |
4209 | int extra_reserve = 0; | ||
4150 | int flush = 1; | 4210 | int flush = 1; |
4151 | int ret; | 4211 | int ret; |
4152 | 4212 | ||
4213 | /* Need to be holding the i_mutex here if we aren't free space cache */ | ||
4153 | if (btrfs_is_free_space_inode(root, inode)) | 4214 | if (btrfs_is_free_space_inode(root, inode)) |
4154 | flush = 0; | 4215 | flush = 0; |
4216 | else | ||
4217 | WARN_ON(!mutex_is_locked(&inode->i_mutex)); | ||
4155 | 4218 | ||
4156 | if (flush && btrfs_transaction_in_commit(root->fs_info)) | 4219 | if (flush && btrfs_transaction_in_commit(root->fs_info)) |
4157 | schedule_timeout(1); | 4220 | schedule_timeout(1); |
@@ -4162,14 +4225,22 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) | |||
4162 | BTRFS_I(inode)->outstanding_extents++; | 4225 | BTRFS_I(inode)->outstanding_extents++; |
4163 | 4226 | ||
4164 | if (BTRFS_I(inode)->outstanding_extents > | 4227 | if (BTRFS_I(inode)->outstanding_extents > |
4165 | BTRFS_I(inode)->reserved_extents) { | 4228 | BTRFS_I(inode)->reserved_extents) |
4166 | nr_extents = BTRFS_I(inode)->outstanding_extents - | 4229 | nr_extents = BTRFS_I(inode)->outstanding_extents - |
4167 | BTRFS_I(inode)->reserved_extents; | 4230 | BTRFS_I(inode)->reserved_extents; |
4168 | BTRFS_I(inode)->reserved_extents += nr_extents; | ||
4169 | 4231 | ||
4170 | to_reserve = btrfs_calc_trans_metadata_size(root, nr_extents); | 4232 | /* |
4233 | * Add an item to reserve for updating the inode when we complete the | ||
4234 | * delalloc io. | ||
4235 | */ | ||
4236 | if (!BTRFS_I(inode)->delalloc_meta_reserved) { | ||
4237 | nr_extents++; | ||
4238 | extra_reserve = 1; | ||
4171 | } | 4239 | } |
4240 | |||
4241 | to_reserve = btrfs_calc_trans_metadata_size(root, nr_extents); | ||
4172 | to_reserve += calc_csum_metadata_size(inode, num_bytes, 1); | 4242 | to_reserve += calc_csum_metadata_size(inode, num_bytes, 1); |
4243 | csum_bytes = BTRFS_I(inode)->csum_bytes; | ||
4173 | spin_unlock(&BTRFS_I(inode)->lock); | 4244 | spin_unlock(&BTRFS_I(inode)->lock); |
4174 | 4245 | ||
4175 | ret = reserve_metadata_bytes(root, block_rsv, to_reserve, flush); | 4246 | ret = reserve_metadata_bytes(root, block_rsv, to_reserve, flush); |
@@ -4179,22 +4250,35 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) | |||
4179 | 4250 | ||
4180 | spin_lock(&BTRFS_I(inode)->lock); | 4251 | spin_lock(&BTRFS_I(inode)->lock); |
4181 | dropped = drop_outstanding_extent(inode); | 4252 | dropped = drop_outstanding_extent(inode); |
4182 | to_free = calc_csum_metadata_size(inode, num_bytes, 0); | ||
4183 | spin_unlock(&BTRFS_I(inode)->lock); | ||
4184 | to_free += btrfs_calc_trans_metadata_size(root, dropped); | ||
4185 | |||
4186 | /* | 4253 | /* |
4187 | * Somebody could have come in and twiddled with the | 4254 | * If the inodes csum_bytes is the same as the original |
4188 | * reservation, so if we have to free more than we would have | 4255 | * csum_bytes then we know we haven't raced with any free()ers |
4189 | * reserved from this reservation go ahead and release those | 4256 | * so we can just reduce our inodes csum bytes and carry on. |
4190 | * bytes. | 4257 | * Otherwise we have to do the normal free thing to account for |
4258 | * the case that the free side didn't free up its reserve | ||
4259 | * because of this outstanding reservation. | ||
4191 | */ | 4260 | */ |
4192 | to_free -= to_reserve; | 4261 | if (BTRFS_I(inode)->csum_bytes == csum_bytes) |
4262 | calc_csum_metadata_size(inode, num_bytes, 0); | ||
4263 | else | ||
4264 | to_free = calc_csum_metadata_size(inode, num_bytes, 0); | ||
4265 | spin_unlock(&BTRFS_I(inode)->lock); | ||
4266 | if (dropped) | ||
4267 | to_free += btrfs_calc_trans_metadata_size(root, dropped); | ||
4268 | |||
4193 | if (to_free) | 4269 | if (to_free) |
4194 | btrfs_block_rsv_release(root, block_rsv, to_free); | 4270 | btrfs_block_rsv_release(root, block_rsv, to_free); |
4195 | return ret; | 4271 | return ret; |
4196 | } | 4272 | } |
4197 | 4273 | ||
4274 | spin_lock(&BTRFS_I(inode)->lock); | ||
4275 | if (extra_reserve) { | ||
4276 | BTRFS_I(inode)->delalloc_meta_reserved = 1; | ||
4277 | nr_extents--; | ||
4278 | } | ||
4279 | BTRFS_I(inode)->reserved_extents += nr_extents; | ||
4280 | spin_unlock(&BTRFS_I(inode)->lock); | ||
4281 | |||
4198 | block_rsv_add_bytes(block_rsv, to_reserve, 1); | 4282 | block_rsv_add_bytes(block_rsv, to_reserve, 1); |
4199 | 4283 | ||
4200 | return 0; | 4284 | return 0; |
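
The rollback path above hinges on the csum_bytes snapshot taken under BTRFS_I(inode)->lock: if nobody freed csum space while reserve_metadata_bytes() ran, undoing our own delta is enough; otherwise the raced free()er skipped its release and that share must be freed too. A minimal userspace sketch of the snapshot-and-compare pattern; the struct and helper names here are illustrative, not btrfs APIs.

#include <pthread.h>
#include <stdio.h>

/* Toy inode accounting protected by a lock, standing in for
 * BTRFS_I(inode)->lock and ->csum_bytes (names illustrative). */
struct toy_inode {
        pthread_mutex_t lock;
        unsigned long long csum_bytes;
};

/* On reservation failure, decide how much to free by checking whether
 * anyone raced with us while the lock was dropped. */
static unsigned long long rollback(struct toy_inode *ino,
                                   unsigned long long delta,
                                   unsigned long long snapshot)
{
        unsigned long long to_free;

        pthread_mutex_lock(&ino->lock);
        if (ino->csum_bytes == snapshot) {
                /* no racing free(): undoing our own delta is enough */
                ino->csum_bytes -= delta;
                to_free = 0;
        } else {
                /* a free() raced and skipped its release because our
                 * reservation was outstanding; release that share too */
                ino->csum_bytes -= delta;
                to_free = delta;
        }
        pthread_mutex_unlock(&ino->lock);
        return to_free;
}

int main(void)
{
        struct toy_inode ino = { PTHREAD_MUTEX_INITIALIZER, 4096 };
        unsigned long long snap;

        pthread_mutex_lock(&ino.lock);
        ino.csum_bytes += 512;
        snap = ino.csum_bytes;  /* like csum_bytes = BTRFS_I(inode)->csum_bytes */
        pthread_mutex_unlock(&ino.lock);

        /* pretend the slow reservation failed and roll back */
        printf("extra to free: %llu\n", rollback(&ino, 512, snap));
        return 0;
}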
@@ -5040,11 +5124,11 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans, | |||
5040 | struct btrfs_root *root = orig_root->fs_info->extent_root; | 5124 | struct btrfs_root *root = orig_root->fs_info->extent_root; |
5041 | struct btrfs_free_cluster *last_ptr = NULL; | 5125 | struct btrfs_free_cluster *last_ptr = NULL; |
5042 | struct btrfs_block_group_cache *block_group = NULL; | 5126 | struct btrfs_block_group_cache *block_group = NULL; |
5127 | struct btrfs_block_group_cache *used_block_group; | ||
5043 | int empty_cluster = 2 * 1024 * 1024; | 5128 | int empty_cluster = 2 * 1024 * 1024; |
5044 | int allowed_chunk_alloc = 0; | 5129 | int allowed_chunk_alloc = 0; |
5045 | int done_chunk_alloc = 0; | 5130 | int done_chunk_alloc = 0; |
5046 | struct btrfs_space_info *space_info; | 5131 | struct btrfs_space_info *space_info; |
5047 | int last_ptr_loop = 0; | ||
5048 | int loop = 0; | 5132 | int loop = 0; |
5049 | int index = 0; | 5133 | int index = 0; |
5050 | int alloc_type = (data & BTRFS_BLOCK_GROUP_DATA) ? | 5134 | int alloc_type = (data & BTRFS_BLOCK_GROUP_DATA) ? |
@@ -5106,6 +5190,7 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans, | |||
5106 | ideal_cache: | 5190 | ideal_cache: |
5107 | block_group = btrfs_lookup_block_group(root->fs_info, | 5191 | block_group = btrfs_lookup_block_group(root->fs_info, |
5108 | search_start); | 5192 | search_start); |
5193 | used_block_group = block_group; | ||
5109 | /* | 5194 | /* |
5110 | * we don't want to use the block group if it doesn't match our | 5195 | * we don't want to use the block group if it doesn't match our |
5111 | * allocation bits, or if its not cached. | 5196 | * allocation bits, or if its not cached. |
@@ -5143,6 +5228,7 @@ search: | |||
5143 | u64 offset; | 5228 | u64 offset; |
5144 | int cached; | 5229 | int cached; |
5145 | 5230 | ||
5231 | used_block_group = block_group; | ||
5146 | btrfs_get_block_group(block_group); | 5232 | btrfs_get_block_group(block_group); |
5147 | search_start = block_group->key.objectid; | 5233 | search_start = block_group->key.objectid; |
5148 | 5234 | ||
@@ -5166,13 +5252,15 @@ search: | |||
5166 | } | 5252 | } |
5167 | 5253 | ||
5168 | have_block_group: | 5254 | have_block_group: |
5169 | if (unlikely(block_group->cached == BTRFS_CACHE_NO)) { | 5255 | cached = block_group_cache_done(block_group); |
5256 | if (unlikely(!cached)) { | ||
5170 | u64 free_percent; | 5257 | u64 free_percent; |
5171 | 5258 | ||
5259 | found_uncached_bg = true; | ||
5172 | ret = cache_block_group(block_group, trans, | 5260 | ret = cache_block_group(block_group, trans, |
5173 | orig_root, 1); | 5261 | orig_root, 1); |
5174 | if (block_group->cached == BTRFS_CACHE_FINISHED) | 5262 | if (block_group->cached == BTRFS_CACHE_FINISHED) |
5175 | goto have_block_group; | 5263 | goto alloc; |
5176 | 5264 | ||
5177 | free_percent = btrfs_block_group_used(&block_group->item); | 5265 | free_percent = btrfs_block_group_used(&block_group->item); |
5178 | free_percent *= 100; | 5266 | free_percent *= 100; |
@@ -5194,7 +5282,6 @@ have_block_group: | |||
5194 | orig_root, 0); | 5282 | orig_root, 0); |
5195 | BUG_ON(ret); | 5283 | BUG_ON(ret); |
5196 | } | 5284 | } |
5197 | found_uncached_bg = true; | ||
5198 | 5285 | ||
5199 | /* | 5286 | /* |
5200 | * If loop is set for cached only, try the next block | 5287 | * If loop is set for cached only, try the next block |
@@ -5204,94 +5291,80 @@ have_block_group: | |||
5204 | goto loop; | 5291 | goto loop; |
5205 | } | 5292 | } |
5206 | 5293 | ||
5207 | cached = block_group_cache_done(block_group); | 5294 | alloc: |
5208 | if (unlikely(!cached)) | ||
5209 | found_uncached_bg = true; | ||
5210 | |||
5211 | if (unlikely(block_group->ro)) | 5295 | if (unlikely(block_group->ro)) |
5212 | goto loop; | 5296 | goto loop; |
5213 | 5297 | ||
5214 | spin_lock(&block_group->free_space_ctl->tree_lock); | 5298 | spin_lock(&block_group->free_space_ctl->tree_lock); |
5215 | if (cached && | 5299 | if (cached && |
5216 | block_group->free_space_ctl->free_space < | 5300 | block_group->free_space_ctl->free_space < |
5217 | num_bytes + empty_size) { | 5301 | num_bytes + empty_cluster + empty_size) { |
5218 | spin_unlock(&block_group->free_space_ctl->tree_lock); | 5302 | spin_unlock(&block_group->free_space_ctl->tree_lock); |
5219 | goto loop; | 5303 | goto loop; |
5220 | } | 5304 | } |
5221 | spin_unlock(&block_group->free_space_ctl->tree_lock); | 5305 | spin_unlock(&block_group->free_space_ctl->tree_lock); |
5222 | 5306 | ||
5223 | /* | 5307 | /* |
5224 | * Ok we want to try and use the cluster allocator, so lets look | 5308 | * Ok we want to try and use the cluster allocator, so |
5225 | * there, unless we are on LOOP_NO_EMPTY_SIZE, since we will | 5309 | * let's look there |
5226 | * have tried the cluster allocator plenty of times at this | ||
5227 | * point and not have found anything, so we are likely way too | ||
5228 | * fragmented for the clustering stuff to find anything, so lets | ||
5229 | * just skip it and let the allocator find whatever block it can | ||
5230 | * find | ||
5231 | */ | 5310 | */ |
5232 | if (last_ptr && loop < LOOP_NO_EMPTY_SIZE) { | 5311 | if (last_ptr) { |
5233 | /* | 5312 | /* |
5234 | * the refill lock keeps out other | 5313 | * the refill lock keeps out other |
5235 | * people trying to start a new cluster | 5314 | * people trying to start a new cluster |
5236 | */ | 5315 | */ |
5237 | spin_lock(&last_ptr->refill_lock); | 5316 | spin_lock(&last_ptr->refill_lock); |
5238 | if (last_ptr->block_group && | 5317 | used_block_group = last_ptr->block_group; |
5239 | (last_ptr->block_group->ro || | 5318 | if (used_block_group != block_group && |
5240 | !block_group_bits(last_ptr->block_group, data))) { | 5319 | (!used_block_group || |
5241 | offset = 0; | 5320 | used_block_group->ro || |
5321 | !block_group_bits(used_block_group, data))) { | ||
5322 | used_block_group = block_group; | ||
5242 | goto refill_cluster; | 5323 | goto refill_cluster; |
5243 | } | 5324 | } |
5244 | 5325 | ||
5245 | offset = btrfs_alloc_from_cluster(block_group, last_ptr, | 5326 | if (used_block_group != block_group) |
5246 | num_bytes, search_start); | 5327 | btrfs_get_block_group(used_block_group); |
5328 | |||
5329 | offset = btrfs_alloc_from_cluster(used_block_group, | ||
5330 | last_ptr, num_bytes, used_block_group->key.objectid); | ||
5247 | if (offset) { | 5331 | if (offset) { |
5248 | /* we have a block, we're done */ | 5332 | /* we have a block, we're done */ |
5249 | spin_unlock(&last_ptr->refill_lock); | 5333 | spin_unlock(&last_ptr->refill_lock); |
5250 | goto checks; | 5334 | goto checks; |
5251 | } | 5335 | } |
5252 | 5336 | ||
5253 | spin_lock(&last_ptr->lock); | 5337 | WARN_ON(last_ptr->block_group != used_block_group); |
5254 | /* | 5338 | if (used_block_group != block_group) { |
5255 | * whoops, this cluster doesn't actually point to | 5339 | btrfs_put_block_group(used_block_group); |
5256 | * this block group. Get a ref on the block | 5340 | used_block_group = block_group; |
5257 | * group it does point to and try again | ||
5258 | */ | ||
5259 | if (!last_ptr_loop && last_ptr->block_group && | ||
5260 | last_ptr->block_group != block_group && | ||
5261 | index <= | ||
5262 | get_block_group_index(last_ptr->block_group)) { | ||
5263 | |||
5264 | btrfs_put_block_group(block_group); | ||
5265 | block_group = last_ptr->block_group; | ||
5266 | btrfs_get_block_group(block_group); | ||
5267 | spin_unlock(&last_ptr->lock); | ||
5268 | spin_unlock(&last_ptr->refill_lock); | ||
5269 | |||
5270 | last_ptr_loop = 1; | ||
5271 | search_start = block_group->key.objectid; | ||
5272 | /* | ||
5273 | * we know this block group is properly | ||
5274 | * in the list because | ||
5275 | * btrfs_remove_block_group, drops the | ||
5276 | * cluster before it removes the block | ||
5277 | * group from the list | ||
5278 | */ | ||
5279 | goto have_block_group; | ||
5280 | } | 5341 | } |
5281 | spin_unlock(&last_ptr->lock); | ||
5282 | refill_cluster: | 5342 | refill_cluster: |
5343 | BUG_ON(used_block_group != block_group); | ||
5344 | /* If we are on LOOP_NO_EMPTY_SIZE, we can't | ||
5345 | * set up a new cluster, so let's just skip it | ||
5346 | * and let the allocator find whatever block | ||
5347 | * it can find. If we reach this point, we | ||
5348 | * will have tried the cluster allocator | ||
5349 | * plenty of times and not have found | ||
5350 | * anything, so we are likely way too | ||
5351 | * fragmented for the clustering stuff to find | ||
5352 | * anything. */ | ||
5353 | if (loop >= LOOP_NO_EMPTY_SIZE) { | ||
5354 | spin_unlock(&last_ptr->refill_lock); | ||
5355 | goto unclustered_alloc; | ||
5356 | } | ||
5357 | |||
5283 | /* | 5358 | /* |
5284 | * this cluster didn't work out, free it and | 5359 | * this cluster didn't work out, free it and |
5285 | * start over | 5360 | * start over |
5286 | */ | 5361 | */ |
5287 | btrfs_return_cluster_to_free_space(NULL, last_ptr); | 5362 | btrfs_return_cluster_to_free_space(NULL, last_ptr); |
5288 | 5363 | ||
5289 | last_ptr_loop = 0; | ||
5290 | |||
5291 | /* allocate a cluster in this block group */ | 5364 | /* allocate a cluster in this block group */ |
5292 | ret = btrfs_find_space_cluster(trans, root, | 5365 | ret = btrfs_find_space_cluster(trans, root, |
5293 | block_group, last_ptr, | 5366 | block_group, last_ptr, |
5294 | offset, num_bytes, | 5367 | search_start, num_bytes, |
5295 | empty_cluster + empty_size); | 5368 | empty_cluster + empty_size); |
5296 | if (ret == 0) { | 5369 | if (ret == 0) { |
5297 | /* | 5370 | /* |
@@ -5327,6 +5400,7 @@ refill_cluster: | |||
5327 | goto loop; | 5400 | goto loop; |
5328 | } | 5401 | } |
5329 | 5402 | ||
5403 | unclustered_alloc: | ||
5330 | offset = btrfs_find_space_for_alloc(block_group, search_start, | 5404 | offset = btrfs_find_space_for_alloc(block_group, search_start, |
5331 | num_bytes, empty_size); | 5405 | num_bytes, empty_size); |
5332 | /* | 5406 | /* |
@@ -5353,14 +5427,14 @@ checks: | |||
5353 | search_start = stripe_align(root, offset); | 5427 | search_start = stripe_align(root, offset); |
5354 | /* move on to the next group */ | 5428 | /* move on to the next group */ |
5355 | if (search_start + num_bytes >= search_end) { | 5429 | if (search_start + num_bytes >= search_end) { |
5356 | btrfs_add_free_space(block_group, offset, num_bytes); | 5430 | btrfs_add_free_space(used_block_group, offset, num_bytes); |
5357 | goto loop; | 5431 | goto loop; |
5358 | } | 5432 | } |
5359 | 5433 | ||
5360 | /* move on to the next group */ | 5434 | /* move on to the next group */ |
5361 | if (search_start + num_bytes > | 5435 | if (search_start + num_bytes > |
5362 | block_group->key.objectid + block_group->key.offset) { | 5436 | used_block_group->key.objectid + used_block_group->key.offset) { |
5363 | btrfs_add_free_space(block_group, offset, num_bytes); | 5437 | btrfs_add_free_space(used_block_group, offset, num_bytes); |
5364 | goto loop; | 5438 | goto loop; |
5365 | } | 5439 | } |
5366 | 5440 | ||
@@ -5368,14 +5442,14 @@ checks: | |||
5368 | ins->offset = num_bytes; | 5442 | ins->offset = num_bytes; |
5369 | 5443 | ||
5370 | if (offset < search_start) | 5444 | if (offset < search_start) |
5371 | btrfs_add_free_space(block_group, offset, | 5445 | btrfs_add_free_space(used_block_group, offset, |
5372 | search_start - offset); | 5446 | search_start - offset); |
5373 | BUG_ON(offset > search_start); | 5447 | BUG_ON(offset > search_start); |
5374 | 5448 | ||
5375 | ret = btrfs_update_reserved_bytes(block_group, num_bytes, | 5449 | ret = btrfs_update_reserved_bytes(used_block_group, num_bytes, |
5376 | alloc_type); | 5450 | alloc_type); |
5377 | if (ret == -EAGAIN) { | 5451 | if (ret == -EAGAIN) { |
5378 | btrfs_add_free_space(block_group, offset, num_bytes); | 5452 | btrfs_add_free_space(used_block_group, offset, num_bytes); |
5379 | goto loop; | 5453 | goto loop; |
5380 | } | 5454 | } |
5381 | 5455 | ||
@@ -5384,15 +5458,19 @@ checks: | |||
5384 | ins->offset = num_bytes; | 5458 | ins->offset = num_bytes; |
5385 | 5459 | ||
5386 | if (offset < search_start) | 5460 | if (offset < search_start) |
5387 | btrfs_add_free_space(block_group, offset, | 5461 | btrfs_add_free_space(used_block_group, offset, |
5388 | search_start - offset); | 5462 | search_start - offset); |
5389 | BUG_ON(offset > search_start); | 5463 | BUG_ON(offset > search_start); |
5464 | if (used_block_group != block_group) | ||
5465 | btrfs_put_block_group(used_block_group); | ||
5390 | btrfs_put_block_group(block_group); | 5466 | btrfs_put_block_group(block_group); |
5391 | break; | 5467 | break; |
5392 | loop: | 5468 | loop: |
5393 | failed_cluster_refill = false; | 5469 | failed_cluster_refill = false; |
5394 | failed_alloc = false; | 5470 | failed_alloc = false; |
5395 | BUG_ON(index != get_block_group_index(block_group)); | 5471 | BUG_ON(index != get_block_group_index(block_group)); |
5472 | if (used_block_group != block_group) | ||
5473 | btrfs_put_block_group(used_block_group); | ||
5396 | btrfs_put_block_group(block_group); | 5474 | btrfs_put_block_group(block_group); |
5397 | } | 5475 | } |
5398 | up_read(&space_info->groups_sem); | 5476 | up_read(&space_info->groups_sem); |
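
The used_block_group changes above thread a second, independently pinned pointer through the allocator: when the cluster lives in a different block group it is pinned with btrfs_get_block_group(), and every exit path drops that extra reference only when the two pointers diverged. A small sketch of the same refcount discipline, with toy names in place of the real structures:

#include <stdatomic.h>
#include <stdio.h>

/* Toy refcounted object standing in for btrfs_block_group_cache. */
struct toy_group {
        atomic_int refs;
        const char *name;
};

static void toy_get(struct toy_group *g) { atomic_fetch_add(&g->refs, 1); }

static void toy_put(struct toy_group *g)
{
        if (atomic_fetch_sub(&g->refs, 1) == 1)
                printf("%s: freed\n", g->name);
}

/* The allocator pattern: pin a possibly-different group the cluster points
 * at, and on every exit drop the extra pin only if the pointers diverged. */
static void alloc_step(struct toy_group *block_group,
                       struct toy_group *cluster_group)
{
        struct toy_group *used = block_group;

        if (cluster_group && cluster_group != block_group) {
                used = cluster_group;
                toy_get(used);          /* like btrfs_get_block_group() */
        }

        /* ... try the allocation from "used" here ... */

        if (used != block_group)
                toy_put(used);          /* balance the extra pin */
}

int main(void)
{
        struct toy_group a = { 1, "a" }, b = { 1, "b" };

        alloc_step(&a, &b);     /* b is pinned and released inside */
        toy_put(&a);
        toy_put(&b);
        return 0;
}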
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 1f87c4d0e7a0..49f3c9dc09f4 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c | |||
@@ -935,8 +935,10 @@ again: | |||
935 | node = tree_search(tree, start); | 935 | node = tree_search(tree, start); |
936 | if (!node) { | 936 | if (!node) { |
937 | prealloc = alloc_extent_state_atomic(prealloc); | 937 | prealloc = alloc_extent_state_atomic(prealloc); |
938 | if (!prealloc) | 938 | if (!prealloc) { |
939 | return -ENOMEM; | 939 | err = -ENOMEM; |
940 | goto out; | ||
941 | } | ||
940 | err = insert_state(tree, prealloc, start, end, &bits); | 942 | err = insert_state(tree, prealloc, start, end, &bits); |
941 | prealloc = NULL; | 943 | prealloc = NULL; |
942 | BUG_ON(err == -EEXIST); | 944 | BUG_ON(err == -EEXIST); |
@@ -992,8 +994,10 @@ hit_next: | |||
992 | */ | 994 | */ |
993 | if (state->start < start) { | 995 | if (state->start < start) { |
994 | prealloc = alloc_extent_state_atomic(prealloc); | 996 | prealloc = alloc_extent_state_atomic(prealloc); |
995 | if (!prealloc) | 997 | if (!prealloc) { |
996 | return -ENOMEM; | 998 | err = -ENOMEM; |
999 | goto out; | ||
1000 | } | ||
997 | err = split_state(tree, state, prealloc, start); | 1001 | err = split_state(tree, state, prealloc, start); |
998 | BUG_ON(err == -EEXIST); | 1002 | BUG_ON(err == -EEXIST); |
999 | prealloc = NULL; | 1003 | prealloc = NULL; |
@@ -1024,8 +1028,10 @@ hit_next: | |||
1024 | this_end = last_start - 1; | 1028 | this_end = last_start - 1; |
1025 | 1029 | ||
1026 | prealloc = alloc_extent_state_atomic(prealloc); | 1030 | prealloc = alloc_extent_state_atomic(prealloc); |
1027 | if (!prealloc) | 1031 | if (!prealloc) { |
1028 | return -ENOMEM; | 1032 | err = -ENOMEM; |
1033 | goto out; | ||
1034 | } | ||
1029 | 1035 | ||
1030 | /* | 1036 | /* |
1031 | * Avoid to free 'prealloc' if it can be merged with | 1037 | * Avoid to free 'prealloc' if it can be merged with |
@@ -1051,8 +1057,10 @@ hit_next: | |||
1051 | */ | 1057 | */ |
1052 | if (state->start <= end && state->end > end) { | 1058 | if (state->start <= end && state->end > end) { |
1053 | prealloc = alloc_extent_state_atomic(prealloc); | 1059 | prealloc = alloc_extent_state_atomic(prealloc); |
1054 | if (!prealloc) | 1060 | if (!prealloc) { |
1055 | return -ENOMEM; | 1061 | err = -ENOMEM; |
1062 | goto out; | ||
1063 | } | ||
1056 | 1064 | ||
1057 | err = split_state(tree, state, prealloc, end + 1); | 1065 | err = split_state(tree, state, prealloc, end + 1); |
1058 | BUG_ON(err == -EEXIST); | 1066 | BUG_ON(err == -EEXIST); |
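
All four hunks above replace a bare return -ENOMEM with err = -ENOMEM; goto out, so the function's single exit point can drop the tree lock and free any leftover preallocated state. A sketch of that single-exit cleanup idiom, with a hypothetical helper standing in for the real function:

#include <errno.h>
#include <stdlib.h>
#include <string.h>

/* Single-exit error handling: every failure jumps to "out" so the lock
 * and temporary state are released exactly once. */
static int process(const char *src, size_t n)
{
        char *scratch = NULL;
        int err = 0;

        /* lock(tree); -- stand-in for taking the tree lock */
        scratch = malloc(n);
        if (!scratch) {
                err = -ENOMEM;  /* a bare "return -ENOMEM" would skip cleanup */
                goto out;
        }
        memcpy(scratch, src, n);
        /* ... more work that may also "goto out" on failure ... */
out:
        free(scratch);          /* free(NULL) is a defined no-op */
        /* unlock(tree); */
        return err;
}

int main(void) { return process("abc", 3) ? 1 : 0; }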
@@ -2285,16 +2293,22 @@ static void end_bio_extent_readpage(struct bio *bio, int err) | |||
2285 | clean_io_failure(start, page); | 2293 | clean_io_failure(start, page); |
2286 | } | 2294 | } |
2287 | if (!uptodate) { | 2295 | if (!uptodate) { |
2288 | u64 failed_mirror; | 2296 | int failed_mirror; |
2289 | failed_mirror = (u64)bio->bi_bdev; | 2297 | failed_mirror = (int)(unsigned long)bio->bi_bdev; |
2290 | if (tree->ops && tree->ops->readpage_io_failed_hook) | 2298 | /* |
2291 | ret = tree->ops->readpage_io_failed_hook( | 2299 | * The generic bio_readpage_error handles errors the |
2292 | bio, page, start, end, | 2300 | * following way: If possible, new read requests are |
2293 | failed_mirror, state); | 2301 | * created and submitted and will end up in |
2294 | else | 2302 | * end_bio_extent_readpage as well (if we're lucky, not |
2295 | ret = bio_readpage_error(bio, page, start, end, | 2303 | * in the !uptodate case). In that case it returns 0 and |
2296 | failed_mirror, NULL); | 2304 | * we just go on with the next page in our bio. If it |
2305 | * can't handle the error it will return -EIO and we | ||
2306 | * remain responsible for that page. | ||
2307 | */ | ||
2308 | ret = bio_readpage_error(bio, page, start, end, | ||
2309 | failed_mirror, NULL); | ||
2297 | if (ret == 0) { | 2310 | if (ret == 0) { |
2311 | error_handled: | ||
2298 | uptodate = | 2312 | uptodate = |
2299 | test_bit(BIO_UPTODATE, &bio->bi_flags); | 2313 | test_bit(BIO_UPTODATE, &bio->bi_flags); |
2300 | if (err) | 2314 | if (err) |
@@ -2302,6 +2316,13 @@ static void end_bio_extent_readpage(struct bio *bio, int err) | |||
2302 | uncache_state(&cached); | 2316 | uncache_state(&cached); |
2303 | continue; | 2317 | continue; |
2304 | } | 2318 | } |
2319 | if (tree->ops && tree->ops->readpage_io_failed_hook) { | ||
2320 | ret = tree->ops->readpage_io_failed_hook( | ||
2321 | bio, page, start, end, | ||
2322 | failed_mirror, state); | ||
2323 | if (ret == 0) | ||
2324 | goto error_handled; | ||
2325 | } | ||
2305 | } | 2326 | } |
2306 | 2327 | ||
2307 | if (uptodate) { | 2328 | if (uptodate) { |
@@ -3366,6 +3387,9 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | |||
3366 | return -ENOMEM; | 3387 | return -ENOMEM; |
3367 | path->leave_spinning = 1; | 3388 | path->leave_spinning = 1; |
3368 | 3389 | ||
3390 | start = ALIGN(start, BTRFS_I(inode)->root->sectorsize); | ||
3391 | len = ALIGN(len, BTRFS_I(inode)->root->sectorsize); | ||
3392 | |||
3369 | /* | 3393 | /* |
3370 | * lookup the last file extent. We're not using i_size here | 3394 | * lookup the last file extent. We're not using i_size here |
3371 | * because there might be preallocation past i_size | 3395 | * because there might be preallocation past i_size |
@@ -3413,7 +3437,7 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | |||
3413 | lock_extent_bits(&BTRFS_I(inode)->io_tree, start, start + len, 0, | 3437 | lock_extent_bits(&BTRFS_I(inode)->io_tree, start, start + len, 0, |
3414 | &cached_state, GFP_NOFS); | 3438 | &cached_state, GFP_NOFS); |
3415 | 3439 | ||
3416 | em = get_extent_skip_holes(inode, off, last_for_get_extent, | 3440 | em = get_extent_skip_holes(inode, start, last_for_get_extent, |
3417 | get_extent); | 3441 | get_extent); |
3418 | if (!em) | 3442 | if (!em) |
3419 | goto out; | 3443 | goto out; |
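
Aligning start and len to the sector size up front keeps the fiemap walk on whole-sector boundaries. The kernel's ALIGN rounds up to the next multiple of a power-of-two alignment; a small standalone demonstration (4096 is only an assumed sectorsize):

#include <stdint.h>
#include <stdio.h>

/* Kernel-style ALIGN for power-of-two alignments: round x up. */
#define ALIGN_UP(x, a) (((x) + (a) - 1) & ~((uint64_t)(a) - 1))

int main(void)
{
        uint64_t sectorsize = 4096;     /* typical btrfs sectorsize, assumed */
        uint64_t start = 5000, len = 10000;

        printf("start %llu -> %llu\n",
               (unsigned long long)start,
               (unsigned long long)ALIGN_UP(start, sectorsize));  /* 8192 */
        printf("len   %llu -> %llu\n",
               (unsigned long long)len,
               (unsigned long long)ALIGN_UP(len, sectorsize));    /* 12288 */
        return 0;
}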
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index feb9be0e23bc..7604c3001322 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h | |||
@@ -70,7 +70,7 @@ struct extent_io_ops { | |||
70 | unsigned long bio_flags); | 70 | unsigned long bio_flags); |
71 | int (*readpage_io_hook)(struct page *page, u64 start, u64 end); | 71 | int (*readpage_io_hook)(struct page *page, u64 start, u64 end); |
72 | int (*readpage_io_failed_hook)(struct bio *bio, struct page *page, | 72 | int (*readpage_io_failed_hook)(struct bio *bio, struct page *page, |
73 | u64 start, u64 end, u64 failed_mirror, | 73 | u64 start, u64 end, int failed_mirror, |
74 | struct extent_state *state); | 74 | struct extent_state *state); |
75 | int (*writepage_io_failed_hook)(struct bio *bio, struct page *page, | 75 | int (*writepage_io_failed_hook)(struct bio *bio, struct page *page, |
76 | u64 start, u64 end, | 76 | u64 start, u64 end, |
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index dafdfa059bf6..97fbe939c050 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c | |||
@@ -1167,6 +1167,8 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file, | |||
1167 | nrptrs = min((iov_iter_count(i) + PAGE_CACHE_SIZE - 1) / | 1167 | nrptrs = min((iov_iter_count(i) + PAGE_CACHE_SIZE - 1) / |
1168 | PAGE_CACHE_SIZE, PAGE_CACHE_SIZE / | 1168 | PAGE_CACHE_SIZE, PAGE_CACHE_SIZE / |
1169 | (sizeof(struct page *))); | 1169 | (sizeof(struct page *))); |
1170 | nrptrs = min(nrptrs, current->nr_dirtied_pause - current->nr_dirtied); | ||
1171 | nrptrs = max(nrptrs, 8); | ||
1170 | pages = kmalloc(nrptrs * sizeof(struct page *), GFP_KERNEL); | 1172 | pages = kmalloc(nrptrs * sizeof(struct page *), GFP_KERNEL); |
1171 | if (!pages) | 1173 | if (!pages) |
1172 | return -ENOMEM; | 1174 | return -ENOMEM; |
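
The two new lines bound the write batch by the task's dirty-throttling headroom (nr_dirtied_pause - nr_dirtied) while keeping at least 8 pages per batch so small headroom doesn't degrade into tiny writes. A sketch of the clamping, with illustrative parameter names:

#include <stdio.h>

#define MIN(a, b) ((a) < (b) ? (a) : (b))
#define MAX(a, b) ((a) > (b) ? (a) : (b))

/* Cap the page batch by both the pointer-array budget and the task's
 * dirty-throttling headroom, but never drop below 8 pages. */
static int batch_pages(long bytes_left, long page_size,
                       long dirty_pause, long dirty_so_far)
{
        long nrptrs = MIN((bytes_left + page_size - 1) / page_size,
                          page_size / (long)sizeof(void *));
        nrptrs = MIN(nrptrs, dirty_pause - dirty_so_far);
        return (int)MAX(nrptrs, 8);
}

int main(void)
{
        /* e.g. 1 MiB left, 4 KiB pages, throttle headroom of 3 pages -> 8 */
        printf("%d\n", batch_pages(1L << 20, 4096, 32, 29));
        return 0;
}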
@@ -1387,7 +1389,11 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb, | |||
1387 | goto out; | 1389 | goto out; |
1388 | } | 1390 | } |
1389 | 1391 | ||
1390 | file_update_time(file); | 1392 | err = btrfs_update_time(file); |
1393 | if (err) { | ||
1394 | mutex_unlock(&inode->i_mutex); | ||
1395 | goto out; | ||
1396 | } | ||
1391 | BTRFS_I(inode)->sequence++; | 1397 | BTRFS_I(inode)->sequence++; |
1392 | 1398 | ||
1393 | start_pos = round_down(pos, root->sectorsize); | 1399 | start_pos = round_down(pos, root->sectorsize); |
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 7a15fcfb3e1f..ec23d43d0c35 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c | |||
@@ -351,6 +351,11 @@ static int io_ctl_prepare_pages(struct io_ctl *io_ctl, struct inode *inode, | |||
351 | } | 351 | } |
352 | } | 352 | } |
353 | 353 | ||
354 | for (i = 0; i < io_ctl->num_pages; i++) { | ||
355 | clear_page_dirty_for_io(io_ctl->pages[i]); | ||
356 | set_page_extent_mapped(io_ctl->pages[i]); | ||
357 | } | ||
358 | |||
354 | return 0; | 359 | return 0; |
355 | } | 360 | } |
356 | 361 | ||
@@ -537,6 +542,13 @@ static int io_ctl_read_entry(struct io_ctl *io_ctl, | |||
537 | struct btrfs_free_space *entry, u8 *type) | 542 | struct btrfs_free_space *entry, u8 *type) |
538 | { | 543 | { |
539 | struct btrfs_free_space_entry *e; | 544 | struct btrfs_free_space_entry *e; |
545 | int ret; | ||
546 | |||
547 | if (!io_ctl->cur) { | ||
548 | ret = io_ctl_check_crc(io_ctl, io_ctl->index); | ||
549 | if (ret) | ||
550 | return ret; | ||
551 | } | ||
540 | 552 | ||
541 | e = io_ctl->cur; | 553 | e = io_ctl->cur; |
542 | entry->offset = le64_to_cpu(e->offset); | 554 | entry->offset = le64_to_cpu(e->offset); |
@@ -550,10 +562,7 @@ static int io_ctl_read_entry(struct io_ctl *io_ctl, | |||
550 | 562 | ||
551 | io_ctl_unmap_page(io_ctl); | 563 | io_ctl_unmap_page(io_ctl); |
552 | 564 | ||
553 | if (io_ctl->index >= io_ctl->num_pages) | 565 | return 0; |
554 | return 0; | ||
555 | |||
556 | return io_ctl_check_crc(io_ctl, io_ctl->index); | ||
557 | } | 566 | } |
558 | 567 | ||
559 | static int io_ctl_read_bitmap(struct io_ctl *io_ctl, | 568 | static int io_ctl_read_bitmap(struct io_ctl *io_ctl, |
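
With the hunks above, io_ctl_read_entry verifies a page's checksum the first time an entry is actually read from it (a NULL io_ctl->cur marks an unmapped page), instead of eagerly checking the next page after every entry. A toy version of that verify-on-first-use pattern; none of these names are the real io_ctl API:

#include <stdint.h>
#include <stdio.h>

struct toy_ctl {
        const uint8_t *cur;     /* current position, NULL = page not mapped */
        const uint8_t *page;
};

static int toy_check_crc(struct toy_ctl *c)
{
        /* the real code compares a stored crc32c; pretend it passes */
        c->cur = c->page;
        return 0;
}

static int toy_read_entry(struct toy_ctl *c, uint8_t *out)
{
        if (!c->cur) {                  /* first read from this page */
                int ret = toy_check_crc(c);
                if (ret)
                        return ret;     /* corrupt page: fail before any use */
        }
        *out = *c->cur++;
        return 0;
}

int main(void)
{
        static const uint8_t page[2] = { 42, 43 };
        struct toy_ctl c = { 0, page };
        uint8_t v;

        toy_read_entry(&c, &v);         /* triggers the CRC check */
        toy_read_entry(&c, &v);         /* page already verified */
        printf("%u\n", v);              /* 43 */
        return 0;
}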
@@ -561,9 +570,6 @@ static int io_ctl_read_bitmap(struct io_ctl *io_ctl, | |||
561 | { | 570 | { |
562 | int ret; | 571 | int ret; |
563 | 572 | ||
564 | if (io_ctl->cur && io_ctl->cur != io_ctl->orig) | ||
565 | io_ctl_unmap_page(io_ctl); | ||
566 | |||
567 | ret = io_ctl_check_crc(io_ctl, io_ctl->index); | 573 | ret = io_ctl_check_crc(io_ctl, io_ctl->index); |
568 | if (ret) | 574 | if (ret) |
569 | return ret; | 575 | return ret; |
@@ -699,6 +705,8 @@ int __load_free_space_cache(struct btrfs_root *root, struct inode *inode, | |||
699 | num_entries--; | 705 | num_entries--; |
700 | } | 706 | } |
701 | 707 | ||
708 | io_ctl_unmap_page(&io_ctl); | ||
709 | |||
702 | /* | 710 | /* |
703 | * We add the bitmaps at the end of the entries in order that | 711 | * We add the bitmaps at the end of the entries in order that |
704 | * the bitmap entries are added to the cache. | 712 | * the bitmap entries are added to the cache. |
@@ -1462,6 +1470,7 @@ static void add_new_bitmap(struct btrfs_free_space_ctl *ctl, | |||
1462 | { | 1470 | { |
1463 | info->offset = offset_to_bitmap(ctl, offset); | 1471 | info->offset = offset_to_bitmap(ctl, offset); |
1464 | info->bytes = 0; | 1472 | info->bytes = 0; |
1473 | INIT_LIST_HEAD(&info->list); | ||
1465 | link_free_space(ctl, info); | 1474 | link_free_space(ctl, info); |
1466 | ctl->total_bitmaps++; | 1475 | ctl->total_bitmaps++; |
1467 | 1476 | ||
@@ -1841,7 +1850,13 @@ again: | |||
1841 | info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset), | 1850 | info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset), |
1842 | 1, 0); | 1851 | 1, 0); |
1843 | if (!info) { | 1852 | if (!info) { |
1844 | WARN_ON(1); | 1853 | /* the tree logging code might be calling us before we |
1854 | * have fully loaded the free space rbtree for this | ||
1855 | * block group. So it is possible the entry won't | ||
1856 | * be in the rbtree yet at all. The caching code | ||
1857 | * will make sure not to put it in the rbtree if | ||
1858 | * the logging code has pinned it. | ||
1859 | */ | ||
1845 | goto out_lock; | 1860 | goto out_lock; |
1846 | } | 1861 | } |
1847 | } | 1862 | } |
@@ -2305,6 +2320,7 @@ again: | |||
2305 | 2320 | ||
2306 | if (!found) { | 2321 | if (!found) { |
2307 | start = i; | 2322 | start = i; |
2323 | cluster->max_size = 0; | ||
2308 | found = true; | 2324 | found = true; |
2309 | } | 2325 | } |
2310 | 2326 | ||
@@ -2448,16 +2464,23 @@ setup_cluster_bitmap(struct btrfs_block_group_cache *block_group, | |||
2448 | { | 2464 | { |
2449 | struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; | 2465 | struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; |
2450 | struct btrfs_free_space *entry; | 2466 | struct btrfs_free_space *entry; |
2451 | struct rb_node *node; | ||
2452 | int ret = -ENOSPC; | 2467 | int ret = -ENOSPC; |
2468 | u64 bitmap_offset = offset_to_bitmap(ctl, offset); | ||
2453 | 2469 | ||
2454 | if (ctl->total_bitmaps == 0) | 2470 | if (ctl->total_bitmaps == 0) |
2455 | return -ENOSPC; | 2471 | return -ENOSPC; |
2456 | 2472 | ||
2457 | /* | 2473 | /* |
2458 | * First check our cached list of bitmaps and see if there is an entry | 2474 | * The bitmap that covers offset won't be in the list unless offset |
2459 | * here that will work. | 2475 | * is just its start offset. |
2460 | */ | 2476 | */ |
2477 | entry = list_first_entry(bitmaps, struct btrfs_free_space, list); | ||
2478 | if (entry->offset != bitmap_offset) { | ||
2479 | entry = tree_search_offset(ctl, bitmap_offset, 1, 0); | ||
2480 | if (entry && list_empty(&entry->list)) | ||
2481 | list_add(&entry->list, bitmaps); | ||
2482 | } | ||
2483 | |||
2461 | list_for_each_entry(entry, bitmaps, list) { | 2484 | list_for_each_entry(entry, bitmaps, list) { |
2462 | if (entry->bytes < min_bytes) | 2485 | if (entry->bytes < min_bytes) |
2463 | continue; | 2486 | continue; |
@@ -2468,38 +2491,10 @@ setup_cluster_bitmap(struct btrfs_block_group_cache *block_group, | |||
2468 | } | 2491 | } |
2469 | 2492 | ||
2470 | /* | 2493 | /* |
2471 | * If we do have entries on our list and we are here then we didn't find | 2494 | * The bitmaps list has all the bitmaps that record free space |
2472 | * anything, so go ahead and get the next entry after the last entry in | 2495 | * starting after offset, so no more search is required. |
2473 | * this list and start the search from there. | ||
2474 | */ | 2496 | */ |
2475 | if (!list_empty(bitmaps)) { | 2497 | return -ENOSPC; |
2476 | entry = list_entry(bitmaps->prev, struct btrfs_free_space, | ||
2477 | list); | ||
2478 | node = rb_next(&entry->offset_index); | ||
2479 | if (!node) | ||
2480 | return -ENOSPC; | ||
2481 | entry = rb_entry(node, struct btrfs_free_space, offset_index); | ||
2482 | goto search; | ||
2483 | } | ||
2484 | |||
2485 | entry = tree_search_offset(ctl, offset_to_bitmap(ctl, offset), 0, 1); | ||
2486 | if (!entry) | ||
2487 | return -ENOSPC; | ||
2488 | |||
2489 | search: | ||
2490 | node = &entry->offset_index; | ||
2491 | do { | ||
2492 | entry = rb_entry(node, struct btrfs_free_space, offset_index); | ||
2493 | node = rb_next(&entry->offset_index); | ||
2494 | if (!entry->bitmap) | ||
2495 | continue; | ||
2496 | if (entry->bytes < min_bytes) | ||
2497 | continue; | ||
2498 | ret = btrfs_bitmap_cluster(block_group, entry, cluster, offset, | ||
2499 | bytes, min_bytes); | ||
2500 | } while (ret && node); | ||
2501 | |||
2502 | return ret; | ||
2503 | } | 2498 | } |
2504 | 2499 | ||
2505 | /* | 2500 | /* |
@@ -2517,8 +2512,8 @@ int btrfs_find_space_cluster(struct btrfs_trans_handle *trans, | |||
2517 | u64 offset, u64 bytes, u64 empty_size) | 2512 | u64 offset, u64 bytes, u64 empty_size) |
2518 | { | 2513 | { |
2519 | struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; | 2514 | struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; |
2520 | struct list_head bitmaps; | ||
2521 | struct btrfs_free_space *entry, *tmp; | 2515 | struct btrfs_free_space *entry, *tmp; |
2516 | LIST_HEAD(bitmaps); | ||
2522 | u64 min_bytes; | 2517 | u64 min_bytes; |
2523 | int ret; | 2518 | int ret; |
2524 | 2519 | ||
@@ -2557,7 +2552,6 @@ int btrfs_find_space_cluster(struct btrfs_trans_handle *trans, | |||
2557 | goto out; | 2552 | goto out; |
2558 | } | 2553 | } |
2559 | 2554 | ||
2560 | INIT_LIST_HEAD(&bitmaps); | ||
2561 | ret = setup_cluster_no_bitmap(block_group, cluster, &bitmaps, offset, | 2555 | ret = setup_cluster_no_bitmap(block_group, cluster, &bitmaps, offset, |
2562 | bytes, min_bytes); | 2556 | bytes, min_bytes); |
2563 | if (ret) | 2557 | if (ret) |
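
Replacing the separate struct list_head plus INIT_LIST_HEAD() with LIST_HEAD(bitmaps) initializes the list head at its definition, so it is valid on every path through the function, including the early exits above it; the matching INIT_LIST_HEAD(&info->list) in add_new_bitmap gives each bitmap entry a usable list node before setup_cluster_bitmap tests list_empty() on it. A compact sketch of the two initialization idioms:

#include <stdio.h>

/* Minimal circular doubly-linked list head, as in the kernel's list.h. */
struct list_head {
        struct list_head *next, *prev;
};

/* Definition-time initialization, like the kernel's LIST_HEAD(name). */
#define LIST_HEAD_INIT(name) { &(name), &(name) }
#define LIST_HEAD(name) struct list_head name = LIST_HEAD_INIT(name)

static void INIT_LIST_HEAD(struct list_head *h)  /* runtime variant */
{
        h->next = h->prev = h;
}

static int list_empty(const struct list_head *h)
{
        return h->next == h;
}

int main(void)
{
        LIST_HEAD(bitmaps);     /* valid (empty) from the first statement */
        struct list_head late;

        INIT_LIST_HEAD(&late);  /* must run before any use of "late" */
        printf("%d %d\n", list_empty(&bitmaps), list_empty(&late));
        return 0;
}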
diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c index 53dcbdf446cd..f8962a957d65 100644 --- a/fs/btrfs/inode-map.c +++ b/fs/btrfs/inode-map.c | |||
@@ -398,6 +398,8 @@ int btrfs_save_ino_cache(struct btrfs_root *root, | |||
398 | struct btrfs_free_space_ctl *ctl = root->free_ino_ctl; | 398 | struct btrfs_free_space_ctl *ctl = root->free_ino_ctl; |
399 | struct btrfs_path *path; | 399 | struct btrfs_path *path; |
400 | struct inode *inode; | 400 | struct inode *inode; |
401 | struct btrfs_block_rsv *rsv; | ||
402 | u64 num_bytes; | ||
401 | u64 alloc_hint = 0; | 403 | u64 alloc_hint = 0; |
402 | int ret; | 404 | int ret; |
403 | int prealloc; | 405 | int prealloc; |
@@ -421,11 +423,26 @@ int btrfs_save_ino_cache(struct btrfs_root *root, | |||
421 | if (!path) | 423 | if (!path) |
422 | return -ENOMEM; | 424 | return -ENOMEM; |
423 | 425 | ||
426 | rsv = trans->block_rsv; | ||
427 | trans->block_rsv = &root->fs_info->trans_block_rsv; | ||
428 | |||
429 | num_bytes = trans->bytes_reserved; | ||
430 | /* | ||
431 | * 1 item for inode item insertion if needed | ||
432 | * 3 items for inode item update (in the worst case) | ||
433 | * 1 item for free space object | ||
434 | * 3 items for pre-allocation | ||
435 | */ | ||
436 | trans->bytes_reserved = btrfs_calc_trans_metadata_size(root, 8); | ||
437 | ret = btrfs_block_rsv_add_noflush(root, trans->block_rsv, | ||
438 | trans->bytes_reserved); | ||
439 | if (ret) | ||
440 | goto out; | ||
424 | again: | 441 | again: |
425 | inode = lookup_free_ino_inode(root, path); | 442 | inode = lookup_free_ino_inode(root, path); |
426 | if (IS_ERR(inode) && PTR_ERR(inode) != -ENOENT) { | 443 | if (IS_ERR(inode) && PTR_ERR(inode) != -ENOENT) { |
427 | ret = PTR_ERR(inode); | 444 | ret = PTR_ERR(inode); |
428 | goto out; | 445 | goto out_release; |
429 | } | 446 | } |
430 | 447 | ||
431 | if (IS_ERR(inode)) { | 448 | if (IS_ERR(inode)) { |
@@ -434,7 +451,7 @@ again: | |||
434 | 451 | ||
435 | ret = create_free_ino_inode(root, trans, path); | 452 | ret = create_free_ino_inode(root, trans, path); |
436 | if (ret) | 453 | if (ret) |
437 | goto out; | 454 | goto out_release; |
438 | goto again; | 455 | goto again; |
439 | } | 456 | } |
440 | 457 | ||
@@ -477,11 +494,14 @@ again: | |||
477 | } | 494 | } |
478 | btrfs_free_reserved_data_space(inode, prealloc); | 495 | btrfs_free_reserved_data_space(inode, prealloc); |
479 | 496 | ||
497 | ret = btrfs_write_out_ino_cache(root, trans, path); | ||
480 | out_put: | 498 | out_put: |
481 | iput(inode); | 499 | iput(inode); |
500 | out_release: | ||
501 | btrfs_block_rsv_release(root, trans->block_rsv, trans->bytes_reserved); | ||
482 | out: | 502 | out: |
483 | if (ret == 0) | 503 | trans->block_rsv = rsv; |
484 | ret = btrfs_write_out_ino_cache(root, trans, path); | 504 | trans->bytes_reserved = num_bytes; |
485 | 505 | ||
486 | btrfs_free_path(path); | 506 | btrfs_free_path(path); |
487 | return ret; | 507 | return ret; |
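
btrfs_save_ino_cache now borrows the transaction's reserve for the duration of the cache write: it stashes trans->block_rsv and trans->bytes_reserved, swaps in trans_block_rsv sized for the eight worst-case items, and restores the saved values on every exit label. A sketch of that stash/swap/restore pattern with toy types:

#include <stdio.h>

/* Toy transaction, standing in for btrfs_trans_handle (illustrative only). */
struct toy_rsv { const char *name; };
struct toy_trans {
        struct toy_rsv *block_rsv;
        unsigned long long bytes_reserved;
};

static int do_cache_work(struct toy_trans *t)
{
        printf("working with rsv=%s bytes=%llu\n",
               t->block_rsv->name, t->bytes_reserved);
        return 0;
}

/* Swap in a dedicated reserve for one operation and restore the caller's
 * values on every exit path. */
static int save_cache(struct toy_trans *t, struct toy_rsv *trans_rsv,
                      unsigned long long need)
{
        struct toy_rsv *saved_rsv = t->block_rsv;
        unsigned long long saved_bytes = t->bytes_reserved;
        int ret;

        t->block_rsv = trans_rsv;
        t->bytes_reserved = need;       /* e.g. metadata size for 8 items */

        ret = do_cache_work(t);

        t->block_rsv = saved_rsv;       /* restore unconditionally */
        t->bytes_reserved = saved_bytes;
        return ret;
}

int main(void)
{
        struct toy_rsv global = { "global" }, trans_rsv = { "trans" };
        struct toy_trans t = { &global, 0 };

        save_cache(&t, &trans_rsv, 8 * 4096ULL);
        printf("restored rsv=%s bytes=%llu\n",
               t.block_rsv->name, t.bytes_reserved);
        return 0;
}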
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 966ddcc4c63d..0a6b928813a4 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c | |||
@@ -38,6 +38,7 @@ | |||
38 | #include <linux/falloc.h> | 38 | #include <linux/falloc.h> |
39 | #include <linux/slab.h> | 39 | #include <linux/slab.h> |
40 | #include <linux/ratelimit.h> | 40 | #include <linux/ratelimit.h> |
41 | #include <linux/mount.h> | ||
41 | #include "compat.h" | 42 | #include "compat.h" |
42 | #include "ctree.h" | 43 | #include "ctree.h" |
43 | #include "disk-io.h" | 44 | #include "disk-io.h" |
@@ -93,6 +94,8 @@ static noinline int cow_file_range(struct inode *inode, | |||
93 | struct page *locked_page, | 94 | struct page *locked_page, |
94 | u64 start, u64 end, int *page_started, | 95 | u64 start, u64 end, int *page_started, |
95 | unsigned long *nr_written, int unlock); | 96 | unsigned long *nr_written, int unlock); |
97 | static noinline int btrfs_update_inode_fallback(struct btrfs_trans_handle *trans, | ||
98 | struct btrfs_root *root, struct inode *inode); | ||
96 | 99 | ||
97 | static int btrfs_init_inode_security(struct btrfs_trans_handle *trans, | 100 | static int btrfs_init_inode_security(struct btrfs_trans_handle *trans, |
98 | struct inode *inode, struct inode *dir, | 101 | struct inode *inode, struct inode *dir, |
@@ -1741,7 +1744,7 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) | |||
1741 | trans = btrfs_join_transaction(root); | 1744 | trans = btrfs_join_transaction(root); |
1742 | BUG_ON(IS_ERR(trans)); | 1745 | BUG_ON(IS_ERR(trans)); |
1743 | trans->block_rsv = &root->fs_info->delalloc_block_rsv; | 1746 | trans->block_rsv = &root->fs_info->delalloc_block_rsv; |
1744 | ret = btrfs_update_inode(trans, root, inode); | 1747 | ret = btrfs_update_inode_fallback(trans, root, inode); |
1745 | BUG_ON(ret); | 1748 | BUG_ON(ret); |
1746 | } | 1749 | } |
1747 | goto out; | 1750 | goto out; |
@@ -1791,7 +1794,7 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) | |||
1791 | 1794 | ||
1792 | ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent); | 1795 | ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent); |
1793 | if (!ret || !test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags)) { | 1796 | if (!ret || !test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags)) { |
1794 | ret = btrfs_update_inode(trans, root, inode); | 1797 | ret = btrfs_update_inode_fallback(trans, root, inode); |
1795 | BUG_ON(ret); | 1798 | BUG_ON(ret); |
1796 | } | 1799 | } |
1797 | ret = 0; | 1800 | ret = 0; |
@@ -2029,7 +2032,7 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode) | |||
2029 | /* insert an orphan item to track this unlinked/truncated file */ | 2032 | /* insert an orphan item to track this unlinked/truncated file */ |
2030 | if (insert >= 1) { | 2033 | if (insert >= 1) { |
2031 | ret = btrfs_insert_orphan_item(trans, root, btrfs_ino(inode)); | 2034 | ret = btrfs_insert_orphan_item(trans, root, btrfs_ino(inode)); |
2032 | BUG_ON(ret); | 2035 | BUG_ON(ret && ret != -EEXIST); |
2033 | } | 2036 | } |
2034 | 2037 | ||
2035 | /* insert an orphan item to track subvolume contains orphan files */ | 2038 | /* insert an orphan item to track subvolume contains orphan files */ |
@@ -2156,6 +2159,38 @@ int btrfs_orphan_cleanup(struct btrfs_root *root) | |||
2156 | if (ret && ret != -ESTALE) | 2159 | if (ret && ret != -ESTALE) |
2157 | goto out; | 2160 | goto out; |
2158 | 2161 | ||
2162 | if (ret == -ESTALE && root == root->fs_info->tree_root) { | ||
2163 | struct btrfs_root *dead_root; | ||
2164 | struct btrfs_fs_info *fs_info = root->fs_info; | ||
2165 | int is_dead_root = 0; | ||
2166 | |||
2167 | /* | ||
2168 | * this is an orphan in the tree root. Currently these | ||
2169 | * could come from 2 sources: | ||
2170 | * a) a snapshot deletion in progress | ||
2171 | * b) a free space cache inode | ||
2172 | * We need to distinguish those two, as the snapshot | ||
2173 | * orphan must not get deleted. | ||
2174 | * find_dead_roots already ran before us, so if this | ||
2175 | * is a snapshot deletion, we should find the root | ||
2176 | * in the dead_roots list | ||
2177 | */ | ||
2178 | spin_lock(&fs_info->trans_lock); | ||
2179 | list_for_each_entry(dead_root, &fs_info->dead_roots, | ||
2180 | root_list) { | ||
2181 | if (dead_root->root_key.objectid == | ||
2182 | found_key.objectid) { | ||
2183 | is_dead_root = 1; | ||
2184 | break; | ||
2185 | } | ||
2186 | } | ||
2187 | spin_unlock(&fs_info->trans_lock); | ||
2188 | if (is_dead_root) { | ||
2189 | /* prevent this orphan from being found again */ | ||
2190 | key.offset = found_key.objectid - 1; | ||
2191 | continue; | ||
2192 | } | ||
2193 | } | ||
2159 | /* | 2194 | /* |
2160 | * Inode is already gone but the orphan item is still there, | 2195 | * Inode is already gone but the orphan item is still there, |
2161 | * kill the orphan item. | 2196 | * kill the orphan item. |
@@ -2189,7 +2224,14 @@ int btrfs_orphan_cleanup(struct btrfs_root *root) | |||
2189 | continue; | 2224 | continue; |
2190 | } | 2225 | } |
2191 | nr_truncate++; | 2226 | nr_truncate++; |
2227 | /* | ||
2228 | * Need to hold the i_mutex for reservation purposes, not | ||
2229 | * a huge deal here but I have a WARN_ON in | ||
2230 | * btrfs_delalloc_reserve_space to catch offenders. | ||
2231 | */ | ||
2232 | mutex_lock(&inode->i_mutex); | ||
2192 | ret = btrfs_truncate(inode); | 2233 | ret = btrfs_truncate(inode); |
2234 | mutex_unlock(&inode->i_mutex); | ||
2193 | } else { | 2235 | } else { |
2194 | nr_unlink++; | 2236 | nr_unlink++; |
2195 | } | 2237 | } |
@@ -2199,6 +2241,9 @@ int btrfs_orphan_cleanup(struct btrfs_root *root) | |||
2199 | if (ret) | 2241 | if (ret) |
2200 | goto out; | 2242 | goto out; |
2201 | } | 2243 | } |
2244 | /* release the path since we're done with it */ | ||
2245 | btrfs_release_path(path); | ||
2246 | |||
2202 | root->orphan_cleanup_state = ORPHAN_CLEANUP_DONE; | 2247 | root->orphan_cleanup_state = ORPHAN_CLEANUP_DONE; |
2203 | 2248 | ||
2204 | if (root->orphan_block_rsv) | 2249 | if (root->orphan_block_rsv) |
@@ -2426,7 +2471,7 @@ static void fill_inode_item(struct btrfs_trans_handle *trans, | |||
2426 | /* | 2471 | /* |
2427 | * copy everything in the in-memory inode into the btree. | 2472 | * copy everything in the in-memory inode into the btree. |
2428 | */ | 2473 | */ |
2429 | noinline int btrfs_update_inode(struct btrfs_trans_handle *trans, | 2474 | static noinline int btrfs_update_inode_item(struct btrfs_trans_handle *trans, |
2430 | struct btrfs_root *root, struct inode *inode) | 2475 | struct btrfs_root *root, struct inode *inode) |
2431 | { | 2476 | { |
2432 | struct btrfs_inode_item *inode_item; | 2477 | struct btrfs_inode_item *inode_item; |
@@ -2434,21 +2479,6 @@ noinline int btrfs_update_inode(struct btrfs_trans_handle *trans, | |||
2434 | struct extent_buffer *leaf; | 2479 | struct extent_buffer *leaf; |
2435 | int ret; | 2480 | int ret; |
2436 | 2481 | ||
2437 | /* | ||
2438 | * If the inode is a free space inode, we can deadlock during commit | ||
2439 | * if we put it into the delayed code. | ||
2440 | * | ||
2441 | * The data relocation inode should also be directly updated | ||
2442 | * without delay | ||
2443 | */ | ||
2444 | if (!btrfs_is_free_space_inode(root, inode) | ||
2445 | && root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID) { | ||
2446 | ret = btrfs_delayed_update_inode(trans, root, inode); | ||
2447 | if (!ret) | ||
2448 | btrfs_set_inode_last_trans(trans, inode); | ||
2449 | return ret; | ||
2450 | } | ||
2451 | |||
2452 | path = btrfs_alloc_path(); | 2482 | path = btrfs_alloc_path(); |
2453 | if (!path) | 2483 | if (!path) |
2454 | return -ENOMEM; | 2484 | return -ENOMEM; |
@@ -2477,6 +2507,43 @@ failed: | |||
2477 | } | 2507 | } |
2478 | 2508 | ||
2479 | /* | 2509 | /* |
2510 | * copy everything in the in-memory inode into the btree. | ||
2511 | */ | ||
2512 | noinline int btrfs_update_inode(struct btrfs_trans_handle *trans, | ||
2513 | struct btrfs_root *root, struct inode *inode) | ||
2514 | { | ||
2515 | int ret; | ||
2516 | |||
2517 | /* | ||
2518 | * If the inode is a free space inode, we can deadlock during commit | ||
2519 | * if we put it into the delayed code. | ||
2520 | * | ||
2521 | * The data relocation inode should also be directly updated | ||
2522 | * without delay | ||
2523 | */ | ||
2524 | if (!btrfs_is_free_space_inode(root, inode) | ||
2525 | && root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID) { | ||
2526 | ret = btrfs_delayed_update_inode(trans, root, inode); | ||
2527 | if (!ret) | ||
2528 | btrfs_set_inode_last_trans(trans, inode); | ||
2529 | return ret; | ||
2530 | } | ||
2531 | |||
2532 | return btrfs_update_inode_item(trans, root, inode); | ||
2533 | } | ||
2534 | |||
2535 | static noinline int btrfs_update_inode_fallback(struct btrfs_trans_handle *trans, | ||
2536 | struct btrfs_root *root, struct inode *inode) | ||
2537 | { | ||
2538 | int ret; | ||
2539 | |||
2540 | ret = btrfs_update_inode(trans, root, inode); | ||
2541 | if (ret == -ENOSPC) | ||
2542 | return btrfs_update_inode_item(trans, root, inode); | ||
2543 | return ret; | ||
2544 | } | ||
2545 | |||
2546 | /* | ||
2480 | * unlink helper that gets used here in inode.c and in the tree logging | 2547 | * unlink helper that gets used here in inode.c and in the tree logging |
2481 | * recovery code. It remove a link in a directory with a given name, and | 2548 | * recovery code. It remove a link in a directory with a given name, and |
2482 | * also drops the back refs in the inode to the directory | 2549 | * also drops the back refs in the inode to the directory |
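
btrfs_update_inode_fallback above tries the normal update first (the delayed path where allowed) and retries with a direct inode-item write only on -ENOSPC; the ordered-extent completion paths switch to it because a failure there would trip a BUG_ON. The shape of that fallback, reduced to a standalone sketch with hypothetical update functions:

#include <errno.h>
#include <stdio.h>

/* Two stand-in update paths: a fast one that may hit ENOSPC and a
 * direct one used as the fallback. */
static int update_fast(int simulate_enospc)
{
        return simulate_enospc ? -ENOSPC : 0;
}

static int update_direct(void)
{
        return 0;       /* writes the item in place, no extra reservation */
}

static int update_with_fallback(int simulate_enospc)
{
        int ret = update_fast(simulate_enospc);

        if (ret == -ENOSPC)     /* only this error is worth retrying directly */
                return update_direct();
        return ret;
}

int main(void)
{
        printf("%d %d\n", update_with_fallback(0), update_with_fallback(1));
        return 0;
}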
@@ -3300,7 +3367,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size) | |||
3300 | u64 hint_byte = 0; | 3367 | u64 hint_byte = 0; |
3301 | hole_size = last_byte - cur_offset; | 3368 | hole_size = last_byte - cur_offset; |
3302 | 3369 | ||
3303 | trans = btrfs_start_transaction(root, 2); | 3370 | trans = btrfs_start_transaction(root, 3); |
3304 | if (IS_ERR(trans)) { | 3371 | if (IS_ERR(trans)) { |
3305 | err = PTR_ERR(trans); | 3372 | err = PTR_ERR(trans); |
3306 | break; | 3373 | break; |
@@ -3310,6 +3377,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size) | |||
3310 | cur_offset + hole_size, | 3377 | cur_offset + hole_size, |
3311 | &hint_byte, 1); | 3378 | &hint_byte, 1); |
3312 | if (err) { | 3379 | if (err) { |
3380 | btrfs_update_inode(trans, root, inode); | ||
3313 | btrfs_end_transaction(trans, root); | 3381 | btrfs_end_transaction(trans, root); |
3314 | break; | 3382 | break; |
3315 | } | 3383 | } |
@@ -3319,6 +3387,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size) | |||
3319 | 0, hole_size, 0, hole_size, | 3387 | 0, hole_size, 0, hole_size, |
3320 | 0, 0, 0); | 3388 | 0, 0, 0); |
3321 | if (err) { | 3389 | if (err) { |
3390 | btrfs_update_inode(trans, root, inode); | ||
3322 | btrfs_end_transaction(trans, root); | 3391 | btrfs_end_transaction(trans, root); |
3323 | break; | 3392 | break; |
3324 | } | 3393 | } |
@@ -3326,6 +3395,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size) | |||
3326 | btrfs_drop_extent_cache(inode, hole_start, | 3395 | btrfs_drop_extent_cache(inode, hole_start, |
3327 | last_byte - 1, 0); | 3396 | last_byte - 1, 0); |
3328 | 3397 | ||
3398 | btrfs_update_inode(trans, root, inode); | ||
3329 | btrfs_end_transaction(trans, root); | 3399 | btrfs_end_transaction(trans, root); |
3330 | } | 3400 | } |
3331 | free_extent_map(em); | 3401 | free_extent_map(em); |
@@ -3343,6 +3413,8 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size) | |||
3343 | 3413 | ||
3344 | static int btrfs_setsize(struct inode *inode, loff_t newsize) | 3414 | static int btrfs_setsize(struct inode *inode, loff_t newsize) |
3345 | { | 3415 | { |
3416 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
3417 | struct btrfs_trans_handle *trans; | ||
3346 | loff_t oldsize = i_size_read(inode); | 3418 | loff_t oldsize = i_size_read(inode); |
3347 | int ret; | 3419 | int ret; |
3348 | 3420 | ||
@@ -3350,16 +3422,19 @@ static int btrfs_setsize(struct inode *inode, loff_t newsize) | |||
3350 | return 0; | 3422 | return 0; |
3351 | 3423 | ||
3352 | if (newsize > oldsize) { | 3424 | if (newsize > oldsize) { |
3353 | i_size_write(inode, newsize); | ||
3354 | btrfs_ordered_update_i_size(inode, i_size_read(inode), NULL); | ||
3355 | truncate_pagecache(inode, oldsize, newsize); | 3425 | truncate_pagecache(inode, oldsize, newsize); |
3356 | ret = btrfs_cont_expand(inode, oldsize, newsize); | 3426 | ret = btrfs_cont_expand(inode, oldsize, newsize); |
3357 | if (ret) { | 3427 | if (ret) |
3358 | btrfs_setsize(inode, oldsize); | ||
3359 | return ret; | 3428 | return ret; |
3360 | } | ||
3361 | 3429 | ||
3362 | mark_inode_dirty(inode); | 3430 | trans = btrfs_start_transaction(root, 1); |
3431 | if (IS_ERR(trans)) | ||
3432 | return PTR_ERR(trans); | ||
3433 | |||
3434 | i_size_write(inode, newsize); | ||
3435 | btrfs_ordered_update_i_size(inode, i_size_read(inode), NULL); | ||
3436 | ret = btrfs_update_inode(trans, root, inode); | ||
3437 | btrfs_end_transaction_throttle(trans, root); | ||
3363 | } else { | 3438 | } else { |
3364 | 3439 | ||
3365 | /* | 3440 | /* |
@@ -3399,9 +3474,9 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr) | |||
3399 | 3474 | ||
3400 | if (attr->ia_valid) { | 3475 | if (attr->ia_valid) { |
3401 | setattr_copy(inode, attr); | 3476 | setattr_copy(inode, attr); |
3402 | mark_inode_dirty(inode); | 3477 | err = btrfs_dirty_inode(inode); |
3403 | 3478 | ||
3404 | if (attr->ia_valid & ATTR_MODE) | 3479 | if (!err && attr->ia_valid & ATTR_MODE) |
3405 | err = btrfs_acl_chmod(inode); | 3480 | err = btrfs_acl_chmod(inode); |
3406 | } | 3481 | } |
3407 | 3482 | ||
@@ -3463,7 +3538,7 @@ void btrfs_evict_inode(struct inode *inode) | |||
3463 | * doing the truncate. | 3538 | * doing the truncate. |
3464 | */ | 3539 | */ |
3465 | while (1) { | 3540 | while (1) { |
3466 | ret = btrfs_block_rsv_refill(root, rsv, min_size); | 3541 | ret = btrfs_block_rsv_refill_noflush(root, rsv, min_size); |
3467 | 3542 | ||
3468 | /* | 3543 | /* |
3469 | * Try and steal from the global reserve since we will | 3544 | * Try and steal from the global reserve since we will |
@@ -4177,42 +4252,80 @@ int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc) | |||
4177 | * FIXME, needs more benchmarking...there are no reasons other than performance | 4252 | * FIXME, needs more benchmarking...there are no reasons other than performance |
4178 | * to keep or drop this code. | 4253 | * to keep or drop this code. |
4179 | */ | 4254 | */ |
4180 | void btrfs_dirty_inode(struct inode *inode, int flags) | 4255 | int btrfs_dirty_inode(struct inode *inode) |
4181 | { | 4256 | { |
4182 | struct btrfs_root *root = BTRFS_I(inode)->root; | 4257 | struct btrfs_root *root = BTRFS_I(inode)->root; |
4183 | struct btrfs_trans_handle *trans; | 4258 | struct btrfs_trans_handle *trans; |
4184 | int ret; | 4259 | int ret; |
4185 | 4260 | ||
4186 | if (BTRFS_I(inode)->dummy_inode) | 4261 | if (BTRFS_I(inode)->dummy_inode) |
4187 | return; | 4262 | return 0; |
4188 | 4263 | ||
4189 | trans = btrfs_join_transaction(root); | 4264 | trans = btrfs_join_transaction(root); |
4190 | BUG_ON(IS_ERR(trans)); | 4265 | if (IS_ERR(trans)) |
4266 | return PTR_ERR(trans); | ||
4191 | 4267 | ||
4192 | ret = btrfs_update_inode(trans, root, inode); | 4268 | ret = btrfs_update_inode(trans, root, inode); |
4193 | if (ret && ret == -ENOSPC) { | 4269 | if (ret && ret == -ENOSPC) { |
4194 | /* whoops, lets try again with the full transaction */ | 4270 | /* whoops, lets try again with the full transaction */ |
4195 | btrfs_end_transaction(trans, root); | 4271 | btrfs_end_transaction(trans, root); |
4196 | trans = btrfs_start_transaction(root, 1); | 4272 | trans = btrfs_start_transaction(root, 1); |
4197 | if (IS_ERR(trans)) { | 4273 | if (IS_ERR(trans)) |
4198 | printk_ratelimited(KERN_ERR "btrfs: fail to " | 4274 | return PTR_ERR(trans); |
4199 | "dirty inode %llu error %ld\n", | ||
4200 | (unsigned long long)btrfs_ino(inode), | ||
4201 | PTR_ERR(trans)); | ||
4202 | return; | ||
4203 | } | ||
4204 | 4275 | ||
4205 | ret = btrfs_update_inode(trans, root, inode); | 4276 | ret = btrfs_update_inode(trans, root, inode); |
4206 | if (ret) { | ||
4207 | printk_ratelimited(KERN_ERR "btrfs: fail to " | ||
4208 | "dirty inode %llu error %d\n", | ||
4209 | (unsigned long long)btrfs_ino(inode), | ||
4210 | ret); | ||
4211 | } | ||
4212 | } | 4277 | } |
4213 | btrfs_end_transaction(trans, root); | 4278 | btrfs_end_transaction(trans, root); |
4214 | if (BTRFS_I(inode)->delayed_node) | 4279 | if (BTRFS_I(inode)->delayed_node) |
4215 | btrfs_balance_delayed_items(root); | 4280 | btrfs_balance_delayed_items(root); |
4281 | |||
4282 | return ret; | ||
4283 | } | ||
4284 | |||
4285 | /* | ||
4286 | * This is a copy of file_update_time. We need this so we can return error on | ||
4287 | * ENOSPC for updating the inode in the case of file write and mmap writes. | ||
4288 | */ | ||
4289 | int btrfs_update_time(struct file *file) | ||
4290 | { | ||
4291 | struct inode *inode = file->f_path.dentry->d_inode; | ||
4292 | struct timespec now; | ||
4293 | int ret; | ||
4294 | enum { S_MTIME = 1, S_CTIME = 2, S_VERSION = 4 } sync_it = 0; | ||
4295 | |||
4296 | /* First try to exhaust all avenues to not sync */ | ||
4297 | if (IS_NOCMTIME(inode)) | ||
4298 | return 0; | ||
4299 | |||
4300 | now = current_fs_time(inode->i_sb); | ||
4301 | if (!timespec_equal(&inode->i_mtime, &now)) | ||
4302 | sync_it = S_MTIME; | ||
4303 | |||
4304 | if (!timespec_equal(&inode->i_ctime, &now)) | ||
4305 | sync_it |= S_CTIME; | ||
4306 | |||
4307 | if (IS_I_VERSION(inode)) | ||
4308 | sync_it |= S_VERSION; | ||
4309 | |||
4310 | if (!sync_it) | ||
4311 | return 0; | ||
4312 | |||
4313 | /* Finally allowed to write? Takes lock. */ | ||
4314 | if (mnt_want_write_file(file)) | ||
4315 | return 0; | ||
4316 | |||
4317 | /* Only change inode inside the lock region */ | ||
4318 | if (sync_it & S_VERSION) | ||
4319 | inode_inc_iversion(inode); | ||
4320 | if (sync_it & S_CTIME) | ||
4321 | inode->i_ctime = now; | ||
4322 | if (sync_it & S_MTIME) | ||
4323 | inode->i_mtime = now; | ||
4324 | ret = btrfs_dirty_inode(inode); | ||
4325 | if (!ret) | ||
4326 | mark_inode_dirty_sync(inode); | ||
4327 | mnt_drop_write(file->f_path.mnt); | ||
4328 | return ret; | ||
4216 | } | 4329 | } |
4217 | 4330 | ||
4218 | /* | 4331 | /* |
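
btrfs_update_time accumulates which timestamp fields actually changed in a small bitmask and returns before taking write access when nothing needs syncing, dirtying the inode only for the flagged fields. A userspace sketch of the accumulate-then-commit flag pattern (simplified, no locking):

#include <stdio.h>
#include <time.h>

enum { SYNC_MTIME = 1, SYNC_CTIME = 2, SYNC_VERSION = 4 };

/* Decide up front which fields changed; touch them only if any did. */
static int update_times(struct timespec *mtime, struct timespec *ctime_,
                        struct timespec now, int versioned)
{
        int sync_it = 0;

        if (mtime->tv_sec != now.tv_sec || mtime->tv_nsec != now.tv_nsec)
                sync_it |= SYNC_MTIME;
        if (ctime_->tv_sec != now.tv_sec || ctime_->tv_nsec != now.tv_nsec)
                sync_it |= SYNC_CTIME;
        if (versioned)
                sync_it |= SYNC_VERSION;

        if (!sync_it)
                return 0;       /* nothing changed: skip locking and dirtying */

        /* "lock", apply only the flagged updates, "unlock" */
        if (sync_it & SYNC_MTIME)
                *mtime = now;
        if (sync_it & SYNC_CTIME)
                *ctime_ = now;
        return sync_it;
}

int main(void)
{
        struct timespec now = { 100, 0 }, m = { 90, 0 }, c = { 100, 0 };

        printf("flags=%d\n", update_times(&m, &c, now, 1)); /* MTIME|VERSION = 5 */
        return 0;
}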
@@ -4528,11 +4641,18 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry, | |||
4528 | goto out_unlock; | 4641 | goto out_unlock; |
4529 | } | 4642 | } |
4530 | 4643 | ||
4644 | /* | ||
4645 | * If the active LSM wants to access the inode during | ||
4646 | * d_instantiate it needs these. Smack checks to see | ||
4647 | * if the filesystem supports xattrs by looking at the | ||
4648 | * ops vector. | ||
4649 | */ | ||
4650 | |||
4651 | inode->i_op = &btrfs_special_inode_operations; | ||
4531 | err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index); | 4652 | err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index); |
4532 | if (err) | 4653 | if (err) |
4533 | drop_inode = 1; | 4654 | drop_inode = 1; |
4534 | else { | 4655 | else { |
4535 | inode->i_op = &btrfs_special_inode_operations; | ||
4536 | init_special_inode(inode, inode->i_mode, rdev); | 4656 | init_special_inode(inode, inode->i_mode, rdev); |
4537 | btrfs_update_inode(trans, root, inode); | 4657 | btrfs_update_inode(trans, root, inode); |
4538 | } | 4658 | } |
@@ -4586,14 +4706,21 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, | |||
4586 | goto out_unlock; | 4706 | goto out_unlock; |
4587 | } | 4707 | } |
4588 | 4708 | ||
4709 | /* | ||
4710 | * If the active LSM wants to access the inode during | ||
4711 | * d_instantiate it needs these. Smack checks to see | ||
4712 | * if the filesystem supports xattrs by looking at the | ||
4713 | * ops vector. | ||
4714 | */ | ||
4715 | inode->i_fop = &btrfs_file_operations; | ||
4716 | inode->i_op = &btrfs_file_inode_operations; | ||
4717 | |||
4589 | err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index); | 4718 | err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index); |
4590 | if (err) | 4719 | if (err) |
4591 | drop_inode = 1; | 4720 | drop_inode = 1; |
4592 | else { | 4721 | else { |
4593 | inode->i_mapping->a_ops = &btrfs_aops; | 4722 | inode->i_mapping->a_ops = &btrfs_aops; |
4594 | inode->i_mapping->backing_dev_info = &root->fs_info->bdi; | 4723 | inode->i_mapping->backing_dev_info = &root->fs_info->bdi; |
4595 | inode->i_fop = &btrfs_file_operations; | ||
4596 | inode->i_op = &btrfs_file_inode_operations; | ||
4597 | BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; | 4724 | BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; |
4598 | } | 4725 | } |
4599 | out_unlock: | 4726 | out_unlock: |
@@ -5632,7 +5759,7 @@ again: | |||
5632 | if (test_bit(BTRFS_ORDERED_NOCOW, &ordered->flags)) { | 5759 | if (test_bit(BTRFS_ORDERED_NOCOW, &ordered->flags)) { |
5633 | ret = btrfs_ordered_update_i_size(inode, 0, ordered); | 5760 | ret = btrfs_ordered_update_i_size(inode, 0, ordered); |
5634 | if (!ret) | 5761 | if (!ret) |
5635 | err = btrfs_update_inode(trans, root, inode); | 5762 | err = btrfs_update_inode_fallback(trans, root, inode); |
5636 | goto out; | 5763 | goto out; |
5637 | } | 5764 | } |
5638 | 5765 | ||
@@ -5670,7 +5797,7 @@ again: | |||
5670 | add_pending_csums(trans, inode, ordered->file_offset, &ordered->list); | 5797 | add_pending_csums(trans, inode, ordered->file_offset, &ordered->list); |
5671 | ret = btrfs_ordered_update_i_size(inode, 0, ordered); | 5798 | ret = btrfs_ordered_update_i_size(inode, 0, ordered); |
5672 | if (!ret || !test_bit(BTRFS_ORDERED_PREALLOC, &ordered->flags)) | 5799 | if (!ret || !test_bit(BTRFS_ORDERED_PREALLOC, &ordered->flags)) |
5673 | btrfs_update_inode(trans, root, inode); | 5800 | btrfs_update_inode_fallback(trans, root, inode); |
5674 | ret = 0; | 5801 | ret = 0; |
5675 | out_unlock: | 5802 | out_unlock: |
5676 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, ordered->file_offset, | 5803 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, ordered->file_offset, |
@@ -6276,7 +6403,12 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) | |||
6276 | u64 page_start; | 6403 | u64 page_start; |
6277 | u64 page_end; | 6404 | u64 page_end; |
6278 | 6405 | ||
6406 | /* Need this to keep space reservations serialized */ | ||
6407 | mutex_lock(&inode->i_mutex); | ||
6279 | ret = btrfs_delalloc_reserve_space(inode, PAGE_CACHE_SIZE); | 6408 | ret = btrfs_delalloc_reserve_space(inode, PAGE_CACHE_SIZE); |
6409 | mutex_unlock(&inode->i_mutex); | ||
6410 | if (!ret) | ||
6411 | ret = btrfs_update_time(vma->vm_file); | ||
6280 | if (ret) { | 6412 | if (ret) { |
6281 | if (ret == -ENOMEM) | 6413 | if (ret == -ENOMEM) |
6282 | ret = VM_FAULT_OOM; | 6414 | ret = VM_FAULT_OOM; |
@@ -6488,8 +6620,9 @@ static int btrfs_truncate(struct inode *inode) | |||
6488 | /* Just need the 1 for updating the inode */ | 6620 | /* Just need the 1 for updating the inode */ |
6489 | trans = btrfs_start_transaction(root, 1); | 6621 | trans = btrfs_start_transaction(root, 1); |
6490 | if (IS_ERR(trans)) { | 6622 | if (IS_ERR(trans)) { |
6491 | err = PTR_ERR(trans); | 6623 | ret = err = PTR_ERR(trans); |
6492 | goto out; | 6624 | trans = NULL; |
6625 | break; | ||
6493 | } | 6626 | } |
6494 | } | 6627 | } |
6495 | 6628 | ||
@@ -6529,14 +6662,16 @@ end_trans: | |||
6529 | ret = btrfs_orphan_del(NULL, inode); | 6662 | ret = btrfs_orphan_del(NULL, inode); |
6530 | } | 6663 | } |
6531 | 6664 | ||
6532 | trans->block_rsv = &root->fs_info->trans_block_rsv; | 6665 | if (trans) { |
6533 | ret = btrfs_update_inode(trans, root, inode); | 6666 | trans->block_rsv = &root->fs_info->trans_block_rsv; |
6534 | if (ret && !err) | 6667 | ret = btrfs_update_inode(trans, root, inode); |
6535 | err = ret; | 6668 | if (ret && !err) |
6669 | err = ret; | ||
6536 | 6670 | ||
6537 | nr = trans->blocks_used; | 6671 | nr = trans->blocks_used; |
6538 | ret = btrfs_end_transaction_throttle(trans, root); | 6672 | ret = btrfs_end_transaction_throttle(trans, root); |
6539 | btrfs_btree_balance_dirty(root, nr); | 6673 | btrfs_btree_balance_dirty(root, nr); |
6674 | } | ||
6540 | 6675 | ||
6541 | out: | 6676 | out: |
6542 | btrfs_free_block_rsv(root, rsv); | 6677 | btrfs_free_block_rsv(root, rsv); |
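Taken together, the two btrfs_truncate() hunks change the failure path from a direct goto to recording the error, clearing trans, and leaving the loop, so the common exit code can test if (trans) before touching the handle. A compilable sketch of the combined effect (the kernel code breaks out of its while loop; the sketch collapses that to a goto, and all types are stand-ins):

#include <stdio.h>
#include <stdlib.h>

struct trans { int blocks_used; };

static struct trans *start_transaction(int fail)     /* hypothetical */
{
        return fail ? NULL : calloc(1, sizeof(struct trans));
}

static int do_truncate_step(int fail_at_start)
{
        struct trans *trans;
        int err = 0, ret;

        trans = start_transaction(fail_at_start);
        if (!trans) {
                err = -12;              /* models ret = err = PTR_ERR(trans) */
                trans = NULL;           /* make the exit path safe           */
                goto out;
        }

        /* ... truncation work would happen here ... */

out:
        if (trans) {                    /* guard added by the hunk           */
                ret = 0;                /* models btrfs_update_inode()       */
                if (ret && !err)
                        err = ret;
                free(trans);            /* models ending the transaction     */
        }
        return err;
}

int main(void)
{
        printf("ok path: %d, failure path: %d\n",
               do_truncate_step(0), do_truncate_step(1));
        return 0;
}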
@@ -6605,6 +6740,7 @@ struct inode *btrfs_alloc_inode(struct super_block *sb) | |||
6605 | ei->orphan_meta_reserved = 0; | 6740 | ei->orphan_meta_reserved = 0; |
6606 | ei->dummy_inode = 0; | 6741 | ei->dummy_inode = 0; |
6607 | ei->in_defrag = 0; | 6742 | ei->in_defrag = 0; |
6743 | ei->delalloc_meta_reserved = 0; | ||
6608 | ei->force_compress = BTRFS_COMPRESS_NONE; | 6744 | ei->force_compress = BTRFS_COMPRESS_NONE; |
6609 | 6745 | ||
6610 | ei->delayed_node = NULL; | 6746 | ei->delayed_node = NULL; |
@@ -6764,11 +6900,13 @@ static int btrfs_getattr(struct vfsmount *mnt, | |||
6764 | struct dentry *dentry, struct kstat *stat) | 6900 | struct dentry *dentry, struct kstat *stat) |
6765 | { | 6901 | { |
6766 | struct inode *inode = dentry->d_inode; | 6902 | struct inode *inode = dentry->d_inode; |
6903 | u32 blocksize = inode->i_sb->s_blocksize; | ||
6904 | |||
6767 | generic_fillattr(inode, stat); | 6905 | generic_fillattr(inode, stat); |
6768 | stat->dev = BTRFS_I(inode)->root->anon_dev; | 6906 | stat->dev = BTRFS_I(inode)->root->anon_dev; |
6769 | stat->blksize = PAGE_CACHE_SIZE; | 6907 | stat->blksize = PAGE_CACHE_SIZE; |
6770 | stat->blocks = (inode_get_bytes(inode) + | 6908 | stat->blocks = (ALIGN(inode_get_bytes(inode), blocksize) + |
6771 | BTRFS_I(inode)->delalloc_bytes) >> 9; | 6909 | ALIGN(BTRFS_I(inode)->delalloc_bytes, blocksize)) >> 9; |
6772 | return 0; | 6910 | return 0; |
6773 | } | 6911 | } |
6774 | 6912 | ||
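The btrfs_getattr() hunk rounds both byte counts up to the filesystem block size before shifting down to 512-byte sectors, so st_blocks reflects whole blocks rather than raw byte totals, including delalloc bytes that have not hit disk yet. A standalone check of the arithmetic (ALIGN() is reimplemented locally for illustration; in the kernel it comes from kernel.h):

#include <stdio.h>
#include <stdint.h>

#define ALIGN(x, a) (((x) + (a) - 1) & ~((uint64_t)(a) - 1))

int main(void)
{
        uint64_t blocksize = 4096;
        uint64_t inode_bytes = 1000;    /* on-disk byte count (made up) */
        uint64_t delalloc_bytes = 300;  /* dirty, not yet allocated     */

        /* Old formula undercounts: (1000 + 300) >> 9 == 2 sectors. */
        printf("old blocks: %llu\n",
               (unsigned long long)((inode_bytes + delalloc_bytes) >> 9));

        /* New formula rounds each component to a full fs block first:
         * (4096 + 4096) >> 9 == 16 sectors. */
        printf("new blocks: %llu\n",
               (unsigned long long)((ALIGN(inode_bytes, blocksize) +
                                     ALIGN(delalloc_bytes, blocksize)) >> 9));
        return 0;
}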
@@ -7044,14 +7182,21 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, | |||
7044 | goto out_unlock; | 7182 | goto out_unlock; |
7045 | } | 7183 | } |
7046 | 7184 | ||
7185 | /* | ||
7186 | * If the active LSM wants to access the inode during | ||
7187 | * d_instantiate it needs these. Smack checks to see | ||
7188 | * if the filesystem supports xattrs by looking at the | ||
7189 | * ops vector. | ||
7190 | */ | ||
7191 | inode->i_fop = &btrfs_file_operations; | ||
7192 | inode->i_op = &btrfs_file_inode_operations; | ||
7193 | |||
7047 | err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index); | 7194 | err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index); |
7048 | if (err) | 7195 | if (err) |
7049 | drop_inode = 1; | 7196 | drop_inode = 1; |
7050 | else { | 7197 | else { |
7051 | inode->i_mapping->a_ops = &btrfs_aops; | 7198 | inode->i_mapping->a_ops = &btrfs_aops; |
7052 | inode->i_mapping->backing_dev_info = &root->fs_info->bdi; | 7199 | inode->i_mapping->backing_dev_info = &root->fs_info->bdi; |
7053 | inode->i_fop = &btrfs_file_operations; | ||
7054 | inode->i_op = &btrfs_file_inode_operations; | ||
7055 | BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; | 7200 | BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; |
7056 | } | 7201 | } |
7057 | if (drop_inode) | 7202 | if (drop_inode) |
@@ -7321,6 +7466,7 @@ static const struct inode_operations btrfs_symlink_inode_operations = { | |||
7321 | .follow_link = page_follow_link_light, | 7466 | .follow_link = page_follow_link_light, |
7322 | .put_link = page_put_link, | 7467 | .put_link = page_put_link, |
7323 | .getattr = btrfs_getattr, | 7468 | .getattr = btrfs_getattr, |
7469 | .setattr = btrfs_setattr, | ||
7324 | .permission = btrfs_permission, | 7470 | .permission = btrfs_permission, |
7325 | .setxattr = btrfs_setxattr, | 7471 | .setxattr = btrfs_setxattr, |
7326 | .getxattr = btrfs_getxattr, | 7472 | .getxattr = btrfs_getxattr, |
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 4a34c472f126..c04f02c7d5bb 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c | |||
@@ -252,11 +252,11 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg) | |||
252 | trans = btrfs_join_transaction(root); | 252 | trans = btrfs_join_transaction(root); |
253 | BUG_ON(IS_ERR(trans)); | 253 | BUG_ON(IS_ERR(trans)); |
254 | 254 | ||
255 | btrfs_update_iflags(inode); | ||
256 | inode->i_ctime = CURRENT_TIME; | ||
255 | ret = btrfs_update_inode(trans, root, inode); | 257 | ret = btrfs_update_inode(trans, root, inode); |
256 | BUG_ON(ret); | 258 | BUG_ON(ret); |
257 | 259 | ||
258 | btrfs_update_iflags(inode); | ||
259 | inode->i_ctime = CURRENT_TIME; | ||
260 | btrfs_end_transaction(trans, root); | 260 | btrfs_end_transaction(trans, root); |
261 | 261 | ||
262 | mnt_drop_write(file->f_path.mnt); | 262 | mnt_drop_write(file->f_path.mnt); |
@@ -858,8 +858,10 @@ static int cluster_pages_for_defrag(struct inode *inode, | |||
858 | return 0; | 858 | return 0; |
859 | file_end = (isize - 1) >> PAGE_CACHE_SHIFT; | 859 | file_end = (isize - 1) >> PAGE_CACHE_SHIFT; |
860 | 860 | ||
861 | mutex_lock(&inode->i_mutex); | ||
861 | ret = btrfs_delalloc_reserve_space(inode, | 862 | ret = btrfs_delalloc_reserve_space(inode, |
862 | num_pages << PAGE_CACHE_SHIFT); | 863 | num_pages << PAGE_CACHE_SHIFT); |
864 | mutex_unlock(&inode->i_mutex); | ||
863 | if (ret) | 865 | if (ret) |
864 | return ret; | 866 | return ret; |
865 | again: | 867 | again: |
@@ -1216,12 +1218,12 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root, | |||
1216 | *devstr = '\0'; | 1218 | *devstr = '\0'; |
1217 | devstr = vol_args->name; | 1219 | devstr = vol_args->name; |
1218 | devid = simple_strtoull(devstr, &end, 10); | 1220 | devid = simple_strtoull(devstr, &end, 10); |
1219 | printk(KERN_INFO "resizing devid %llu\n", | 1221 | printk(KERN_INFO "btrfs: resizing devid %llu\n", |
1220 | (unsigned long long)devid); | 1222 | (unsigned long long)devid); |
1221 | } | 1223 | } |
1222 | device = btrfs_find_device(root, devid, NULL, NULL); | 1224 | device = btrfs_find_device(root, devid, NULL, NULL); |
1223 | if (!device) { | 1225 | if (!device) { |
1224 | printk(KERN_INFO "resizer unable to find device %llu\n", | 1226 | printk(KERN_INFO "btrfs: resizer unable to find device %llu\n", |
1225 | (unsigned long long)devid); | 1227 | (unsigned long long)devid); |
1226 | ret = -EINVAL; | 1228 | ret = -EINVAL; |
1227 | goto out_unlock; | 1229 | goto out_unlock; |
@@ -1267,7 +1269,7 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root, | |||
1267 | do_div(new_size, root->sectorsize); | 1269 | do_div(new_size, root->sectorsize); |
1268 | new_size *= root->sectorsize; | 1270 | new_size *= root->sectorsize; |
1269 | 1271 | ||
1270 | printk(KERN_INFO "new size for %s is %llu\n", | 1272 | printk(KERN_INFO "btrfs: new size for %s is %llu\n", |
1271 | device->name, (unsigned long long)new_size); | 1273 | device->name, (unsigned long long)new_size); |
1272 | 1274 | ||
1273 | if (new_size > old_size) { | 1275 | if (new_size > old_size) { |
@@ -1278,7 +1280,7 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root, | |||
1278 | } | 1280 | } |
1279 | ret = btrfs_grow_device(trans, device, new_size); | 1281 | ret = btrfs_grow_device(trans, device, new_size); |
1280 | btrfs_commit_transaction(trans, root); | 1282 | btrfs_commit_transaction(trans, root); |
1281 | } else { | 1283 | } else if (new_size < old_size) { |
1282 | ret = btrfs_shrink_device(device, new_size); | 1284 | ret = btrfs_shrink_device(device, new_size); |
1283 | } | 1285 | } |
1284 | 1286 | ||
@@ -2930,11 +2932,13 @@ static long btrfs_ioctl_ino_to_path(struct btrfs_root *root, void __user *arg) | |||
2930 | goto out; | 2932 | goto out; |
2931 | 2933 | ||
2932 | for (i = 0; i < ipath->fspath->elem_cnt; ++i) { | 2934 | for (i = 0; i < ipath->fspath->elem_cnt; ++i) { |
2933 | rel_ptr = ipath->fspath->val[i] - (u64)ipath->fspath->val; | 2935 | rel_ptr = ipath->fspath->val[i] - |
2936 | (u64)(unsigned long)ipath->fspath->val; | ||
2934 | ipath->fspath->val[i] = rel_ptr; | 2937 | ipath->fspath->val[i] = rel_ptr; |
2935 | } | 2938 | } |
2936 | 2939 | ||
2937 | ret = copy_to_user((void *)ipa->fspath, (void *)ipath->fspath, size); | 2940 | ret = copy_to_user((void *)(unsigned long)ipa->fspath, |
2941 | (void *)(unsigned long)ipath->fspath, size); | ||
2938 | if (ret) { | 2942 | if (ret) { |
2939 | ret = -EFAULT; | 2943 | ret = -EFAULT; |
2940 | goto out; | 2944 | goto out; |
@@ -3017,7 +3021,8 @@ static long btrfs_ioctl_logical_to_ino(struct btrfs_root *root, | |||
3017 | if (ret < 0) | 3021 | if (ret < 0) |
3018 | goto out; | 3022 | goto out; |
3019 | 3023 | ||
3020 | ret = copy_to_user((void *)loi->inodes, (void *)inodes, size); | 3024 | ret = copy_to_user((void *)(unsigned long)loi->inodes, |
3025 | (void *)(unsigned long)inodes, size); | ||
3021 | if (ret) | 3026 | if (ret) |
3022 | ret = -EFAULT; | 3027 | ret = -EFAULT; |
3023 | 3028 | ||
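Both ioctl hunks above repair u64-to-pointer conversions: the ino-path and logical-to-ino ioctl structs carry user pointers in u64 fields, and on a 32-bit kernel casting a u64 straight to a pointer is a cast between differently sized types. Going through (unsigned long), which always matches the native pointer width, makes the round trip exact on both 32- and 64-bit builds. A small userspace illustration:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
        int x = 42;
        uint64_t carried = (uint64_t)(unsigned long)&x;  /* store side */

        /* (int *)carried would be a different-size cast on a 32-bit
         * build; routing through unsigned long is always exact. */
        int *p = (int *)(unsigned long)carried;          /* fetch side */

        printf("%d\n", *p);
        return 0;
}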
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 24d654ce7a06..cfb55434a469 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c | |||
@@ -1174,6 +1174,8 @@ static int clone_backref_node(struct btrfs_trans_handle *trans, | |||
1174 | list_add_tail(&new_edge->list[UPPER], | 1174 | list_add_tail(&new_edge->list[UPPER], |
1175 | &new_node->lower); | 1175 | &new_node->lower); |
1176 | } | 1176 | } |
1177 | } else { | ||
1178 | list_add_tail(&new_node->lower, &cache->leaves); | ||
1177 | } | 1179 | } |
1178 | 1180 | ||
1179 | rb_node = tree_insert(&cache->rb_root, new_node->bytenr, | 1181 | rb_node = tree_insert(&cache->rb_root, new_node->bytenr, |
@@ -2945,7 +2947,9 @@ static int relocate_file_extent_cluster(struct inode *inode, | |||
2945 | index = (cluster->start - offset) >> PAGE_CACHE_SHIFT; | 2947 | index = (cluster->start - offset) >> PAGE_CACHE_SHIFT; |
2946 | last_index = (cluster->end - offset) >> PAGE_CACHE_SHIFT; | 2948 | last_index = (cluster->end - offset) >> PAGE_CACHE_SHIFT; |
2947 | while (index <= last_index) { | 2949 | while (index <= last_index) { |
2950 | mutex_lock(&inode->i_mutex); | ||
2948 | ret = btrfs_delalloc_reserve_metadata(inode, PAGE_CACHE_SIZE); | 2951 | ret = btrfs_delalloc_reserve_metadata(inode, PAGE_CACHE_SIZE); |
2952 | mutex_unlock(&inode->i_mutex); | ||
2949 | if (ret) | 2953 | if (ret) |
2950 | goto out; | 2954 | goto out; |
2951 | 2955 | ||
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index ed11d3866afd..ddf2c90d3fc0 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c | |||
@@ -256,6 +256,11 @@ static int scrub_print_warning_inode(u64 inum, u64 offset, u64 root, void *ctx) | |||
256 | btrfs_release_path(swarn->path); | 256 | btrfs_release_path(swarn->path); |
257 | 257 | ||
258 | ipath = init_ipath(4096, local_root, swarn->path); | 258 | ipath = init_ipath(4096, local_root, swarn->path); |
259 | if (IS_ERR(ipath)) { | ||
260 | ret = PTR_ERR(ipath); | ||
261 | ipath = NULL; | ||
262 | goto err; | ||
263 | } | ||
259 | ret = paths_from_inode(inum, ipath); | 264 | ret = paths_from_inode(inum, ipath); |
260 | 265 | ||
261 | if (ret < 0) | 266 | if (ret < 0) |
@@ -272,7 +277,7 @@ static int scrub_print_warning_inode(u64 inum, u64 offset, u64 root, void *ctx) | |||
272 | swarn->logical, swarn->dev->name, | 277 | swarn->logical, swarn->dev->name, |
273 | (unsigned long long)swarn->sector, root, inum, offset, | 278 | (unsigned long long)swarn->sector, root, inum, offset, |
274 | min(isize - offset, (u64)PAGE_SIZE), nlink, | 279 | min(isize - offset, (u64)PAGE_SIZE), nlink, |
275 | (char *)ipath->fspath->val[i]); | 280 | (char *)(unsigned long)ipath->fspath->val[i]); |
276 | 281 | ||
277 | free_ipath(ipath); | 282 | free_ipath(ipath); |
278 | return 0; | 283 | return 0; |
@@ -944,50 +949,18 @@ static int scrub_checksum_super(struct scrub_bio *sbio, void *buffer) | |||
944 | static int scrub_submit(struct scrub_dev *sdev) | 949 | static int scrub_submit(struct scrub_dev *sdev) |
945 | { | 950 | { |
946 | struct scrub_bio *sbio; | 951 | struct scrub_bio *sbio; |
947 | struct bio *bio; | ||
948 | int i; | ||
949 | 952 | ||
950 | if (sdev->curr == -1) | 953 | if (sdev->curr == -1) |
951 | return 0; | 954 | return 0; |
952 | 955 | ||
953 | sbio = sdev->bios[sdev->curr]; | 956 | sbio = sdev->bios[sdev->curr]; |
954 | |||
955 | bio = bio_alloc(GFP_NOFS, sbio->count); | ||
956 | if (!bio) | ||
957 | goto nomem; | ||
958 | |||
959 | bio->bi_private = sbio; | ||
960 | bio->bi_end_io = scrub_bio_end_io; | ||
961 | bio->bi_bdev = sdev->dev->bdev; | ||
962 | bio->bi_sector = sbio->physical >> 9; | ||
963 | |||
964 | for (i = 0; i < sbio->count; ++i) { | ||
965 | struct page *page; | ||
966 | int ret; | ||
967 | |||
968 | page = alloc_page(GFP_NOFS); | ||
969 | if (!page) | ||
970 | goto nomem; | ||
971 | |||
972 | ret = bio_add_page(bio, page, PAGE_SIZE, 0); | ||
973 | if (!ret) { | ||
974 | __free_page(page); | ||
975 | goto nomem; | ||
976 | } | ||
977 | } | ||
978 | |||
979 | sbio->err = 0; | 957 | sbio->err = 0; |
980 | sdev->curr = -1; | 958 | sdev->curr = -1; |
981 | atomic_inc(&sdev->in_flight); | 959 | atomic_inc(&sdev->in_flight); |
982 | 960 | ||
983 | submit_bio(READ, bio); | 961 | submit_bio(READ, sbio->bio); |
984 | 962 | ||
985 | return 0; | 963 | return 0; |
986 | |||
987 | nomem: | ||
988 | scrub_free_bio(bio); | ||
989 | |||
990 | return -ENOMEM; | ||
991 | } | 964 | } |
992 | 965 | ||
993 | static int scrub_page(struct scrub_dev *sdev, u64 logical, u64 len, | 966 | static int scrub_page(struct scrub_dev *sdev, u64 logical, u64 len, |
@@ -995,6 +968,8 @@ static int scrub_page(struct scrub_dev *sdev, u64 logical, u64 len, | |||
995 | u8 *csum, int force) | 968 | u8 *csum, int force) |
996 | { | 969 | { |
997 | struct scrub_bio *sbio; | 970 | struct scrub_bio *sbio; |
971 | struct page *page; | ||
972 | int ret; | ||
998 | 973 | ||
999 | again: | 974 | again: |
1000 | /* | 975 | /* |
@@ -1015,12 +990,22 @@ again: | |||
1015 | } | 990 | } |
1016 | sbio = sdev->bios[sdev->curr]; | 991 | sbio = sdev->bios[sdev->curr]; |
1017 | if (sbio->count == 0) { | 992 | if (sbio->count == 0) { |
993 | struct bio *bio; | ||
994 | |||
1018 | sbio->physical = physical; | 995 | sbio->physical = physical; |
1019 | sbio->logical = logical; | 996 | sbio->logical = logical; |
997 | bio = bio_alloc(GFP_NOFS, SCRUB_PAGES_PER_BIO); | ||
998 | if (!bio) | ||
999 | return -ENOMEM; | ||
1000 | |||
1001 | bio->bi_private = sbio; | ||
1002 | bio->bi_end_io = scrub_bio_end_io; | ||
1003 | bio->bi_bdev = sdev->dev->bdev; | ||
1004 | bio->bi_sector = sbio->physical >> 9; | ||
1005 | sbio->err = 0; | ||
1006 | sbio->bio = bio; | ||
1020 | } else if (sbio->physical + sbio->count * PAGE_SIZE != physical || | 1007 | } else if (sbio->physical + sbio->count * PAGE_SIZE != physical || |
1021 | sbio->logical + sbio->count * PAGE_SIZE != logical) { | 1008 | sbio->logical + sbio->count * PAGE_SIZE != logical) { |
1022 | int ret; | ||
1023 | |||
1024 | ret = scrub_submit(sdev); | 1009 | ret = scrub_submit(sdev); |
1025 | if (ret) | 1010 | if (ret) |
1026 | return ret; | 1011 | return ret; |
@@ -1030,6 +1015,20 @@ again: | |||
1030 | sbio->spag[sbio->count].generation = gen; | 1015 | sbio->spag[sbio->count].generation = gen; |
1031 | sbio->spag[sbio->count].have_csum = 0; | 1016 | sbio->spag[sbio->count].have_csum = 0; |
1032 | sbio->spag[sbio->count].mirror_num = mirror_num; | 1017 | sbio->spag[sbio->count].mirror_num = mirror_num; |
1018 | |||
1019 | page = alloc_page(GFP_NOFS); | ||
1020 | if (!page) | ||
1021 | return -ENOMEM; | ||
1022 | |||
1023 | ret = bio_add_page(sbio->bio, page, PAGE_SIZE, 0); | ||
1024 | if (!ret) { | ||
1025 | __free_page(page); | ||
1026 | ret = scrub_submit(sdev); | ||
1027 | if (ret) | ||
1028 | return ret; | ||
1029 | goto again; | ||
1030 | } | ||
1031 | |||
1033 | if (csum) { | 1032 | if (csum) { |
1034 | sbio->spag[sbio->count].have_csum = 1; | 1033 | sbio->spag[sbio->count].have_csum = 1; |
1035 | memcpy(sbio->spag[sbio->count].csum, csum, sdev->csum_size); | 1034 | memcpy(sbio->spag[sbio->count].csum, csum, sdev->csum_size); |
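The scrub hunks restructure bio building: scrub_submit() no longer allocates anything, while scrub_page() allocates the bio lazily when a scrub_bio starts filling (count == 0), adds one page per call, and when bio_add_page() refuses a page it frees the page, submits what is already queued, and retries from the top. A userspace model of that append/flush/retry flow, with counters standing in for real bios and pages:

#include <stdio.h>

#define PAGES_PER_BIO 4                 /* models SCRUB_PAGES_PER_BIO   */

static int bio_count = -1;              /* -1: no bio allocated yet     */

static int submit(void)                 /* models scrub_submit()        */
{
        printf("submitting bio with %d pages\n", bio_count);
        bio_count = -1;
        return 0;
}

static int scrub_one_page(int page)     /* models scrub_page()          */
{
again:
        if (bio_count < 0)              /* lazily start a new bio       */
                bio_count = 0;
        if (bio_count == PAGES_PER_BIO) {       /* bio_add_page refused */
                int ret = submit();
                if (ret)
                        return ret;
                goto again;             /* same retry the hunk adds     */
        }
        bio_count++;
        printf("queued page %d (%d in bio)\n", page, bio_count);
        return 0;
}

int main(void)
{
        for (int i = 0; i < 10; i++)
                scrub_one_page(i);
        if (bio_count > 0)
                submit();               /* final partial bio            */
        return 0;
}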
@@ -1536,18 +1535,22 @@ static noinline_for_stack int scrub_supers(struct scrub_dev *sdev) | |||
1536 | static noinline_for_stack int scrub_workers_get(struct btrfs_root *root) | 1535 | static noinline_for_stack int scrub_workers_get(struct btrfs_root *root) |
1537 | { | 1536 | { |
1538 | struct btrfs_fs_info *fs_info = root->fs_info; | 1537 | struct btrfs_fs_info *fs_info = root->fs_info; |
1538 | int ret = 0; | ||
1539 | 1539 | ||
1540 | mutex_lock(&fs_info->scrub_lock); | 1540 | mutex_lock(&fs_info->scrub_lock); |
1541 | if (fs_info->scrub_workers_refcnt == 0) { | 1541 | if (fs_info->scrub_workers_refcnt == 0) { |
1542 | btrfs_init_workers(&fs_info->scrub_workers, "scrub", | 1542 | btrfs_init_workers(&fs_info->scrub_workers, "scrub", |
1543 | fs_info->thread_pool_size, &fs_info->generic_worker); | 1543 | fs_info->thread_pool_size, &fs_info->generic_worker); |
1544 | fs_info->scrub_workers.idle_thresh = 4; | 1544 | fs_info->scrub_workers.idle_thresh = 4; |
1545 | btrfs_start_workers(&fs_info->scrub_workers, 1); | 1545 | ret = btrfs_start_workers(&fs_info->scrub_workers); |
1546 | if (ret) | ||
1547 | goto out; | ||
1546 | } | 1548 | } |
1547 | ++fs_info->scrub_workers_refcnt; | 1549 | ++fs_info->scrub_workers_refcnt; |
1550 | out: | ||
1548 | mutex_unlock(&fs_info->scrub_lock); | 1551 | mutex_unlock(&fs_info->scrub_lock); |
1549 | 1552 | ||
1550 | return 0; | 1553 | return ret; |
1551 | } | 1554 | } |
1552 | 1555 | ||
1553 | static noinline_for_stack void scrub_workers_put(struct btrfs_root *root) | 1556 | static noinline_for_stack void scrub_workers_put(struct btrfs_root *root) |
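scrub_workers_get() now propagates a failure from btrfs_start_workers() instead of unconditionally returning 0, and leaves through a single out: label so scrub_lock is released on every path and the refcount is only bumped on success. A sketch of that lock-plus-out-label pattern with pthread stand-ins:

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t scrub_lock = PTHREAD_MUTEX_INITIALIZER;
static int refcnt;

/* Always succeeds here; return nonzero to exercise the bail-out. */
static int start_workers(void) { return 0; }

static int workers_get(void)
{
        int ret = 0;

        pthread_mutex_lock(&scrub_lock);
        if (refcnt == 0) {
                ret = start_workers();
                if (ret)
                        goto out;       /* skip the ref bump on failure */
        }
        ++refcnt;
out:
        pthread_mutex_unlock(&scrub_lock);
        return ret;
}

int main(void)
{
        printf("workers_get: %d, refcnt: %d\n", workers_get(), refcnt);
        return 0;
}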
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 57080dffdfc6..200f63bc6675 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c | |||
@@ -41,6 +41,7 @@ | |||
41 | #include <linux/slab.h> | 41 | #include <linux/slab.h> |
42 | #include <linux/cleancache.h> | 42 | #include <linux/cleancache.h> |
43 | #include <linux/mnt_namespace.h> | 43 | #include <linux/mnt_namespace.h> |
44 | #include <linux/ratelimit.h> | ||
44 | #include "compat.h" | 45 | #include "compat.h" |
45 | #include "delayed-inode.h" | 46 | #include "delayed-inode.h" |
46 | #include "ctree.h" | 47 | #include "ctree.h" |
@@ -197,7 +198,7 @@ static match_table_t tokens = { | |||
197 | {Opt_subvolrootid, "subvolrootid=%d"}, | 198 | {Opt_subvolrootid, "subvolrootid=%d"}, |
198 | {Opt_defrag, "autodefrag"}, | 199 | {Opt_defrag, "autodefrag"}, |
199 | {Opt_inode_cache, "inode_cache"}, | 200 | {Opt_inode_cache, "inode_cache"}, |
200 | {Opt_no_space_cache, "no_space_cache"}, | 201 | {Opt_no_space_cache, "nospace_cache"}, |
201 | {Opt_recovery, "recovery"}, | 202 | {Opt_recovery, "recovery"}, |
202 | {Opt_err, NULL}, | 203 | {Opt_err, NULL}, |
203 | }; | 204 | }; |
@@ -448,6 +449,7 @@ static int btrfs_parse_early_options(const char *options, fmode_t flags, | |||
448 | token = match_token(p, tokens, args); | 449 | token = match_token(p, tokens, args); |
449 | switch (token) { | 450 | switch (token) { |
450 | case Opt_subvol: | 451 | case Opt_subvol: |
452 | kfree(*subvol_name); | ||
451 | *subvol_name = match_strdup(&args[0]); | 453 | *subvol_name = match_strdup(&args[0]); |
452 | break; | 454 | break; |
453 | case Opt_subvolid: | 455 | case Opt_subvolid: |
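The added kfree(*subvol_name) guards against a repeated subvol= mount option: without it, the second match_strdup() would leak the first allocation. With it, the last occurrence wins cleanly. A userspace sketch of that behavior, using free()/strdup() in place of kfree()/match_strdup():

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static void set_subvol_name(char **subvol_name, const char *arg)
{
        free(*subvol_name);             /* the kfree(*subvol_name) line */
        *subvol_name = strdup(arg);     /* models match_strdup()        */
}

int main(void)
{
        char *subvol_name = NULL;

        set_subvol_name(&subvol_name, "first");
        set_subvol_name(&subvol_name, "second");  /* no leak of "first" */
        printf("subvol=%s\n", subvol_name);
        free(subvol_name);
        return 0;
}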
@@ -710,7 +712,7 @@ static int btrfs_show_options(struct seq_file *seq, struct vfsmount *vfs) | |||
710 | if (btrfs_test_opt(root, SPACE_CACHE)) | 712 | if (btrfs_test_opt(root, SPACE_CACHE)) |
711 | seq_puts(seq, ",space_cache"); | 713 | seq_puts(seq, ",space_cache"); |
712 | else | 714 | else |
713 | seq_puts(seq, ",no_space_cache"); | 715 | seq_puts(seq, ",nospace_cache"); |
714 | if (btrfs_test_opt(root, CLEAR_CACHE)) | 716 | if (btrfs_test_opt(root, CLEAR_CACHE)) |
715 | seq_puts(seq, ",clear_cache"); | 717 | seq_puts(seq, ",clear_cache"); |
716 | if (btrfs_test_opt(root, USER_SUBVOL_RM_ALLOWED)) | 718 | if (btrfs_test_opt(root, USER_SUBVOL_RM_ALLOWED)) |
@@ -824,13 +826,9 @@ static char *setup_root_args(char *args) | |||
824 | static struct dentry *mount_subvol(const char *subvol_name, int flags, | 826 | static struct dentry *mount_subvol(const char *subvol_name, int flags, |
825 | const char *device_name, char *data) | 827 | const char *device_name, char *data) |
826 | { | 828 | { |
827 | struct super_block *s; | ||
828 | struct dentry *root; | 829 | struct dentry *root; |
829 | struct vfsmount *mnt; | 830 | struct vfsmount *mnt; |
830 | struct mnt_namespace *ns_private; | ||
831 | char *newargs; | 831 | char *newargs; |
832 | struct path path; | ||
833 | int error; | ||
834 | 832 | ||
835 | newargs = setup_root_args(data); | 833 | newargs = setup_root_args(data); |
836 | if (!newargs) | 834 | if (!newargs) |
@@ -841,39 +839,17 @@ static struct dentry *mount_subvol(const char *subvol_name, int flags, | |||
841 | if (IS_ERR(mnt)) | 839 | if (IS_ERR(mnt)) |
842 | return ERR_CAST(mnt); | 840 | return ERR_CAST(mnt); |
843 | 841 | ||
844 | ns_private = create_mnt_ns(mnt); | 842 | root = mount_subtree(mnt, subvol_name); |
845 | if (IS_ERR(ns_private)) { | ||
846 | mntput(mnt); | ||
847 | return ERR_CAST(ns_private); | ||
848 | } | ||
849 | |||
850 | /* | ||
851 | * This will trigger the automount of the subvol so we can just | ||
852 | * drop the mnt we have here and return the dentry that we | ||
853 | * found. | ||
854 | */ | ||
855 | error = vfs_path_lookup(mnt->mnt_root, mnt, subvol_name, | ||
856 | LOOKUP_FOLLOW, &path); | ||
857 | put_mnt_ns(ns_private); | ||
858 | if (error) | ||
859 | return ERR_PTR(error); | ||
860 | 843 | ||
861 | if (!is_subvolume_inode(path.dentry->d_inode)) { | 844 | if (!IS_ERR(root) && !is_subvolume_inode(root->d_inode)) { |
862 | path_put(&path); | 845 | struct super_block *s = root->d_sb; |
863 | mntput(mnt); | 846 | dput(root); |
864 | error = -EINVAL; | 847 | root = ERR_PTR(-EINVAL); |
848 | deactivate_locked_super(s); | ||
865 | printk(KERN_ERR "btrfs: '%s' is not a valid subvolume\n", | 849 | printk(KERN_ERR "btrfs: '%s' is not a valid subvolume\n", |
866 | subvol_name); | 850 | subvol_name); |
867 | return ERR_PTR(-EINVAL); | ||
868 | } | 851 | } |
869 | 852 | ||
870 | /* Get a ref to the sb and the dentry we found and return it */ | ||
871 | s = path.mnt->mnt_sb; | ||
872 | atomic_inc(&s->s_active); | ||
873 | root = dget(path.dentry); | ||
874 | path_put(&path); | ||
875 | down_write(&s->s_umount); | ||
876 | |||
877 | return root; | 853 | return root; |
878 | } | 854 | } |
879 | 855 | ||
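mount_subtree(), a VFS helper new in this kernel, replaces the hand-rolled namespace-plus-lookup dance above: it consumes the vfsmount and returns a dentry carrying an active superblock reference, which is why the not-a-subvolume path now saves root->d_sb before dput() and then deactivates the super. A userspace model of that ownership handoff — refcounts only; the real deactivate_locked_super() also handles s_umount locking, elided here:

#include <stdio.h>
#include <stdlib.h>

struct super { int s_active; };
struct dentry { struct super *d_sb; int d_count; };

static struct dentry *mount_subtree_model(void)  /* consumes the "mnt" */
{
        struct super *s = calloc(1, sizeof(*s));
        struct dentry *d = calloc(1, sizeof(*d));

        s->s_active = 1;                /* active sb ref held by caller */
        d->d_sb = s;
        d->d_count = 1;                 /* dentry ref held by caller    */
        return d;
}

static void dput_model(struct dentry *d)
{
        if (--d->d_count == 0)
                free(d);
}

static void deactivate_super_model(struct super *s)
{
        if (--s->s_active == 0)
                free(s);
}

int main(void)
{
        struct dentry *root = mount_subtree_model();
        int is_subvolume = 0;           /* pretend the check failed     */

        if (!is_subvolume) {
                struct super *s = root->d_sb;   /* save before dput     */

                dput_model(root);               /* drop the dentry      */
                deactivate_super_model(s);      /* then the sb ref      */
                fprintf(stderr, "not a valid subvolume\n");
                return 1;
        }
        dput_model(root);
        return 0;
}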
@@ -890,7 +866,6 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags, | |||
890 | struct super_block *s; | 866 | struct super_block *s; |
891 | struct dentry *root; | 867 | struct dentry *root; |
892 | struct btrfs_fs_devices *fs_devices = NULL; | 868 | struct btrfs_fs_devices *fs_devices = NULL; |
893 | struct btrfs_root *tree_root = NULL; | ||
894 | struct btrfs_fs_info *fs_info = NULL; | 869 | struct btrfs_fs_info *fs_info = NULL; |
895 | fmode_t mode = FMODE_READ; | 870 | fmode_t mode = FMODE_READ; |
896 | char *subvol_name = NULL; | 871 | char *subvol_name = NULL; |
@@ -904,8 +879,10 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags, | |||
904 | error = btrfs_parse_early_options(data, mode, fs_type, | 879 | error = btrfs_parse_early_options(data, mode, fs_type, |
905 | &subvol_name, &subvol_objectid, | 880 | &subvol_name, &subvol_objectid, |
906 | &subvol_rootid, &fs_devices); | 881 | &subvol_rootid, &fs_devices); |
907 | if (error) | 882 | if (error) { |
883 | kfree(subvol_name); | ||
908 | return ERR_PTR(error); | 884 | return ERR_PTR(error); |
885 | } | ||
909 | 886 | ||
910 | if (subvol_name) { | 887 | if (subvol_name) { |
911 | root = mount_subvol(subvol_name, flags, device_name, data); | 888 | root = mount_subvol(subvol_name, flags, device_name, data); |
@@ -917,15 +894,6 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags, | |||
917 | if (error) | 894 | if (error) |
918 | return ERR_PTR(error); | 895 | return ERR_PTR(error); |
919 | 896 | ||
920 | error = btrfs_open_devices(fs_devices, mode, fs_type); | ||
921 | if (error) | ||
922 | return ERR_PTR(error); | ||
923 | |||
924 | if (!(flags & MS_RDONLY) && fs_devices->rw_devices == 0) { | ||
925 | error = -EACCES; | ||
926 | goto error_close_devices; | ||
927 | } | ||
928 | |||
929 | /* | 897 | /* |
930 | * Setup a dummy root and fs_info for test/set super. This is because | 898 | * Setup a dummy root and fs_info for test/set super. This is because |
931 | * we don't actually fill this stuff out until open_ctree, but we need | 899 | * we don't actually fill this stuff out until open_ctree, but we need |
@@ -933,24 +901,36 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags, | |||
933 | * then open_ctree will properly initialize everything later. | 901 | * then open_ctree will properly initialize everything later. |
934 | */ | 902 | */ |
935 | fs_info = kzalloc(sizeof(struct btrfs_fs_info), GFP_NOFS); | 903 | fs_info = kzalloc(sizeof(struct btrfs_fs_info), GFP_NOFS); |
936 | tree_root = kzalloc(sizeof(struct btrfs_root), GFP_NOFS); | 904 | if (!fs_info) |
937 | if (!fs_info || !tree_root) { | 905 | return ERR_PTR(-ENOMEM); |
906 | |||
907 | fs_info->tree_root = kzalloc(sizeof(struct btrfs_root), GFP_NOFS); | ||
908 | if (!fs_info->tree_root) { | ||
938 | error = -ENOMEM; | 909 | error = -ENOMEM; |
939 | goto error_close_devices; | 910 | goto error_fs_info; |
940 | } | 911 | } |
941 | fs_info->tree_root = tree_root; | 912 | fs_info->tree_root->fs_info = fs_info; |
942 | fs_info->fs_devices = fs_devices; | 913 | fs_info->fs_devices = fs_devices; |
943 | tree_root->fs_info = fs_info; | ||
944 | 914 | ||
945 | fs_info->super_copy = kzalloc(BTRFS_SUPER_INFO_SIZE, GFP_NOFS); | 915 | fs_info->super_copy = kzalloc(BTRFS_SUPER_INFO_SIZE, GFP_NOFS); |
946 | fs_info->super_for_commit = kzalloc(BTRFS_SUPER_INFO_SIZE, GFP_NOFS); | 916 | fs_info->super_for_commit = kzalloc(BTRFS_SUPER_INFO_SIZE, GFP_NOFS); |
947 | if (!fs_info->super_copy || !fs_info->super_for_commit) { | 917 | if (!fs_info->super_copy || !fs_info->super_for_commit) { |
948 | error = -ENOMEM; | 918 | error = -ENOMEM; |
919 | goto error_fs_info; | ||
920 | } | ||
921 | |||
922 | error = btrfs_open_devices(fs_devices, mode, fs_type); | ||
923 | if (error) | ||
924 | goto error_fs_info; | ||
925 | |||
926 | if (!(flags & MS_RDONLY) && fs_devices->rw_devices == 0) { | ||
927 | error = -EACCES; | ||
949 | goto error_close_devices; | 928 | goto error_close_devices; |
950 | } | 929 | } |
951 | 930 | ||
952 | bdev = fs_devices->latest_bdev; | 931 | bdev = fs_devices->latest_bdev; |
953 | s = sget(fs_type, btrfs_test_super, btrfs_set_super, tree_root); | 932 | s = sget(fs_type, btrfs_test_super, btrfs_set_super, |
933 | fs_info->tree_root); | ||
954 | if (IS_ERR(s)) { | 934 | if (IS_ERR(s)) { |
955 | error = PTR_ERR(s); | 935 | error = PTR_ERR(s); |
956 | goto error_close_devices; | 936 | goto error_close_devices; |
@@ -959,12 +939,12 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags, | |||
959 | if (s->s_root) { | 939 | if (s->s_root) { |
960 | if ((flags ^ s->s_flags) & MS_RDONLY) { | 940 | if ((flags ^ s->s_flags) & MS_RDONLY) { |
961 | deactivate_locked_super(s); | 941 | deactivate_locked_super(s); |
962 | return ERR_PTR(-EBUSY); | 942 | error = -EBUSY; |
943 | goto error_close_devices; | ||
963 | } | 944 | } |
964 | 945 | ||
965 | btrfs_close_devices(fs_devices); | 946 | btrfs_close_devices(fs_devices); |
966 | free_fs_info(fs_info); | 947 | free_fs_info(fs_info); |
967 | kfree(tree_root); | ||
968 | } else { | 948 | } else { |
969 | char b[BDEVNAME_SIZE]; | 949 | char b[BDEVNAME_SIZE]; |
970 | 950 | ||
@@ -991,8 +971,8 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags, | |||
991 | 971 | ||
992 | error_close_devices: | 972 | error_close_devices: |
993 | btrfs_close_devices(fs_devices); | 973 | btrfs_close_devices(fs_devices); |
974 | error_fs_info: | ||
994 | free_fs_info(fs_info); | 975 | free_fs_info(fs_info); |
995 | kfree(tree_root); | ||
996 | return ERR_PTR(error); | 976 | return ERR_PTR(error); |
997 | } | 977 | } |
998 | 978 | ||
@@ -1074,11 +1054,11 @@ static int btrfs_calc_avail_data_space(struct btrfs_root *root, u64 *free_bytes) | |||
1074 | u64 avail_space; | 1054 | u64 avail_space; |
1075 | u64 used_space; | 1055 | u64 used_space; |
1076 | u64 min_stripe_size; | 1056 | u64 min_stripe_size; |
1077 | int min_stripes = 1; | 1057 | int min_stripes = 1, num_stripes = 1; |
1078 | int i = 0, nr_devices; | 1058 | int i = 0, nr_devices; |
1079 | int ret; | 1059 | int ret; |
1080 | 1060 | ||
1081 | nr_devices = fs_info->fs_devices->rw_devices; | 1061 | nr_devices = fs_info->fs_devices->open_devices; |
1082 | BUG_ON(!nr_devices); | 1062 | BUG_ON(!nr_devices); |
1083 | 1063 | ||
1084 | devices_info = kmalloc(sizeof(*devices_info) * nr_devices, | 1064 | devices_info = kmalloc(sizeof(*devices_info) * nr_devices, |
@@ -1088,20 +1068,24 @@ static int btrfs_calc_avail_data_space(struct btrfs_root *root, u64 *free_bytes) | |||
1088 | 1068 | ||
1089 | /* calc min stripe number for data space allocation */ | 1069 | /* calc min stripe number for data space allocation */ |
1090 | type = btrfs_get_alloc_profile(root, 1); | 1070 | type = btrfs_get_alloc_profile(root, 1); |
1091 | if (type & BTRFS_BLOCK_GROUP_RAID0) | 1071 | if (type & BTRFS_BLOCK_GROUP_RAID0) { |
1092 | min_stripes = 2; | 1072 | min_stripes = 2; |
1093 | else if (type & BTRFS_BLOCK_GROUP_RAID1) | 1073 | num_stripes = nr_devices; |
1074 | } else if (type & BTRFS_BLOCK_GROUP_RAID1) { | ||
1094 | min_stripes = 2; | 1075 | min_stripes = 2; |
1095 | else if (type & BTRFS_BLOCK_GROUP_RAID10) | 1076 | num_stripes = 2; |
1077 | } else if (type & BTRFS_BLOCK_GROUP_RAID10) { | ||
1096 | min_stripes = 4; | 1078 | min_stripes = 4; |
1079 | num_stripes = 4; | ||
1080 | } | ||
1097 | 1081 | ||
1098 | if (type & BTRFS_BLOCK_GROUP_DUP) | 1082 | if (type & BTRFS_BLOCK_GROUP_DUP) |
1099 | min_stripe_size = 2 * BTRFS_STRIPE_LEN; | 1083 | min_stripe_size = 2 * BTRFS_STRIPE_LEN; |
1100 | else | 1084 | else |
1101 | min_stripe_size = BTRFS_STRIPE_LEN; | 1085 | min_stripe_size = BTRFS_STRIPE_LEN; |
1102 | 1086 | ||
1103 | list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) { | 1087 | list_for_each_entry(device, &fs_devices->devices, dev_list) { |
1104 | if (!device->in_fs_metadata) | 1088 | if (!device->in_fs_metadata || !device->bdev) |
1105 | continue; | 1089 | continue; |
1106 | 1090 | ||
1107 | avail_space = device->total_bytes - device->bytes_used; | 1091 | avail_space = device->total_bytes - device->bytes_used; |
@@ -1162,13 +1146,16 @@ static int btrfs_calc_avail_data_space(struct btrfs_root *root, u64 *free_bytes) | |||
1162 | i = nr_devices - 1; | 1146 | i = nr_devices - 1; |
1163 | avail_space = 0; | 1147 | avail_space = 0; |
1164 | while (nr_devices >= min_stripes) { | 1148 | while (nr_devices >= min_stripes) { |
1149 | if (num_stripes > nr_devices) | ||
1150 | num_stripes = nr_devices; | ||
1151 | |||
1165 | if (devices_info[i].max_avail >= min_stripe_size) { | 1152 | if (devices_info[i].max_avail >= min_stripe_size) { |
1166 | int j; | 1153 | int j; |
1167 | u64 alloc_size; | 1154 | u64 alloc_size; |
1168 | 1155 | ||
1169 | avail_space += devices_info[i].max_avail * min_stripes; | 1156 | avail_space += devices_info[i].max_avail * num_stripes; |
1170 | alloc_size = devices_info[i].max_avail; | 1157 | alloc_size = devices_info[i].max_avail; |
1171 | for (j = i + 1 - min_stripes; j <= i; j++) | 1158 | for (j = i + 1 - num_stripes; j <= i; j++) |
1172 | devices_info[j].max_avail -= alloc_size; | 1159 | devices_info[j].max_avail -= alloc_size; |
1173 | } | 1160 | } |
1174 | i--; | 1161 | i--; |
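The statfs hunks separate min_stripes (the floor below which no chunk can be allocated at all) from num_stripes (how many devices a new chunk of the active profile actually spans: every open device for RAID0, two for RAID1, four for RAID10), and walk all open devices instead of only rw ones. A standalone model of the estimation loop with made-up sizes; values are raw bytes, before any profile-specific reduction:

#include <stdio.h>

int main(void)
{
        /* max_avail[] sorted largest-first, smallest last, as the
         * kernel sorts devices_info[]; sizes are illustrative. */
        unsigned long long max_avail[3] = { 300, 200, 100 };
        int nr_devices = 3, min_stripes = 2, num_stripes = 2; /* RAID1 */
        unsigned long long avail_space = 0, min_stripe_size = 64;
        int i = nr_devices - 1;

        while (nr_devices >= min_stripes) {
                if (num_stripes > nr_devices)
                        num_stripes = nr_devices;
                if (max_avail[i] >= min_stripe_size) {
                        unsigned long long alloc_size = max_avail[i];

                        avail_space += alloc_size * num_stripes;
                        for (int j = i + 1 - num_stripes; j <= i; j++)
                                max_avail[j] -= alloc_size;
                }
                i--;
                nr_devices--;
        }
        /* RAID1 on {300,200,100}: 100*2 + 100*2 = 400 raw bytes. */
        printf("estimated allocatable: %llu\n", avail_space);
        return 0;
}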
@@ -1285,6 +1272,16 @@ static int btrfs_unfreeze(struct super_block *sb) | |||
1285 | return 0; | 1272 | return 0; |
1286 | } | 1273 | } |
1287 | 1274 | ||
1275 | static void btrfs_fs_dirty_inode(struct inode *inode, int flags) | ||
1276 | { | ||
1277 | int ret; | ||
1278 | |||
1279 | ret = btrfs_dirty_inode(inode); | ||
1280 | if (ret) | ||
1281 | printk_ratelimited(KERN_ERR "btrfs: failed to dirty inode %Lu " | ||
1282 | "error %d\n", btrfs_ino(inode), ret); | ||
1283 | } | ||
1284 | |||
1288 | static const struct super_operations btrfs_super_ops = { | 1285 | static const struct super_operations btrfs_super_ops = { |
1289 | .drop_inode = btrfs_drop_inode, | 1286 | .drop_inode = btrfs_drop_inode, |
1290 | .evict_inode = btrfs_evict_inode, | 1287 | .evict_inode = btrfs_evict_inode, |
@@ -1292,7 +1289,7 @@ static const struct super_operations btrfs_super_ops = { | |||
1292 | .sync_fs = btrfs_sync_fs, | 1289 | .sync_fs = btrfs_sync_fs, |
1293 | .show_options = btrfs_show_options, | 1290 | .show_options = btrfs_show_options, |
1294 | .write_inode = btrfs_write_inode, | 1291 | .write_inode = btrfs_write_inode, |
1295 | .dirty_inode = btrfs_dirty_inode, | 1292 | .dirty_inode = btrfs_fs_dirty_inode, |
1296 | .alloc_inode = btrfs_alloc_inode, | 1293 | .alloc_inode = btrfs_alloc_inode, |
1297 | .destroy_inode = btrfs_destroy_inode, | 1294 | .destroy_inode = btrfs_destroy_inode, |
1298 | .statfs = btrfs_statfs, | 1295 | .statfs = btrfs_statfs, |
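The new btrfs_fs_dirty_inode() adapts btrfs_dirty_inode(), which still returns an error, to the void .dirty_inode callback the VFS expects, logging failures through printk_ratelimited() so a misbehaving inode cannot flood the log. A userspace model of the adapter, with a crude once-per-second stand-in for the kernel ratelimit:

#include <stdio.h>
#include <time.h>

static int dirty_inode_worker(unsigned long ino)   /* can fail */
{
        return ino == 0 ? -5 : 0;       /* -5 models an -EIO-ish error */
}

static void fs_dirty_inode(unsigned long ino)      /* void callback */
{
        static time_t last_logged;      /* crude ratelimit stand-in */
        int ret = dirty_inode_worker(ino);
        time_t now = time(NULL);

        if (ret && now != last_logged) {
                fprintf(stderr, "failed to dirty inode %lu error %d\n",
                        ino, ret);
                last_logged = now;
        }
}

int main(void)
{
        fs_dirty_inode(42);
        fs_dirty_inode(0);              /* triggers the log line */
        return 0;
}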
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 960835eaf4da..81376d94cd3c 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c | |||
@@ -785,6 +785,10 @@ static noinline int commit_fs_roots(struct btrfs_trans_handle *trans, | |||
785 | 785 | ||
786 | btrfs_save_ino_cache(root, trans); | 786 | btrfs_save_ino_cache(root, trans); |
787 | 787 | ||
788 | /* see comments in should_cow_block() */ | ||
789 | root->force_cow = 0; | ||
790 | smp_wmb(); | ||
791 | |||
788 | if (root->commit_root != root->node) { | 792 | if (root->commit_root != root->node) { |
789 | mutex_lock(&root->fs_commit_mutex); | 793 | mutex_lock(&root->fs_commit_mutex); |
790 | switch_commit_root(root); | 794 | switch_commit_root(root); |
@@ -882,8 +886,8 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, | |||
882 | btrfs_reloc_pre_snapshot(trans, pending, &to_reserve); | 886 | btrfs_reloc_pre_snapshot(trans, pending, &to_reserve); |
883 | 887 | ||
884 | if (to_reserve > 0) { | 888 | if (to_reserve > 0) { |
885 | ret = btrfs_block_rsv_add(root, &pending->block_rsv, | 889 | ret = btrfs_block_rsv_add_noflush(root, &pending->block_rsv, |
886 | to_reserve); | 890 | to_reserve); |
887 | if (ret) { | 891 | if (ret) { |
888 | pending->error = ret; | 892 | pending->error = ret; |
889 | goto fail; | 893 | goto fail; |
@@ -947,6 +951,10 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, | |||
947 | btrfs_tree_unlock(old); | 951 | btrfs_tree_unlock(old); |
948 | free_extent_buffer(old); | 952 | free_extent_buffer(old); |
949 | 953 | ||
954 | /* see comments in should_cow_block() */ | ||
955 | root->force_cow = 1; | ||
956 | smp_wmb(); | ||
957 | |||
950 | btrfs_set_root_node(new_root_item, tmp); | 958 | btrfs_set_root_node(new_root_item, tmp); |
951 | /* record when the snapshot was created in key.offset */ | 959 | /* record when the snapshot was created in key.offset */ |
952 | key.offset = trans->transid; | 960 | key.offset = trans->transid; |
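The transaction.c hunks publish root->force_cow with write barriers: snapshot creation sets the flag and the commit path clears it, each store followed by smp_wmb() so that readers in should_cow_block() — which the added comments point at — observe the flag in order with the surrounding tree state. A generic userspace model of the write-barrier/read-barrier publish pattern, using C11 fences in place of the kernel primitives; this sketches the idiom, not the kernel code line for line:

#include <stdatomic.h>
#include <stdio.h>

static int snapshot_state;                      /* protected data */
static atomic_int force_cow;                    /* the flag       */

static void create_pending_snapshot(void)
{
        snapshot_state = 1;                     /* new root published   */
        atomic_thread_fence(memory_order_release);  /* ~smp_wmb()       */
        atomic_store_explicit(&force_cow, 1, memory_order_relaxed);
}

static int should_cow_block(void)
{
        if (atomic_load_explicit(&force_cow, memory_order_relaxed)) {
                atomic_thread_fence(memory_order_acquire); /* ~smp_rmb() */
                return snapshot_state;          /* guaranteed visible   */
        }
        return 0;
}

int main(void)
{
        create_pending_snapshot();
        printf("should_cow: %d\n", should_cow_block());
        return 0;
}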
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index f8e2943101a1..f4b839fd3c9d 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c | |||
@@ -295,6 +295,12 @@ loop_lock: | |||
295 | btrfs_requeue_work(&device->work); | 295 | btrfs_requeue_work(&device->work); |
296 | goto done; | 296 | goto done; |
297 | } | 297 | } |
298 | /* unplug every 64 requests just for good measure */ | ||
299 | if (batch_run % 64 == 0) { | ||
300 | blk_finish_plug(&plug); | ||
301 | blk_start_plug(&plug); | ||
302 | sync_pending = 0; | ||
303 | } | ||
298 | } | 304 | } |
299 | 305 | ||
300 | cond_resched(); | 306 | cond_resched(); |
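The worker-loop hunk above flushes the block plug every 64 queued requests so a long submission batch does not hold everything back until the loop ends. A userspace sketch of the periodic-flush shape of that loop, with prints standing in for blk_finish_plug()/blk_start_plug():

#include <stdio.h>

static int plugged;

static void start_plug(void)  { plugged = 0; }
static void finish_plug(void) { printf("flushed %d requests\n", plugged); }
static void queue_request(int n) { plugged++; (void)n; }

int main(void)
{
        int batch_run = 0;

        start_plug();
        for (int i = 0; i < 200; i++) {
                queue_request(i);
                batch_run++;
                /* unplug every 64 requests just for good measure */
                if (batch_run % 64 == 0) {
                        finish_plug();
                        start_plug();
                }
        }
        finish_plug();                  /* flush the tail of the batch */
        return 0;
}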
@@ -999,7 +1005,7 @@ static int btrfs_free_dev_extent(struct btrfs_trans_handle *trans, | |||
999 | key.objectid = device->devid; | 1005 | key.objectid = device->devid; |
1000 | key.offset = start; | 1006 | key.offset = start; |
1001 | key.type = BTRFS_DEV_EXTENT_KEY; | 1007 | key.type = BTRFS_DEV_EXTENT_KEY; |
1002 | 1008 | again: | |
1003 | ret = btrfs_search_slot(trans, root, &key, path, -1, 1); | 1009 | ret = btrfs_search_slot(trans, root, &key, path, -1, 1); |
1004 | if (ret > 0) { | 1010 | if (ret > 0) { |
1005 | ret = btrfs_previous_item(root, path, key.objectid, | 1011 | ret = btrfs_previous_item(root, path, key.objectid, |
@@ -1012,6 +1018,9 @@ static int btrfs_free_dev_extent(struct btrfs_trans_handle *trans, | |||
1012 | struct btrfs_dev_extent); | 1018 | struct btrfs_dev_extent); |
1013 | BUG_ON(found_key.offset > start || found_key.offset + | 1019 | BUG_ON(found_key.offset > start || found_key.offset + |
1014 | btrfs_dev_extent_length(leaf, extent) < start); | 1020 | btrfs_dev_extent_length(leaf, extent) < start); |
1021 | key = found_key; | ||
1022 | btrfs_release_path(path); | ||
1023 | goto again; | ||
1015 | } else if (ret == 0) { | 1024 | } else if (ret == 0) { |
1016 | leaf = path->nodes[0]; | 1025 | leaf = path->nodes[0]; |
1017 | extent = btrfs_item_ptr(leaf, path->slots[0], | 1026 | extent = btrfs_item_ptr(leaf, path->slots[0], |
@@ -1608,7 +1617,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) | |||
1608 | if ((sb->s_flags & MS_RDONLY) && !root->fs_info->fs_devices->seeding) | 1617 | if ((sb->s_flags & MS_RDONLY) && !root->fs_info->fs_devices->seeding) |
1609 | return -EINVAL; | 1618 | return -EINVAL; |
1610 | 1619 | ||
1611 | bdev = blkdev_get_by_path(device_path, FMODE_EXCL, | 1620 | bdev = blkdev_get_by_path(device_path, FMODE_WRITE | FMODE_EXCL, |
1612 | root->fs_info->bdev_holder); | 1621 | root->fs_info->bdev_holder); |
1613 | if (IS_ERR(bdev)) | 1622 | if (IS_ERR(bdev)) |
1614 | return PTR_ERR(bdev); | 1623 | return PTR_ERR(bdev); |
@@ -3255,7 +3264,7 @@ static void btrfs_end_bio(struct bio *bio, int err) | |||
3255 | */ | 3264 | */ |
3256 | if (atomic_read(&bbio->error) > bbio->max_errors) { | 3265 | if (atomic_read(&bbio->error) > bbio->max_errors) { |
3257 | err = -EIO; | 3266 | err = -EIO; |
3258 | } else if (err) { | 3267 | } else { |
3259 | /* | 3268 | /* |
3260 | * this bio is actually up to date, we didn't | 3269 | * this bio is actually up to date, we didn't |
3261 | * go over the max number of errors | 3270 | * go over the max number of errors |
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index ab5b1c49f352..78f2d4d4f37f 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h | |||
@@ -100,6 +100,12 @@ struct btrfs_device { | |||
100 | struct reada_zone *reada_curr_zone; | 100 | struct reada_zone *reada_curr_zone; |
101 | struct radix_tree_root reada_zones; | 101 | struct radix_tree_root reada_zones; |
102 | struct radix_tree_root reada_extents; | 102 | struct radix_tree_root reada_extents; |
103 | |||
104 | /* for sending down flush barriers */ | ||
105 | struct bio *flush_bio; | ||
106 | struct completion flush_wait; | ||
107 | int nobarriers; | ||
108 | |||
103 | }; | 109 | }; |
104 | 110 | ||
105 | struct btrfs_fs_devices { | 111 | struct btrfs_fs_devices { |
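The new btrfs_device fields back write barriers: a preallocated flush_bio is sent down the device, flush_wait is presumably completed from that bio's end_io callback (the handler itself is outside this diff), and nobarriers remembers devices that do not support flushes. A userspace model of the completion handshake, with a pthread standing in for bio submission and its completion; the kernel side would pair submit_bio with complete()/wait_for_completion():

#include <pthread.h>
#include <stdio.h>

struct completion {
        pthread_mutex_t lock;
        pthread_cond_t  cond;
        int             done;
};

static struct completion flush_wait = {
        PTHREAD_MUTEX_INITIALIZER, PTHREAD_COND_INITIALIZER, 0
};

static void complete(struct completion *c)      /* end_io callback side */
{
        pthread_mutex_lock(&c->lock);
        c->done = 1;
        pthread_cond_signal(&c->cond);
        pthread_mutex_unlock(&c->lock);
}

static void wait_for_completion(struct completion *c)
{
        pthread_mutex_lock(&c->lock);
        while (!c->done)
                pthread_cond_wait(&c->cond, &c->lock);
        pthread_mutex_unlock(&c->lock);
}

static void *disk_thread(void *arg)             /* models the device */
{
        printf("device: cache flushed\n");
        complete(&flush_wait);
        return arg;
}

int main(void)
{
        pthread_t t;

        pthread_create(&t, NULL, disk_thread, NULL);   /* "submit" flush */
        wait_for_completion(&flush_wait);              /* barrier point  */
        pthread_join(t, NULL);
        return 0;
}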