aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs')
-rw-r--r--fs/btrfs/async-thread.c117
-rw-r--r--fs/btrfs/async-thread.h4
-rw-r--r--fs/btrfs/ctree.h3
-rw-r--r--fs/btrfs/delayed-inode.c4
-rw-r--r--fs/btrfs/disk-io.c34
-rw-r--r--fs/btrfs/extent-tree.c145
-rw-r--r--fs/btrfs/extent_io.c24
-rw-r--r--fs/btrfs/file.c8
-rw-r--r--fs/btrfs/inode.c180
-rw-r--r--fs/btrfs/ioctl.c6
-rw-r--r--fs/btrfs/relocation.c2
-rw-r--r--fs/btrfs/scrub.c8
-rw-r--r--fs/btrfs/super.c32
-rw-r--r--fs/btrfs/volumes.c10
14 files changed, 377 insertions, 200 deletions
diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c
index 7ec14097fef..cb97174e236 100644
--- a/fs/btrfs/async-thread.c
+++ b/fs/btrfs/async-thread.c
@@ -64,6 +64,8 @@ struct btrfs_worker_thread {
64 int idle; 64 int idle;
65}; 65};
66 66
67static int __btrfs_start_workers(struct btrfs_workers *workers);
68
67/* 69/*
68 * btrfs_start_workers uses kthread_run, which can block waiting for memory 70 * btrfs_start_workers uses kthread_run, which can block waiting for memory
69 * for a very long time. It will actually throttle on page writeback, 71 * for a very long time. It will actually throttle on page writeback,
@@ -88,27 +90,10 @@ static void start_new_worker_func(struct btrfs_work *work)
88{ 90{
89 struct worker_start *start; 91 struct worker_start *start;
90 start = container_of(work, struct worker_start, work); 92 start = container_of(work, struct worker_start, work);
91 btrfs_start_workers(start->queue, 1); 93 __btrfs_start_workers(start->queue);
92 kfree(start); 94 kfree(start);
93} 95}
94 96
95static int start_new_worker(struct btrfs_workers *queue)
96{
97 struct worker_start *start;
98 int ret;
99
100 start = kzalloc(sizeof(*start), GFP_NOFS);
101 if (!start)
102 return -ENOMEM;
103
104 start->work.func = start_new_worker_func;
105 start->queue = queue;
106 ret = btrfs_queue_worker(queue->atomic_worker_start, &start->work);
107 if (ret)
108 kfree(start);
109 return ret;
110}
111
112/* 97/*
113 * helper function to move a thread onto the idle list after it 98 * helper function to move a thread onto the idle list after it
114 * has finished some requests. 99 * has finished some requests.
@@ -153,12 +138,20 @@ static void check_busy_worker(struct btrfs_worker_thread *worker)
153static void check_pending_worker_creates(struct btrfs_worker_thread *worker) 138static void check_pending_worker_creates(struct btrfs_worker_thread *worker)
154{ 139{
155 struct btrfs_workers *workers = worker->workers; 140 struct btrfs_workers *workers = worker->workers;
141 struct worker_start *start;
156 unsigned long flags; 142 unsigned long flags;
157 143
158 rmb(); 144 rmb();
159 if (!workers->atomic_start_pending) 145 if (!workers->atomic_start_pending)
160 return; 146 return;
161 147
148 start = kzalloc(sizeof(*start), GFP_NOFS);
149 if (!start)
150 return;
151
152 start->work.func = start_new_worker_func;
153 start->queue = workers;
154
162 spin_lock_irqsave(&workers->lock, flags); 155 spin_lock_irqsave(&workers->lock, flags);
163 if (!workers->atomic_start_pending) 156 if (!workers->atomic_start_pending)
164 goto out; 157 goto out;
@@ -170,10 +163,11 @@ static void check_pending_worker_creates(struct btrfs_worker_thread *worker)
170 163
171 workers->num_workers_starting += 1; 164 workers->num_workers_starting += 1;
172 spin_unlock_irqrestore(&workers->lock, flags); 165 spin_unlock_irqrestore(&workers->lock, flags);
173 start_new_worker(workers); 166 btrfs_queue_worker(workers->atomic_worker_start, &start->work);
174 return; 167 return;
175 168
176out: 169out:
170 kfree(start);
177 spin_unlock_irqrestore(&workers->lock, flags); 171 spin_unlock_irqrestore(&workers->lock, flags);
178} 172}
179 173
@@ -331,7 +325,7 @@ again:
331 run_ordered_completions(worker->workers, work); 325 run_ordered_completions(worker->workers, work);
332 326
333 check_pending_worker_creates(worker); 327 check_pending_worker_creates(worker);
334 328 cond_resched();
335 } 329 }
336 330
337 spin_lock_irq(&worker->lock); 331 spin_lock_irq(&worker->lock);
@@ -462,56 +456,55 @@ void btrfs_init_workers(struct btrfs_workers *workers, char *name, int max,
462 * starts new worker threads. This does not enforce the max worker 456 * starts new worker threads. This does not enforce the max worker
463 * count in case you need to temporarily go past it. 457 * count in case you need to temporarily go past it.
464 */ 458 */
465static int __btrfs_start_workers(struct btrfs_workers *workers, 459static int __btrfs_start_workers(struct btrfs_workers *workers)
466 int num_workers)
467{ 460{
468 struct btrfs_worker_thread *worker; 461 struct btrfs_worker_thread *worker;
469 int ret = 0; 462 int ret = 0;
470 int i;
471 463
472 for (i = 0; i < num_workers; i++) { 464 worker = kzalloc(sizeof(*worker), GFP_NOFS);
473 worker = kzalloc(sizeof(*worker), GFP_NOFS); 465 if (!worker) {
474 if (!worker) { 466 ret = -ENOMEM;
475 ret = -ENOMEM; 467 goto fail;
476 goto fail; 468 }
477 }
478 469
479 INIT_LIST_HEAD(&worker->pending); 470 INIT_LIST_HEAD(&worker->pending);
480 INIT_LIST_HEAD(&worker->prio_pending); 471 INIT_LIST_HEAD(&worker->prio_pending);
481 INIT_LIST_HEAD(&worker->worker_list); 472 INIT_LIST_HEAD(&worker->worker_list);
482 spin_lock_init(&worker->lock); 473 spin_lock_init(&worker->lock);
483 474
484 atomic_set(&worker->num_pending, 0); 475 atomic_set(&worker->num_pending, 0);
485 atomic_set(&worker->refs, 1); 476 atomic_set(&worker->refs, 1);
486 worker->workers = workers; 477 worker->workers = workers;
487 worker->task = kthread_run(worker_loop, worker, 478 worker->task = kthread_run(worker_loop, worker,
488 "btrfs-%s-%d", workers->name, 479 "btrfs-%s-%d", workers->name,
489 workers->num_workers + i); 480 workers->num_workers + 1);
490 if (IS_ERR(worker->task)) { 481 if (IS_ERR(worker->task)) {
491 ret = PTR_ERR(worker->task); 482 ret = PTR_ERR(worker->task);
492 kfree(worker); 483 kfree(worker);
493 goto fail; 484 goto fail;
494 }
495 spin_lock_irq(&workers->lock);
496 list_add_tail(&worker->worker_list, &workers->idle_list);
497 worker->idle = 1;
498 workers->num_workers++;
499 workers->num_workers_starting--;
500 WARN_ON(workers->num_workers_starting < 0);
501 spin_unlock_irq(&workers->lock);
502 } 485 }
486 spin_lock_irq(&workers->lock);
487 list_add_tail(&worker->worker_list, &workers->idle_list);
488 worker->idle = 1;
489 workers->num_workers++;
490 workers->num_workers_starting--;
491 WARN_ON(workers->num_workers_starting < 0);
492 spin_unlock_irq(&workers->lock);
493
503 return 0; 494 return 0;
504fail: 495fail:
505 btrfs_stop_workers(workers); 496 spin_lock_irq(&workers->lock);
497 workers->num_workers_starting--;
498 spin_unlock_irq(&workers->lock);
506 return ret; 499 return ret;
507} 500}
508 501
509int btrfs_start_workers(struct btrfs_workers *workers, int num_workers) 502int btrfs_start_workers(struct btrfs_workers *workers)
510{ 503{
511 spin_lock_irq(&workers->lock); 504 spin_lock_irq(&workers->lock);
512 workers->num_workers_starting += num_workers; 505 workers->num_workers_starting++;
513 spin_unlock_irq(&workers->lock); 506 spin_unlock_irq(&workers->lock);
514 return __btrfs_start_workers(workers, num_workers); 507 return __btrfs_start_workers(workers);
515} 508}
516 509
517/* 510/*
@@ -568,6 +561,7 @@ static struct btrfs_worker_thread *find_worker(struct btrfs_workers *workers)
568 struct btrfs_worker_thread *worker; 561 struct btrfs_worker_thread *worker;
569 unsigned long flags; 562 unsigned long flags;
570 struct list_head *fallback; 563 struct list_head *fallback;
564 int ret;
571 565
572again: 566again:
573 spin_lock_irqsave(&workers->lock, flags); 567 spin_lock_irqsave(&workers->lock, flags);
@@ -584,7 +578,9 @@ again:
584 workers->num_workers_starting++; 578 workers->num_workers_starting++;
585 spin_unlock_irqrestore(&workers->lock, flags); 579 spin_unlock_irqrestore(&workers->lock, flags);
586 /* we're below the limit, start another worker */ 580 /* we're below the limit, start another worker */
587 __btrfs_start_workers(workers, 1); 581 ret = __btrfs_start_workers(workers);
582 if (ret)
583 goto fallback;
588 goto again; 584 goto again;
589 } 585 }
590 } 586 }
@@ -665,7 +661,7 @@ void btrfs_set_work_high_prio(struct btrfs_work *work)
665/* 661/*
666 * places a struct btrfs_work into the pending queue of one of the kthreads 662 * places a struct btrfs_work into the pending queue of one of the kthreads
667 */ 663 */
668int btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work) 664void btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work)
669{ 665{
670 struct btrfs_worker_thread *worker; 666 struct btrfs_worker_thread *worker;
671 unsigned long flags; 667 unsigned long flags;
@@ -673,7 +669,7 @@ int btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work)
673 669
674 /* don't requeue something already on a list */ 670 /* don't requeue something already on a list */
675 if (test_and_set_bit(WORK_QUEUED_BIT, &work->flags)) 671 if (test_and_set_bit(WORK_QUEUED_BIT, &work->flags))
676 goto out; 672 return;
677 673
678 worker = find_worker(workers); 674 worker = find_worker(workers);
679 if (workers->ordered) { 675 if (workers->ordered) {
@@ -712,7 +708,4 @@ int btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work)
712 if (wake) 708 if (wake)
713 wake_up_process(worker->task); 709 wake_up_process(worker->task);
714 spin_unlock_irqrestore(&worker->lock, flags); 710 spin_unlock_irqrestore(&worker->lock, flags);
715
716out:
717 return 0;
718} 711}
diff --git a/fs/btrfs/async-thread.h b/fs/btrfs/async-thread.h
index 5077746cf85..f34cc31fa3c 100644
--- a/fs/btrfs/async-thread.h
+++ b/fs/btrfs/async-thread.h
@@ -109,8 +109,8 @@ struct btrfs_workers {
109 char *name; 109 char *name;
110}; 110};
111 111
112int btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work); 112void btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work);
113int btrfs_start_workers(struct btrfs_workers *workers, int num_workers); 113int btrfs_start_workers(struct btrfs_workers *workers);
114int btrfs_stop_workers(struct btrfs_workers *workers); 114int btrfs_stop_workers(struct btrfs_workers *workers);
115void btrfs_init_workers(struct btrfs_workers *workers, char *name, int max, 115void btrfs_init_workers(struct btrfs_workers *workers, char *name, int max,
116 struct btrfs_workers *async_starter); 116 struct btrfs_workers *async_starter);
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 50634abef9b..67385033323 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -2692,7 +2692,8 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);
2692int btrfs_readpage(struct file *file, struct page *page); 2692int btrfs_readpage(struct file *file, struct page *page);
2693void btrfs_evict_inode(struct inode *inode); 2693void btrfs_evict_inode(struct inode *inode);
2694int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc); 2694int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc);
2695void btrfs_dirty_inode(struct inode *inode, int flags); 2695int btrfs_dirty_inode(struct inode *inode);
2696int btrfs_update_time(struct file *file);
2696struct inode *btrfs_alloc_inode(struct super_block *sb); 2697struct inode *btrfs_alloc_inode(struct super_block *sb);
2697void btrfs_destroy_inode(struct inode *inode); 2698void btrfs_destroy_inode(struct inode *inode);
2698int btrfs_drop_inode(struct inode *inode); 2699int btrfs_drop_inode(struct inode *inode);
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index 5b163572e0c..9c1eccc2c50 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -640,8 +640,8 @@ static int btrfs_delayed_inode_reserve_metadata(
640 * Now if src_rsv == delalloc_block_rsv we'll let it just steal since 640 * Now if src_rsv == delalloc_block_rsv we'll let it just steal since
641 * we're accounted for. 641 * we're accounted for.
642 */ 642 */
643 if (!trans->bytes_reserved && 643 if (!src_rsv || (!trans->bytes_reserved &&
644 src_rsv != &root->fs_info->delalloc_block_rsv) { 644 src_rsv != &root->fs_info->delalloc_block_rsv)) {
645 ret = btrfs_block_rsv_add_noflush(root, dst_rsv, num_bytes); 645 ret = btrfs_block_rsv_add_noflush(root, dst_rsv, num_bytes);
646 /* 646 /*
647 * Since we're under a transaction reserve_metadata_bytes could 647 * Since we're under a transaction reserve_metadata_bytes could
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 632f8f3cc9d..f44b3928dc2 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -2194,19 +2194,27 @@ struct btrfs_root *open_ctree(struct super_block *sb,
2194 fs_info->endio_meta_write_workers.idle_thresh = 2; 2194 fs_info->endio_meta_write_workers.idle_thresh = 2;
2195 fs_info->readahead_workers.idle_thresh = 2; 2195 fs_info->readahead_workers.idle_thresh = 2;
2196 2196
2197 btrfs_start_workers(&fs_info->workers, 1); 2197 /*
2198 btrfs_start_workers(&fs_info->generic_worker, 1); 2198 * btrfs_start_workers can really only fail because of ENOMEM so just
2199 btrfs_start_workers(&fs_info->submit_workers, 1); 2199 * return -ENOMEM if any of these fail.
2200 btrfs_start_workers(&fs_info->delalloc_workers, 1); 2200 */
2201 btrfs_start_workers(&fs_info->fixup_workers, 1); 2201 ret = btrfs_start_workers(&fs_info->workers);
2202 btrfs_start_workers(&fs_info->endio_workers, 1); 2202 ret |= btrfs_start_workers(&fs_info->generic_worker);
2203 btrfs_start_workers(&fs_info->endio_meta_workers, 1); 2203 ret |= btrfs_start_workers(&fs_info->submit_workers);
2204 btrfs_start_workers(&fs_info->endio_meta_write_workers, 1); 2204 ret |= btrfs_start_workers(&fs_info->delalloc_workers);
2205 btrfs_start_workers(&fs_info->endio_write_workers, 1); 2205 ret |= btrfs_start_workers(&fs_info->fixup_workers);
2206 btrfs_start_workers(&fs_info->endio_freespace_worker, 1); 2206 ret |= btrfs_start_workers(&fs_info->endio_workers);
2207 btrfs_start_workers(&fs_info->delayed_workers, 1); 2207 ret |= btrfs_start_workers(&fs_info->endio_meta_workers);
2208 btrfs_start_workers(&fs_info->caching_workers, 1); 2208 ret |= btrfs_start_workers(&fs_info->endio_meta_write_workers);
2209 btrfs_start_workers(&fs_info->readahead_workers, 1); 2209 ret |= btrfs_start_workers(&fs_info->endio_write_workers);
2210 ret |= btrfs_start_workers(&fs_info->endio_freespace_worker);
2211 ret |= btrfs_start_workers(&fs_info->delayed_workers);
2212 ret |= btrfs_start_workers(&fs_info->caching_workers);
2213 ret |= btrfs_start_workers(&fs_info->readahead_workers);
2214 if (ret) {
2215 ret = -ENOMEM;
2216 goto fail_sb_buffer;
2217 }
2210 2218
2211 fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super); 2219 fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super);
2212 fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages, 2220 fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages,
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index f0d5718d258..f5fbe576d2b 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -2822,7 +2822,7 @@ out_free:
2822 btrfs_release_path(path); 2822 btrfs_release_path(path);
2823out: 2823out:
2824 spin_lock(&block_group->lock); 2824 spin_lock(&block_group->lock);
2825 if (!ret) 2825 if (!ret && dcs == BTRFS_DC_SETUP)
2826 block_group->cache_generation = trans->transid; 2826 block_group->cache_generation = trans->transid;
2827 block_group->disk_cache_state = dcs; 2827 block_group->disk_cache_state = dcs;
2828 spin_unlock(&block_group->lock); 2828 spin_unlock(&block_group->lock);
@@ -4204,12 +4204,17 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
4204 struct btrfs_root *root = BTRFS_I(inode)->root; 4204 struct btrfs_root *root = BTRFS_I(inode)->root;
4205 struct btrfs_block_rsv *block_rsv = &root->fs_info->delalloc_block_rsv; 4205 struct btrfs_block_rsv *block_rsv = &root->fs_info->delalloc_block_rsv;
4206 u64 to_reserve = 0; 4206 u64 to_reserve = 0;
4207 u64 csum_bytes;
4207 unsigned nr_extents = 0; 4208 unsigned nr_extents = 0;
4209 int extra_reserve = 0;
4208 int flush = 1; 4210 int flush = 1;
4209 int ret; 4211 int ret;
4210 4212
4213 /* Need to be holding the i_mutex here if we aren't free space cache */
4211 if (btrfs_is_free_space_inode(root, inode)) 4214 if (btrfs_is_free_space_inode(root, inode))
4212 flush = 0; 4215 flush = 0;
4216 else
4217 WARN_ON(!mutex_is_locked(&inode->i_mutex));
4213 4218
4214 if (flush && btrfs_transaction_in_commit(root->fs_info)) 4219 if (flush && btrfs_transaction_in_commit(root->fs_info))
4215 schedule_timeout(1); 4220 schedule_timeout(1);
@@ -4220,11 +4225,9 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
4220 BTRFS_I(inode)->outstanding_extents++; 4225 BTRFS_I(inode)->outstanding_extents++;
4221 4226
4222 if (BTRFS_I(inode)->outstanding_extents > 4227 if (BTRFS_I(inode)->outstanding_extents >
4223 BTRFS_I(inode)->reserved_extents) { 4228 BTRFS_I(inode)->reserved_extents)
4224 nr_extents = BTRFS_I(inode)->outstanding_extents - 4229 nr_extents = BTRFS_I(inode)->outstanding_extents -
4225 BTRFS_I(inode)->reserved_extents; 4230 BTRFS_I(inode)->reserved_extents;
4226 BTRFS_I(inode)->reserved_extents += nr_extents;
4227 }
4228 4231
4229 /* 4232 /*
4230 * Add an item to reserve for updating the inode when we complete the 4233 * Add an item to reserve for updating the inode when we complete the
@@ -4232,11 +4235,12 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
4232 */ 4235 */
4233 if (!BTRFS_I(inode)->delalloc_meta_reserved) { 4236 if (!BTRFS_I(inode)->delalloc_meta_reserved) {
4234 nr_extents++; 4237 nr_extents++;
4235 BTRFS_I(inode)->delalloc_meta_reserved = 1; 4238 extra_reserve = 1;
4236 } 4239 }
4237 4240
4238 to_reserve = btrfs_calc_trans_metadata_size(root, nr_extents); 4241 to_reserve = btrfs_calc_trans_metadata_size(root, nr_extents);
4239 to_reserve += calc_csum_metadata_size(inode, num_bytes, 1); 4242 to_reserve += calc_csum_metadata_size(inode, num_bytes, 1);
4243 csum_bytes = BTRFS_I(inode)->csum_bytes;
4240 spin_unlock(&BTRFS_I(inode)->lock); 4244 spin_unlock(&BTRFS_I(inode)->lock);
4241 4245
4242 ret = reserve_metadata_bytes(root, block_rsv, to_reserve, flush); 4246 ret = reserve_metadata_bytes(root, block_rsv, to_reserve, flush);
@@ -4246,22 +4250,35 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
4246 4250
4247 spin_lock(&BTRFS_I(inode)->lock); 4251 spin_lock(&BTRFS_I(inode)->lock);
4248 dropped = drop_outstanding_extent(inode); 4252 dropped = drop_outstanding_extent(inode);
4249 to_free = calc_csum_metadata_size(inode, num_bytes, 0);
4250 spin_unlock(&BTRFS_I(inode)->lock);
4251 to_free += btrfs_calc_trans_metadata_size(root, dropped);
4252
4253 /* 4253 /*
4254 * Somebody could have come in and twiddled with the 4254 * If the inodes csum_bytes is the same as the original
4255 * reservation, so if we have to free more than we would have 4255 * csum_bytes then we know we haven't raced with any free()ers
4256 * reserved from this reservation go ahead and release those 4256 * so we can just reduce our inodes csum bytes and carry on.
4257 * bytes. 4257 * Otherwise we have to do the normal free thing to account for
4258 * the case that the free side didn't free up its reserve
4259 * because of this outstanding reservation.
4258 */ 4260 */
4259 to_free -= to_reserve; 4261 if (BTRFS_I(inode)->csum_bytes == csum_bytes)
4262 calc_csum_metadata_size(inode, num_bytes, 0);
4263 else
4264 to_free = calc_csum_metadata_size(inode, num_bytes, 0);
4265 spin_unlock(&BTRFS_I(inode)->lock);
4266 if (dropped)
4267 to_free += btrfs_calc_trans_metadata_size(root, dropped);
4268
4260 if (to_free) 4269 if (to_free)
4261 btrfs_block_rsv_release(root, block_rsv, to_free); 4270 btrfs_block_rsv_release(root, block_rsv, to_free);
4262 return ret; 4271 return ret;
4263 } 4272 }
4264 4273
4274 spin_lock(&BTRFS_I(inode)->lock);
4275 if (extra_reserve) {
4276 BTRFS_I(inode)->delalloc_meta_reserved = 1;
4277 nr_extents--;
4278 }
4279 BTRFS_I(inode)->reserved_extents += nr_extents;
4280 spin_unlock(&BTRFS_I(inode)->lock);
4281
4265 block_rsv_add_bytes(block_rsv, to_reserve, 1); 4282 block_rsv_add_bytes(block_rsv, to_reserve, 1);
4266 4283
4267 return 0; 4284 return 0;
@@ -5107,11 +5124,11 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
5107 struct btrfs_root *root = orig_root->fs_info->extent_root; 5124 struct btrfs_root *root = orig_root->fs_info->extent_root;
5108 struct btrfs_free_cluster *last_ptr = NULL; 5125 struct btrfs_free_cluster *last_ptr = NULL;
5109 struct btrfs_block_group_cache *block_group = NULL; 5126 struct btrfs_block_group_cache *block_group = NULL;
5127 struct btrfs_block_group_cache *used_block_group;
5110 int empty_cluster = 2 * 1024 * 1024; 5128 int empty_cluster = 2 * 1024 * 1024;
5111 int allowed_chunk_alloc = 0; 5129 int allowed_chunk_alloc = 0;
5112 int done_chunk_alloc = 0; 5130 int done_chunk_alloc = 0;
5113 struct btrfs_space_info *space_info; 5131 struct btrfs_space_info *space_info;
5114 int last_ptr_loop = 0;
5115 int loop = 0; 5132 int loop = 0;
5116 int index = 0; 5133 int index = 0;
5117 int alloc_type = (data & BTRFS_BLOCK_GROUP_DATA) ? 5134 int alloc_type = (data & BTRFS_BLOCK_GROUP_DATA) ?
@@ -5173,6 +5190,7 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
5173ideal_cache: 5190ideal_cache:
5174 block_group = btrfs_lookup_block_group(root->fs_info, 5191 block_group = btrfs_lookup_block_group(root->fs_info,
5175 search_start); 5192 search_start);
5193 used_block_group = block_group;
5176 /* 5194 /*
5177 * we don't want to use the block group if it doesn't match our 5195 * we don't want to use the block group if it doesn't match our
5178 * allocation bits, or if its not cached. 5196 * allocation bits, or if its not cached.
@@ -5210,6 +5228,7 @@ search:
5210 u64 offset; 5228 u64 offset;
5211 int cached; 5229 int cached;
5212 5230
5231 used_block_group = block_group;
5213 btrfs_get_block_group(block_group); 5232 btrfs_get_block_group(block_group);
5214 search_start = block_group->key.objectid; 5233 search_start = block_group->key.objectid;
5215 5234
@@ -5286,71 +5305,62 @@ alloc:
5286 spin_unlock(&block_group->free_space_ctl->tree_lock); 5305 spin_unlock(&block_group->free_space_ctl->tree_lock);
5287 5306
5288 /* 5307 /*
5289 * Ok we want to try and use the cluster allocator, so lets look 5308 * Ok we want to try and use the cluster allocator, so
5290 * there, unless we are on LOOP_NO_EMPTY_SIZE, since we will 5309 * lets look there
5291 * have tried the cluster allocator plenty of times at this
5292 * point and not have found anything, so we are likely way too
5293 * fragmented for the clustering stuff to find anything, so lets
5294 * just skip it and let the allocator find whatever block it can
5295 * find
5296 */ 5310 */
5297 if (last_ptr && loop < LOOP_NO_EMPTY_SIZE) { 5311 if (last_ptr) {
5298 /* 5312 /*
5299 * the refill lock keeps out other 5313 * the refill lock keeps out other
5300 * people trying to start a new cluster 5314 * people trying to start a new cluster
5301 */ 5315 */
5302 spin_lock(&last_ptr->refill_lock); 5316 spin_lock(&last_ptr->refill_lock);
5303 if (!last_ptr->block_group || 5317 used_block_group = last_ptr->block_group;
5304 last_ptr->block_group->ro || 5318 if (used_block_group != block_group &&
5305 !block_group_bits(last_ptr->block_group, data)) 5319 (!used_block_group ||
5320 used_block_group->ro ||
5321 !block_group_bits(used_block_group, data))) {
5322 used_block_group = block_group;
5306 goto refill_cluster; 5323 goto refill_cluster;
5324 }
5325
5326 if (used_block_group != block_group)
5327 btrfs_get_block_group(used_block_group);
5307 5328
5308 offset = btrfs_alloc_from_cluster(block_group, last_ptr, 5329 offset = btrfs_alloc_from_cluster(used_block_group,
5309 num_bytes, search_start); 5330 last_ptr, num_bytes, used_block_group->key.objectid);
5310 if (offset) { 5331 if (offset) {
5311 /* we have a block, we're done */ 5332 /* we have a block, we're done */
5312 spin_unlock(&last_ptr->refill_lock); 5333 spin_unlock(&last_ptr->refill_lock);
5313 goto checks; 5334 goto checks;
5314 } 5335 }
5315 5336
5316 spin_lock(&last_ptr->lock); 5337 WARN_ON(last_ptr->block_group != used_block_group);
5317 /* 5338 if (used_block_group != block_group) {
5318 * whoops, this cluster doesn't actually point to 5339 btrfs_put_block_group(used_block_group);
5319 * this block group. Get a ref on the block 5340 used_block_group = block_group;
5320 * group is does point to and try again
5321 */
5322 if (!last_ptr_loop && last_ptr->block_group &&
5323 last_ptr->block_group != block_group &&
5324 index <=
5325 get_block_group_index(last_ptr->block_group)) {
5326
5327 btrfs_put_block_group(block_group);
5328 block_group = last_ptr->block_group;
5329 btrfs_get_block_group(block_group);
5330 spin_unlock(&last_ptr->lock);
5331 spin_unlock(&last_ptr->refill_lock);
5332
5333 last_ptr_loop = 1;
5334 search_start = block_group->key.objectid;
5335 /*
5336 * we know this block group is properly
5337 * in the list because
5338 * btrfs_remove_block_group, drops the
5339 * cluster before it removes the block
5340 * group from the list
5341 */
5342 goto have_block_group;
5343 } 5341 }
5344 spin_unlock(&last_ptr->lock);
5345refill_cluster: 5342refill_cluster:
5343 BUG_ON(used_block_group != block_group);
5344 /* If we are on LOOP_NO_EMPTY_SIZE, we can't
5345 * set up a new clusters, so lets just skip it
5346 * and let the allocator find whatever block
5347 * it can find. If we reach this point, we
5348 * will have tried the cluster allocator
5349 * plenty of times and not have found
5350 * anything, so we are likely way too
5351 * fragmented for the clustering stuff to find
5352 * anything. */
5353 if (loop >= LOOP_NO_EMPTY_SIZE) {
5354 spin_unlock(&last_ptr->refill_lock);
5355 goto unclustered_alloc;
5356 }
5357
5346 /* 5358 /*
5347 * this cluster didn't work out, free it and 5359 * this cluster didn't work out, free it and
5348 * start over 5360 * start over
5349 */ 5361 */
5350 btrfs_return_cluster_to_free_space(NULL, last_ptr); 5362 btrfs_return_cluster_to_free_space(NULL, last_ptr);
5351 5363
5352 last_ptr_loop = 0;
5353
5354 /* allocate a cluster in this block group */ 5364 /* allocate a cluster in this block group */
5355 ret = btrfs_find_space_cluster(trans, root, 5365 ret = btrfs_find_space_cluster(trans, root,
5356 block_group, last_ptr, 5366 block_group, last_ptr,
@@ -5390,6 +5400,7 @@ refill_cluster:
5390 goto loop; 5400 goto loop;
5391 } 5401 }
5392 5402
5403unclustered_alloc:
5393 offset = btrfs_find_space_for_alloc(block_group, search_start, 5404 offset = btrfs_find_space_for_alloc(block_group, search_start,
5394 num_bytes, empty_size); 5405 num_bytes, empty_size);
5395 /* 5406 /*
@@ -5416,14 +5427,14 @@ checks:
5416 search_start = stripe_align(root, offset); 5427 search_start = stripe_align(root, offset);
5417 /* move on to the next group */ 5428 /* move on to the next group */
5418 if (search_start + num_bytes >= search_end) { 5429 if (search_start + num_bytes >= search_end) {
5419 btrfs_add_free_space(block_group, offset, num_bytes); 5430 btrfs_add_free_space(used_block_group, offset, num_bytes);
5420 goto loop; 5431 goto loop;
5421 } 5432 }
5422 5433
5423 /* move on to the next group */ 5434 /* move on to the next group */
5424 if (search_start + num_bytes > 5435 if (search_start + num_bytes >
5425 block_group->key.objectid + block_group->key.offset) { 5436 used_block_group->key.objectid + used_block_group->key.offset) {
5426 btrfs_add_free_space(block_group, offset, num_bytes); 5437 btrfs_add_free_space(used_block_group, offset, num_bytes);
5427 goto loop; 5438 goto loop;
5428 } 5439 }
5429 5440
@@ -5431,14 +5442,14 @@ checks:
5431 ins->offset = num_bytes; 5442 ins->offset = num_bytes;
5432 5443
5433 if (offset < search_start) 5444 if (offset < search_start)
5434 btrfs_add_free_space(block_group, offset, 5445 btrfs_add_free_space(used_block_group, offset,
5435 search_start - offset); 5446 search_start - offset);
5436 BUG_ON(offset > search_start); 5447 BUG_ON(offset > search_start);
5437 5448
5438 ret = btrfs_update_reserved_bytes(block_group, num_bytes, 5449 ret = btrfs_update_reserved_bytes(used_block_group, num_bytes,
5439 alloc_type); 5450 alloc_type);
5440 if (ret == -EAGAIN) { 5451 if (ret == -EAGAIN) {
5441 btrfs_add_free_space(block_group, offset, num_bytes); 5452 btrfs_add_free_space(used_block_group, offset, num_bytes);
5442 goto loop; 5453 goto loop;
5443 } 5454 }
5444 5455
@@ -5447,15 +5458,19 @@ checks:
5447 ins->offset = num_bytes; 5458 ins->offset = num_bytes;
5448 5459
5449 if (offset < search_start) 5460 if (offset < search_start)
5450 btrfs_add_free_space(block_group, offset, 5461 btrfs_add_free_space(used_block_group, offset,
5451 search_start - offset); 5462 search_start - offset);
5452 BUG_ON(offset > search_start); 5463 BUG_ON(offset > search_start);
5464 if (used_block_group != block_group)
5465 btrfs_put_block_group(used_block_group);
5453 btrfs_put_block_group(block_group); 5466 btrfs_put_block_group(block_group);
5454 break; 5467 break;
5455loop: 5468loop:
5456 failed_cluster_refill = false; 5469 failed_cluster_refill = false;
5457 failed_alloc = false; 5470 failed_alloc = false;
5458 BUG_ON(index != get_block_group_index(block_group)); 5471 BUG_ON(index != get_block_group_index(block_group));
5472 if (used_block_group != block_group)
5473 btrfs_put_block_group(used_block_group);
5459 btrfs_put_block_group(block_group); 5474 btrfs_put_block_group(block_group);
5460 } 5475 }
5461 up_read(&space_info->groups_sem); 5476 up_read(&space_info->groups_sem);
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index be1bf627a14..49f3c9dc09f 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -935,8 +935,10 @@ again:
935 node = tree_search(tree, start); 935 node = tree_search(tree, start);
936 if (!node) { 936 if (!node) {
937 prealloc = alloc_extent_state_atomic(prealloc); 937 prealloc = alloc_extent_state_atomic(prealloc);
938 if (!prealloc) 938 if (!prealloc) {
939 return -ENOMEM; 939 err = -ENOMEM;
940 goto out;
941 }
940 err = insert_state(tree, prealloc, start, end, &bits); 942 err = insert_state(tree, prealloc, start, end, &bits);
941 prealloc = NULL; 943 prealloc = NULL;
942 BUG_ON(err == -EEXIST); 944 BUG_ON(err == -EEXIST);
@@ -992,8 +994,10 @@ hit_next:
992 */ 994 */
993 if (state->start < start) { 995 if (state->start < start) {
994 prealloc = alloc_extent_state_atomic(prealloc); 996 prealloc = alloc_extent_state_atomic(prealloc);
995 if (!prealloc) 997 if (!prealloc) {
996 return -ENOMEM; 998 err = -ENOMEM;
999 goto out;
1000 }
997 err = split_state(tree, state, prealloc, start); 1001 err = split_state(tree, state, prealloc, start);
998 BUG_ON(err == -EEXIST); 1002 BUG_ON(err == -EEXIST);
999 prealloc = NULL; 1003 prealloc = NULL;
@@ -1024,8 +1028,10 @@ hit_next:
1024 this_end = last_start - 1; 1028 this_end = last_start - 1;
1025 1029
1026 prealloc = alloc_extent_state_atomic(prealloc); 1030 prealloc = alloc_extent_state_atomic(prealloc);
1027 if (!prealloc) 1031 if (!prealloc) {
1028 return -ENOMEM; 1032 err = -ENOMEM;
1033 goto out;
1034 }
1029 1035
1030 /* 1036 /*
1031 * Avoid to free 'prealloc' if it can be merged with 1037 * Avoid to free 'prealloc' if it can be merged with
@@ -1051,8 +1057,10 @@ hit_next:
1051 */ 1057 */
1052 if (state->start <= end && state->end > end) { 1058 if (state->start <= end && state->end > end) {
1053 prealloc = alloc_extent_state_atomic(prealloc); 1059 prealloc = alloc_extent_state_atomic(prealloc);
1054 if (!prealloc) 1060 if (!prealloc) {
1055 return -ENOMEM; 1061 err = -ENOMEM;
1062 goto out;
1063 }
1056 1064
1057 err = split_state(tree, state, prealloc, end + 1); 1065 err = split_state(tree, state, prealloc, end + 1);
1058 BUG_ON(err == -EEXIST); 1066 BUG_ON(err == -EEXIST);
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index dafdfa059bf..97fbe939c05 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1167,6 +1167,8 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
1167 nrptrs = min((iov_iter_count(i) + PAGE_CACHE_SIZE - 1) / 1167 nrptrs = min((iov_iter_count(i) + PAGE_CACHE_SIZE - 1) /
1168 PAGE_CACHE_SIZE, PAGE_CACHE_SIZE / 1168 PAGE_CACHE_SIZE, PAGE_CACHE_SIZE /
1169 (sizeof(struct page *))); 1169 (sizeof(struct page *)));
1170 nrptrs = min(nrptrs, current->nr_dirtied_pause - current->nr_dirtied);
1171 nrptrs = max(nrptrs, 8);
1170 pages = kmalloc(nrptrs * sizeof(struct page *), GFP_KERNEL); 1172 pages = kmalloc(nrptrs * sizeof(struct page *), GFP_KERNEL);
1171 if (!pages) 1173 if (!pages)
1172 return -ENOMEM; 1174 return -ENOMEM;
@@ -1387,7 +1389,11 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
1387 goto out; 1389 goto out;
1388 } 1390 }
1389 1391
1390 file_update_time(file); 1392 err = btrfs_update_time(file);
1393 if (err) {
1394 mutex_unlock(&inode->i_mutex);
1395 goto out;
1396 }
1391 BTRFS_I(inode)->sequence++; 1397 BTRFS_I(inode)->sequence++;
1392 1398
1393 start_pos = round_down(pos, root->sectorsize); 1399 start_pos = round_down(pos, root->sectorsize);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 2c984f7d4c2..0a6b928813a 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -38,6 +38,7 @@
38#include <linux/falloc.h> 38#include <linux/falloc.h>
39#include <linux/slab.h> 39#include <linux/slab.h>
40#include <linux/ratelimit.h> 40#include <linux/ratelimit.h>
41#include <linux/mount.h>
41#include "compat.h" 42#include "compat.h"
42#include "ctree.h" 43#include "ctree.h"
43#include "disk-io.h" 44#include "disk-io.h"
@@ -2031,7 +2032,7 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode)
2031 /* insert an orphan item to track this unlinked/truncated file */ 2032 /* insert an orphan item to track this unlinked/truncated file */
2032 if (insert >= 1) { 2033 if (insert >= 1) {
2033 ret = btrfs_insert_orphan_item(trans, root, btrfs_ino(inode)); 2034 ret = btrfs_insert_orphan_item(trans, root, btrfs_ino(inode));
2034 BUG_ON(ret); 2035 BUG_ON(ret && ret != -EEXIST);
2035 } 2036 }
2036 2037
2037 /* insert an orphan item to track subvolume contains orphan files */ 2038 /* insert an orphan item to track subvolume contains orphan files */
@@ -2158,6 +2159,38 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
2158 if (ret && ret != -ESTALE) 2159 if (ret && ret != -ESTALE)
2159 goto out; 2160 goto out;
2160 2161
2162 if (ret == -ESTALE && root == root->fs_info->tree_root) {
2163 struct btrfs_root *dead_root;
2164 struct btrfs_fs_info *fs_info = root->fs_info;
2165 int is_dead_root = 0;
2166
2167 /*
2168 * this is an orphan in the tree root. Currently these
2169 * could come from 2 sources:
2170 * a) a snapshot deletion in progress
2171 * b) a free space cache inode
2172 * We need to distinguish those two, as the snapshot
2173 * orphan must not get deleted.
2174 * find_dead_roots already ran before us, so if this
2175 * is a snapshot deletion, we should find the root
2176 * in the dead_roots list
2177 */
2178 spin_lock(&fs_info->trans_lock);
2179 list_for_each_entry(dead_root, &fs_info->dead_roots,
2180 root_list) {
2181 if (dead_root->root_key.objectid ==
2182 found_key.objectid) {
2183 is_dead_root = 1;
2184 break;
2185 }
2186 }
2187 spin_unlock(&fs_info->trans_lock);
2188 if (is_dead_root) {
2189 /* prevent this orphan from being found again */
2190 key.offset = found_key.objectid - 1;
2191 continue;
2192 }
2193 }
2161 /* 2194 /*
2162 * Inode is already gone but the orphan item is still there, 2195 * Inode is already gone but the orphan item is still there,
2163 * kill the orphan item. 2196 * kill the orphan item.
@@ -2191,7 +2224,14 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
2191 continue; 2224 continue;
2192 } 2225 }
2193 nr_truncate++; 2226 nr_truncate++;
2227 /*
2228 * Need to hold the imutex for reservation purposes, not
2229 * a huge deal here but I have a WARN_ON in
2230 * btrfs_delalloc_reserve_space to catch offenders.
2231 */
2232 mutex_lock(&inode->i_mutex);
2194 ret = btrfs_truncate(inode); 2233 ret = btrfs_truncate(inode);
2234 mutex_unlock(&inode->i_mutex);
2195 } else { 2235 } else {
2196 nr_unlink++; 2236 nr_unlink++;
2197 } 2237 }
@@ -3327,7 +3367,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
3327 u64 hint_byte = 0; 3367 u64 hint_byte = 0;
3328 hole_size = last_byte - cur_offset; 3368 hole_size = last_byte - cur_offset;
3329 3369
3330 trans = btrfs_start_transaction(root, 2); 3370 trans = btrfs_start_transaction(root, 3);
3331 if (IS_ERR(trans)) { 3371 if (IS_ERR(trans)) {
3332 err = PTR_ERR(trans); 3372 err = PTR_ERR(trans);
3333 break; 3373 break;
@@ -3337,6 +3377,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
3337 cur_offset + hole_size, 3377 cur_offset + hole_size,
3338 &hint_byte, 1); 3378 &hint_byte, 1);
3339 if (err) { 3379 if (err) {
3380 btrfs_update_inode(trans, root, inode);
3340 btrfs_end_transaction(trans, root); 3381 btrfs_end_transaction(trans, root);
3341 break; 3382 break;
3342 } 3383 }
@@ -3346,6 +3387,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
3346 0, hole_size, 0, hole_size, 3387 0, hole_size, 0, hole_size,
3347 0, 0, 0); 3388 0, 0, 0);
3348 if (err) { 3389 if (err) {
3390 btrfs_update_inode(trans, root, inode);
3349 btrfs_end_transaction(trans, root); 3391 btrfs_end_transaction(trans, root);
3350 break; 3392 break;
3351 } 3393 }
@@ -3353,6 +3395,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
3353 btrfs_drop_extent_cache(inode, hole_start, 3395 btrfs_drop_extent_cache(inode, hole_start,
3354 last_byte - 1, 0); 3396 last_byte - 1, 0);
3355 3397
3398 btrfs_update_inode(trans, root, inode);
3356 btrfs_end_transaction(trans, root); 3399 btrfs_end_transaction(trans, root);
3357 } 3400 }
3358 free_extent_map(em); 3401 free_extent_map(em);
@@ -3370,6 +3413,8 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
3370 3413
3371static int btrfs_setsize(struct inode *inode, loff_t newsize) 3414static int btrfs_setsize(struct inode *inode, loff_t newsize)
3372{ 3415{
3416 struct btrfs_root *root = BTRFS_I(inode)->root;
3417 struct btrfs_trans_handle *trans;
3373 loff_t oldsize = i_size_read(inode); 3418 loff_t oldsize = i_size_read(inode);
3374 int ret; 3419 int ret;
3375 3420
@@ -3377,16 +3422,19 @@ static int btrfs_setsize(struct inode *inode, loff_t newsize)
3377 return 0; 3422 return 0;
3378 3423
3379 if (newsize > oldsize) { 3424 if (newsize > oldsize) {
3380 i_size_write(inode, newsize);
3381 btrfs_ordered_update_i_size(inode, i_size_read(inode), NULL);
3382 truncate_pagecache(inode, oldsize, newsize); 3425 truncate_pagecache(inode, oldsize, newsize);
3383 ret = btrfs_cont_expand(inode, oldsize, newsize); 3426 ret = btrfs_cont_expand(inode, oldsize, newsize);
3384 if (ret) { 3427 if (ret)
3385 btrfs_setsize(inode, oldsize);
3386 return ret; 3428 return ret;
3387 }
3388 3429
3389 mark_inode_dirty(inode); 3430 trans = btrfs_start_transaction(root, 1);
3431 if (IS_ERR(trans))
3432 return PTR_ERR(trans);
3433
3434 i_size_write(inode, newsize);
3435 btrfs_ordered_update_i_size(inode, i_size_read(inode), NULL);
3436 ret = btrfs_update_inode(trans, root, inode);
3437 btrfs_end_transaction_throttle(trans, root);
3390 } else { 3438 } else {
3391 3439
3392 /* 3440 /*
@@ -3426,9 +3474,9 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr)
3426 3474
3427 if (attr->ia_valid) { 3475 if (attr->ia_valid) {
3428 setattr_copy(inode, attr); 3476 setattr_copy(inode, attr);
3429 mark_inode_dirty(inode); 3477 err = btrfs_dirty_inode(inode);
3430 3478
3431 if (attr->ia_valid & ATTR_MODE) 3479 if (!err && attr->ia_valid & ATTR_MODE)
3432 err = btrfs_acl_chmod(inode); 3480 err = btrfs_acl_chmod(inode);
3433 } 3481 }
3434 3482
@@ -4204,42 +4252,80 @@ int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc)
4204 * FIXME, needs more benchmarking...there are no reasons other than performance 4252 * FIXME, needs more benchmarking...there are no reasons other than performance
4205 * to keep or drop this code. 4253 * to keep or drop this code.
4206 */ 4254 */
4207void btrfs_dirty_inode(struct inode *inode, int flags) 4255int btrfs_dirty_inode(struct inode *inode)
4208{ 4256{
4209 struct btrfs_root *root = BTRFS_I(inode)->root; 4257 struct btrfs_root *root = BTRFS_I(inode)->root;
4210 struct btrfs_trans_handle *trans; 4258 struct btrfs_trans_handle *trans;
4211 int ret; 4259 int ret;
4212 4260
4213 if (BTRFS_I(inode)->dummy_inode) 4261 if (BTRFS_I(inode)->dummy_inode)
4214 return; 4262 return 0;
4215 4263
4216 trans = btrfs_join_transaction(root); 4264 trans = btrfs_join_transaction(root);
4217 BUG_ON(IS_ERR(trans)); 4265 if (IS_ERR(trans))
4266 return PTR_ERR(trans);
4218 4267
4219 ret = btrfs_update_inode(trans, root, inode); 4268 ret = btrfs_update_inode(trans, root, inode);
4220 if (ret && ret == -ENOSPC) { 4269 if (ret && ret == -ENOSPC) {
4221 /* whoops, lets try again with the full transaction */ 4270 /* whoops, lets try again with the full transaction */
4222 btrfs_end_transaction(trans, root); 4271 btrfs_end_transaction(trans, root);
4223 trans = btrfs_start_transaction(root, 1); 4272 trans = btrfs_start_transaction(root, 1);
4224 if (IS_ERR(trans)) { 4273 if (IS_ERR(trans))
4225 printk_ratelimited(KERN_ERR "btrfs: fail to " 4274 return PTR_ERR(trans);
4226 "dirty inode %llu error %ld\n",
4227 (unsigned long long)btrfs_ino(inode),
4228 PTR_ERR(trans));
4229 return;
4230 }
4231 4275
4232 ret = btrfs_update_inode(trans, root, inode); 4276 ret = btrfs_update_inode(trans, root, inode);
4233 if (ret) {
4234 printk_ratelimited(KERN_ERR "btrfs: fail to "
4235 "dirty inode %llu error %d\n",
4236 (unsigned long long)btrfs_ino(inode),
4237 ret);
4238 }
4239 } 4277 }
4240 btrfs_end_transaction(trans, root); 4278 btrfs_end_transaction(trans, root);
4241 if (BTRFS_I(inode)->delayed_node) 4279 if (BTRFS_I(inode)->delayed_node)
4242 btrfs_balance_delayed_items(root); 4280 btrfs_balance_delayed_items(root);
4281
4282 return ret;
4283}
4284
4285/*
4286 * This is a copy of file_update_time. We need this so we can return error on
4287 * ENOSPC for updating the inode in the case of file write and mmap writes.
4288 */
4289int btrfs_update_time(struct file *file)
4290{
4291 struct inode *inode = file->f_path.dentry->d_inode;
4292 struct timespec now;
4293 int ret;
4294 enum { S_MTIME = 1, S_CTIME = 2, S_VERSION = 4 } sync_it = 0;
4295
4296 /* First try to exhaust all avenues to not sync */
4297 if (IS_NOCMTIME(inode))
4298 return 0;
4299
4300 now = current_fs_time(inode->i_sb);
4301 if (!timespec_equal(&inode->i_mtime, &now))
4302 sync_it = S_MTIME;
4303
4304 if (!timespec_equal(&inode->i_ctime, &now))
4305 sync_it |= S_CTIME;
4306
4307 if (IS_I_VERSION(inode))
4308 sync_it |= S_VERSION;
4309
4310 if (!sync_it)
4311 return 0;
4312
4313 /* Finally allowed to write? Takes lock. */
4314 if (mnt_want_write_file(file))
4315 return 0;
4316
4317 /* Only change inode inside the lock region */
4318 if (sync_it & S_VERSION)
4319 inode_inc_iversion(inode);
4320 if (sync_it & S_CTIME)
4321 inode->i_ctime = now;
4322 if (sync_it & S_MTIME)
4323 inode->i_mtime = now;
4324 ret = btrfs_dirty_inode(inode);
4325 if (!ret)
4326 mark_inode_dirty_sync(inode);
4327 mnt_drop_write(file->f_path.mnt);
4328 return ret;
4243} 4329}
4244 4330
4245/* 4331/*
@@ -4555,11 +4641,18 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
4555 goto out_unlock; 4641 goto out_unlock;
4556 } 4642 }
4557 4643
4644 /*
4645 * If the active LSM wants to access the inode during
4646 * d_instantiate it needs these. Smack checks to see
4647 * if the filesystem supports xattrs by looking at the
4648 * ops vector.
4649 */
4650
4651 inode->i_op = &btrfs_special_inode_operations;
4558 err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index); 4652 err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index);
4559 if (err) 4653 if (err)
4560 drop_inode = 1; 4654 drop_inode = 1;
4561 else { 4655 else {
4562 inode->i_op = &btrfs_special_inode_operations;
4563 init_special_inode(inode, inode->i_mode, rdev); 4656 init_special_inode(inode, inode->i_mode, rdev);
4564 btrfs_update_inode(trans, root, inode); 4657 btrfs_update_inode(trans, root, inode);
4565 } 4658 }
@@ -4613,14 +4706,21 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry,
4613 goto out_unlock; 4706 goto out_unlock;
4614 } 4707 }
4615 4708
4709 /*
4710 * If the active LSM wants to access the inode during
4711 * d_instantiate it needs these. Smack checks to see
4712 * if the filesystem supports xattrs by looking at the
4713 * ops vector.
4714 */
4715 inode->i_fop = &btrfs_file_operations;
4716 inode->i_op = &btrfs_file_inode_operations;
4717
4616 err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index); 4718 err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index);
4617 if (err) 4719 if (err)
4618 drop_inode = 1; 4720 drop_inode = 1;
4619 else { 4721 else {
4620 inode->i_mapping->a_ops = &btrfs_aops; 4722 inode->i_mapping->a_ops = &btrfs_aops;
4621 inode->i_mapping->backing_dev_info = &root->fs_info->bdi; 4723 inode->i_mapping->backing_dev_info = &root->fs_info->bdi;
4622 inode->i_fop = &btrfs_file_operations;
4623 inode->i_op = &btrfs_file_inode_operations;
4624 BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; 4724 BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
4625 } 4725 }
4626out_unlock: 4726out_unlock:
@@ -6303,7 +6403,12 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
6303 u64 page_start; 6403 u64 page_start;
6304 u64 page_end; 6404 u64 page_end;
6305 6405
6406 /* Need this to keep space reservations serialized */
6407 mutex_lock(&inode->i_mutex);
6306 ret = btrfs_delalloc_reserve_space(inode, PAGE_CACHE_SIZE); 6408 ret = btrfs_delalloc_reserve_space(inode, PAGE_CACHE_SIZE);
6409 mutex_unlock(&inode->i_mutex);
6410 if (!ret)
6411 ret = btrfs_update_time(vma->vm_file);
6307 if (ret) { 6412 if (ret) {
6308 if (ret == -ENOMEM) 6413 if (ret == -ENOMEM)
6309 ret = VM_FAULT_OOM; 6414 ret = VM_FAULT_OOM;
@@ -6515,8 +6620,9 @@ static int btrfs_truncate(struct inode *inode)
6515 /* Just need the 1 for updating the inode */ 6620 /* Just need the 1 for updating the inode */
6516 trans = btrfs_start_transaction(root, 1); 6621 trans = btrfs_start_transaction(root, 1);
6517 if (IS_ERR(trans)) { 6622 if (IS_ERR(trans)) {
6518 err = PTR_ERR(trans); 6623 ret = err = PTR_ERR(trans);
6519 goto out; 6624 trans = NULL;
6625 break;
6520 } 6626 }
6521 } 6627 }
6522 6628
@@ -7076,14 +7182,21 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
7076 goto out_unlock; 7182 goto out_unlock;
7077 } 7183 }
7078 7184
7185 /*
7186 * If the active LSM wants to access the inode during
7187 * d_instantiate it needs these. Smack checks to see
7188 * if the filesystem supports xattrs by looking at the
7189 * ops vector.
7190 */
7191 inode->i_fop = &btrfs_file_operations;
7192 inode->i_op = &btrfs_file_inode_operations;
7193
7079 err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index); 7194 err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index);
7080 if (err) 7195 if (err)
7081 drop_inode = 1; 7196 drop_inode = 1;
7082 else { 7197 else {
7083 inode->i_mapping->a_ops = &btrfs_aops; 7198 inode->i_mapping->a_ops = &btrfs_aops;
7084 inode->i_mapping->backing_dev_info = &root->fs_info->bdi; 7199 inode->i_mapping->backing_dev_info = &root->fs_info->bdi;
7085 inode->i_fop = &btrfs_file_operations;
7086 inode->i_op = &btrfs_file_inode_operations;
7087 BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; 7200 BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
7088 } 7201 }
7089 if (drop_inode) 7202 if (drop_inode)
@@ -7353,6 +7466,7 @@ static const struct inode_operations btrfs_symlink_inode_operations = {
7353 .follow_link = page_follow_link_light, 7466 .follow_link = page_follow_link_light,
7354 .put_link = page_put_link, 7467 .put_link = page_put_link,
7355 .getattr = btrfs_getattr, 7468 .getattr = btrfs_getattr,
7469 .setattr = btrfs_setattr,
7356 .permission = btrfs_permission, 7470 .permission = btrfs_permission,
7357 .setxattr = btrfs_setxattr, 7471 .setxattr = btrfs_setxattr,
7358 .getxattr = btrfs_getxattr, 7472 .getxattr = btrfs_getxattr,
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 72d461656f6..c04f02c7d5b 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -252,11 +252,11 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
252 trans = btrfs_join_transaction(root); 252 trans = btrfs_join_transaction(root);
253 BUG_ON(IS_ERR(trans)); 253 BUG_ON(IS_ERR(trans));
254 254
255 btrfs_update_iflags(inode);
256 inode->i_ctime = CURRENT_TIME;
255 ret = btrfs_update_inode(trans, root, inode); 257 ret = btrfs_update_inode(trans, root, inode);
256 BUG_ON(ret); 258 BUG_ON(ret);
257 259
258 btrfs_update_iflags(inode);
259 inode->i_ctime = CURRENT_TIME;
260 btrfs_end_transaction(trans, root); 260 btrfs_end_transaction(trans, root);
261 261
262 mnt_drop_write(file->f_path.mnt); 262 mnt_drop_write(file->f_path.mnt);
@@ -858,8 +858,10 @@ static int cluster_pages_for_defrag(struct inode *inode,
858 return 0; 858 return 0;
859 file_end = (isize - 1) >> PAGE_CACHE_SHIFT; 859 file_end = (isize - 1) >> PAGE_CACHE_SHIFT;
860 860
861 mutex_lock(&inode->i_mutex);
861 ret = btrfs_delalloc_reserve_space(inode, 862 ret = btrfs_delalloc_reserve_space(inode,
862 num_pages << PAGE_CACHE_SHIFT); 863 num_pages << PAGE_CACHE_SHIFT);
864 mutex_unlock(&inode->i_mutex);
863 if (ret) 865 if (ret)
864 return ret; 866 return ret;
865again: 867again:
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index dff29d5e151..cfb55434a46 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -2947,7 +2947,9 @@ static int relocate_file_extent_cluster(struct inode *inode,
2947 index = (cluster->start - offset) >> PAGE_CACHE_SHIFT; 2947 index = (cluster->start - offset) >> PAGE_CACHE_SHIFT;
2948 last_index = (cluster->end - offset) >> PAGE_CACHE_SHIFT; 2948 last_index = (cluster->end - offset) >> PAGE_CACHE_SHIFT;
2949 while (index <= last_index) { 2949 while (index <= last_index) {
2950 mutex_lock(&inode->i_mutex);
2950 ret = btrfs_delalloc_reserve_metadata(inode, PAGE_CACHE_SIZE); 2951 ret = btrfs_delalloc_reserve_metadata(inode, PAGE_CACHE_SIZE);
2952 mutex_unlock(&inode->i_mutex);
2951 if (ret) 2953 if (ret)
2952 goto out; 2954 goto out;
2953 2955
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index c27bcb67f33..ddf2c90d3fc 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -1535,18 +1535,22 @@ static noinline_for_stack int scrub_supers(struct scrub_dev *sdev)
1535static noinline_for_stack int scrub_workers_get(struct btrfs_root *root) 1535static noinline_for_stack int scrub_workers_get(struct btrfs_root *root)
1536{ 1536{
1537 struct btrfs_fs_info *fs_info = root->fs_info; 1537 struct btrfs_fs_info *fs_info = root->fs_info;
1538 int ret = 0;
1538 1539
1539 mutex_lock(&fs_info->scrub_lock); 1540 mutex_lock(&fs_info->scrub_lock);
1540 if (fs_info->scrub_workers_refcnt == 0) { 1541 if (fs_info->scrub_workers_refcnt == 0) {
1541 btrfs_init_workers(&fs_info->scrub_workers, "scrub", 1542 btrfs_init_workers(&fs_info->scrub_workers, "scrub",
1542 fs_info->thread_pool_size, &fs_info->generic_worker); 1543 fs_info->thread_pool_size, &fs_info->generic_worker);
1543 fs_info->scrub_workers.idle_thresh = 4; 1544 fs_info->scrub_workers.idle_thresh = 4;
1544 btrfs_start_workers(&fs_info->scrub_workers, 1); 1545 ret = btrfs_start_workers(&fs_info->scrub_workers);
1546 if (ret)
1547 goto out;
1545 } 1548 }
1546 ++fs_info->scrub_workers_refcnt; 1549 ++fs_info->scrub_workers_refcnt;
1550out:
1547 mutex_unlock(&fs_info->scrub_lock); 1551 mutex_unlock(&fs_info->scrub_lock);
1548 1552
1549 return 0; 1553 return ret;
1550} 1554}
1551 1555
1552static noinline_for_stack void scrub_workers_put(struct btrfs_root *root) 1556static noinline_for_stack void scrub_workers_put(struct btrfs_root *root)
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index e28ad4baf48..200f63bc667 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -41,6 +41,7 @@
41#include <linux/slab.h> 41#include <linux/slab.h>
42#include <linux/cleancache.h> 42#include <linux/cleancache.h>
43#include <linux/mnt_namespace.h> 43#include <linux/mnt_namespace.h>
44#include <linux/ratelimit.h>
44#include "compat.h" 45#include "compat.h"
45#include "delayed-inode.h" 46#include "delayed-inode.h"
46#include "ctree.h" 47#include "ctree.h"
@@ -1053,7 +1054,7 @@ static int btrfs_calc_avail_data_space(struct btrfs_root *root, u64 *free_bytes)
1053 u64 avail_space; 1054 u64 avail_space;
1054 u64 used_space; 1055 u64 used_space;
1055 u64 min_stripe_size; 1056 u64 min_stripe_size;
1056 int min_stripes = 1; 1057 int min_stripes = 1, num_stripes = 1;
1057 int i = 0, nr_devices; 1058 int i = 0, nr_devices;
1058 int ret; 1059 int ret;
1059 1060
@@ -1067,12 +1068,16 @@ static int btrfs_calc_avail_data_space(struct btrfs_root *root, u64 *free_bytes)
1067 1068
1068 /* calc min stripe number for data space alloction */ 1069 /* calc min stripe number for data space alloction */
1069 type = btrfs_get_alloc_profile(root, 1); 1070 type = btrfs_get_alloc_profile(root, 1);
1070 if (type & BTRFS_BLOCK_GROUP_RAID0) 1071 if (type & BTRFS_BLOCK_GROUP_RAID0) {
1071 min_stripes = 2; 1072 min_stripes = 2;
1072 else if (type & BTRFS_BLOCK_GROUP_RAID1) 1073 num_stripes = nr_devices;
1074 } else if (type & BTRFS_BLOCK_GROUP_RAID1) {
1073 min_stripes = 2; 1075 min_stripes = 2;
1074 else if (type & BTRFS_BLOCK_GROUP_RAID10) 1076 num_stripes = 2;
1077 } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
1075 min_stripes = 4; 1078 min_stripes = 4;
1079 num_stripes = 4;
1080 }
1076 1081
1077 if (type & BTRFS_BLOCK_GROUP_DUP) 1082 if (type & BTRFS_BLOCK_GROUP_DUP)
1078 min_stripe_size = 2 * BTRFS_STRIPE_LEN; 1083 min_stripe_size = 2 * BTRFS_STRIPE_LEN;
@@ -1141,13 +1146,16 @@ static int btrfs_calc_avail_data_space(struct btrfs_root *root, u64 *free_bytes)
1141 i = nr_devices - 1; 1146 i = nr_devices - 1;
1142 avail_space = 0; 1147 avail_space = 0;
1143 while (nr_devices >= min_stripes) { 1148 while (nr_devices >= min_stripes) {
1149 if (num_stripes > nr_devices)
1150 num_stripes = nr_devices;
1151
1144 if (devices_info[i].max_avail >= min_stripe_size) { 1152 if (devices_info[i].max_avail >= min_stripe_size) {
1145 int j; 1153 int j;
1146 u64 alloc_size; 1154 u64 alloc_size;
1147 1155
1148 avail_space += devices_info[i].max_avail * min_stripes; 1156 avail_space += devices_info[i].max_avail * num_stripes;
1149 alloc_size = devices_info[i].max_avail; 1157 alloc_size = devices_info[i].max_avail;
1150 for (j = i + 1 - min_stripes; j <= i; j++) 1158 for (j = i + 1 - num_stripes; j <= i; j++)
1151 devices_info[j].max_avail -= alloc_size; 1159 devices_info[j].max_avail -= alloc_size;
1152 } 1160 }
1153 i--; 1161 i--;
@@ -1264,6 +1272,16 @@ static int btrfs_unfreeze(struct super_block *sb)
1264 return 0; 1272 return 0;
1265} 1273}
1266 1274
1275static void btrfs_fs_dirty_inode(struct inode *inode, int flags)
1276{
1277 int ret;
1278
1279 ret = btrfs_dirty_inode(inode);
1280 if (ret)
1281 printk_ratelimited(KERN_ERR "btrfs: fail to dirty inode %Lu "
1282 "error %d\n", btrfs_ino(inode), ret);
1283}
1284
1267static const struct super_operations btrfs_super_ops = { 1285static const struct super_operations btrfs_super_ops = {
1268 .drop_inode = btrfs_drop_inode, 1286 .drop_inode = btrfs_drop_inode,
1269 .evict_inode = btrfs_evict_inode, 1287 .evict_inode = btrfs_evict_inode,
@@ -1271,7 +1289,7 @@ static const struct super_operations btrfs_super_ops = {
1271 .sync_fs = btrfs_sync_fs, 1289 .sync_fs = btrfs_sync_fs,
1272 .show_options = btrfs_show_options, 1290 .show_options = btrfs_show_options,
1273 .write_inode = btrfs_write_inode, 1291 .write_inode = btrfs_write_inode,
1274 .dirty_inode = btrfs_dirty_inode, 1292 .dirty_inode = btrfs_fs_dirty_inode,
1275 .alloc_inode = btrfs_alloc_inode, 1293 .alloc_inode = btrfs_alloc_inode,
1276 .destroy_inode = btrfs_destroy_inode, 1294 .destroy_inode = btrfs_destroy_inode,
1277 .statfs = btrfs_statfs, 1295 .statfs = btrfs_statfs,
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index c37433d3cd8..f4b839fd3c9 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -295,6 +295,12 @@ loop_lock:
295 btrfs_requeue_work(&device->work); 295 btrfs_requeue_work(&device->work);
296 goto done; 296 goto done;
297 } 297 }
298 /* unplug every 64 requests just for good measure */
299 if (batch_run % 64 == 0) {
300 blk_finish_plug(&plug);
301 blk_start_plug(&plug);
302 sync_pending = 0;
303 }
298 } 304 }
299 305
300 cond_resched(); 306 cond_resched();
@@ -1611,7 +1617,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
1611 if ((sb->s_flags & MS_RDONLY) && !root->fs_info->fs_devices->seeding) 1617 if ((sb->s_flags & MS_RDONLY) && !root->fs_info->fs_devices->seeding)
1612 return -EINVAL; 1618 return -EINVAL;
1613 1619
1614 bdev = blkdev_get_by_path(device_path, FMODE_EXCL, 1620 bdev = blkdev_get_by_path(device_path, FMODE_WRITE | FMODE_EXCL,
1615 root->fs_info->bdev_holder); 1621 root->fs_info->bdev_holder);
1616 if (IS_ERR(bdev)) 1622 if (IS_ERR(bdev))
1617 return PTR_ERR(bdev); 1623 return PTR_ERR(bdev);
@@ -3258,7 +3264,7 @@ static void btrfs_end_bio(struct bio *bio, int err)
3258 */ 3264 */
3259 if (atomic_read(&bbio->error) > bbio->max_errors) { 3265 if (atomic_read(&bbio->error) > bbio->max_errors) {
3260 err = -EIO; 3266 err = -EIO;
3261 } else if (err) { 3267 } else {
3262 /* 3268 /*
3263 * this bio is actually up to date, we didn't 3269 * this bio is actually up to date, we didn't
3264 * go over the max number of errors 3270 * go over the max number of errors