aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/btrfs/async-thread.c120
-rw-r--r--fs/btrfs/async-thread.h4
-rw-r--r--fs/btrfs/ctree.h3
-rw-r--r--fs/btrfs/delayed-inode.c4
-rw-r--r--fs/btrfs/disk-io.c34
-rw-r--r--fs/btrfs/extent-tree.c145
-rw-r--r--fs/btrfs/extent_io.c24
-rw-r--r--fs/btrfs/file.c8
-rw-r--r--fs/btrfs/inode.c189
-rw-r--r--fs/btrfs/ioctl.c6
-rw-r--r--fs/btrfs/relocation.c2
-rw-r--r--fs/btrfs/scrub.c8
-rw-r--r--fs/btrfs/super.c32
-rw-r--r--fs/btrfs/volumes.c10
-rw-r--r--fs/ceph/addr.c8
-rw-r--r--fs/ceph/caps.c187
-rw-r--r--fs/ceph/dir.c24
-rw-r--r--fs/ceph/file.c23
-rw-r--r--fs/ceph/inode.c53
-rw-r--r--fs/ceph/ioctl.c4
-rw-r--r--fs/ceph/mds_client.c33
-rw-r--r--fs/ceph/mds_client.h2
-rw-r--r--fs/ceph/snap.c16
-rw-r--r--fs/ceph/super.c2
-rw-r--r--fs/ceph/super.h31
-rw-r--r--fs/ceph/xattr.c42
-rw-r--r--fs/cifs/connect.c2
-rw-r--r--fs/cifs/file.c26
-rw-r--r--fs/cifs/readdir.c10
-rw-r--r--fs/cifs/smbencrypt.c6
-rw-r--r--fs/configfs/inode.c2
-rw-r--r--fs/configfs/mount.c36
-rw-r--r--fs/dcache.c71
-rw-r--r--fs/ext4/extents.c3
-rw-r--r--fs/ext4/inode.c54
-rw-r--r--fs/ext4/page-io.c12
-rw-r--r--fs/ext4/super.c17
-rw-r--r--fs/fs-writeback.c16
-rw-r--r--fs/fuse/dev.c3
-rw-r--r--fs/fuse/file.c6
-rw-r--r--fs/fuse/inode.c24
-rw-r--r--fs/namespace.c20
-rw-r--r--fs/ncpfs/inode.c8
-rw-r--r--fs/nfs/file.c2
-rw-r--r--fs/nfs/nfs4proc.c24
-rw-r--r--fs/nfs/nfs4state.c33
-rw-r--r--fs/nilfs2/ioctl.c16
-rw-r--r--fs/proc/meminfo.c7
-rw-r--r--fs/proc/root.c8
-rw-r--r--fs/proc/stat.c4
-rw-r--r--fs/seq_file.c6
-rw-r--r--fs/ubifs/super.c18
-rw-r--r--fs/xfs/xfs_acl.c2
-rw-r--r--fs/xfs/xfs_attr_leaf.c64
-rw-r--r--fs/xfs/xfs_bmap.c20
-rw-r--r--fs/xfs/xfs_export.c8
-rw-r--r--fs/xfs/xfs_inode.c21
-rw-r--r--fs/xfs/xfs_inode.h1
-rw-r--r--fs/xfs/xfs_log.c348
-rw-r--r--fs/xfs/xfs_sync.c11
-rw-r--r--fs/xfs/xfs_trace.h12
61 files changed, 1119 insertions, 816 deletions
diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c
index 7ec14097fef1..0b394580d860 100644
--- a/fs/btrfs/async-thread.c
+++ b/fs/btrfs/async-thread.c
@@ -64,6 +64,8 @@ struct btrfs_worker_thread {
64 int idle; 64 int idle;
65}; 65};
66 66
67static int __btrfs_start_workers(struct btrfs_workers *workers);
68
67/* 69/*
68 * btrfs_start_workers uses kthread_run, which can block waiting for memory 70 * btrfs_start_workers uses kthread_run, which can block waiting for memory
69 * for a very long time. It will actually throttle on page writeback, 71 * for a very long time. It will actually throttle on page writeback,
@@ -88,27 +90,10 @@ static void start_new_worker_func(struct btrfs_work *work)
88{ 90{
89 struct worker_start *start; 91 struct worker_start *start;
90 start = container_of(work, struct worker_start, work); 92 start = container_of(work, struct worker_start, work);
91 btrfs_start_workers(start->queue, 1); 93 __btrfs_start_workers(start->queue);
92 kfree(start); 94 kfree(start);
93} 95}
94 96
95static int start_new_worker(struct btrfs_workers *queue)
96{
97 struct worker_start *start;
98 int ret;
99
100 start = kzalloc(sizeof(*start), GFP_NOFS);
101 if (!start)
102 return -ENOMEM;
103
104 start->work.func = start_new_worker_func;
105 start->queue = queue;
106 ret = btrfs_queue_worker(queue->atomic_worker_start, &start->work);
107 if (ret)
108 kfree(start);
109 return ret;
110}
111
112/* 97/*
113 * helper function to move a thread onto the idle list after it 98 * helper function to move a thread onto the idle list after it
114 * has finished some requests. 99 * has finished some requests.
@@ -153,12 +138,20 @@ static void check_busy_worker(struct btrfs_worker_thread *worker)
153static void check_pending_worker_creates(struct btrfs_worker_thread *worker) 138static void check_pending_worker_creates(struct btrfs_worker_thread *worker)
154{ 139{
155 struct btrfs_workers *workers = worker->workers; 140 struct btrfs_workers *workers = worker->workers;
141 struct worker_start *start;
156 unsigned long flags; 142 unsigned long flags;
157 143
158 rmb(); 144 rmb();
159 if (!workers->atomic_start_pending) 145 if (!workers->atomic_start_pending)
160 return; 146 return;
161 147
148 start = kzalloc(sizeof(*start), GFP_NOFS);
149 if (!start)
150 return;
151
152 start->work.func = start_new_worker_func;
153 start->queue = workers;
154
162 spin_lock_irqsave(&workers->lock, flags); 155 spin_lock_irqsave(&workers->lock, flags);
163 if (!workers->atomic_start_pending) 156 if (!workers->atomic_start_pending)
164 goto out; 157 goto out;
@@ -170,10 +163,11 @@ static void check_pending_worker_creates(struct btrfs_worker_thread *worker)
170 163
171 workers->num_workers_starting += 1; 164 workers->num_workers_starting += 1;
172 spin_unlock_irqrestore(&workers->lock, flags); 165 spin_unlock_irqrestore(&workers->lock, flags);
173 start_new_worker(workers); 166 btrfs_queue_worker(workers->atomic_worker_start, &start->work);
174 return; 167 return;
175 168
176out: 169out:
170 kfree(start);
177 spin_unlock_irqrestore(&workers->lock, flags); 171 spin_unlock_irqrestore(&workers->lock, flags);
178} 172}
179 173
@@ -331,7 +325,7 @@ again:
331 run_ordered_completions(worker->workers, work); 325 run_ordered_completions(worker->workers, work);
332 326
333 check_pending_worker_creates(worker); 327 check_pending_worker_creates(worker);
334 328 cond_resched();
335 } 329 }
336 330
337 spin_lock_irq(&worker->lock); 331 spin_lock_irq(&worker->lock);
@@ -462,56 +456,55 @@ void btrfs_init_workers(struct btrfs_workers *workers, char *name, int max,
462 * starts new worker threads. This does not enforce the max worker 456 * starts new worker threads. This does not enforce the max worker
463 * count in case you need to temporarily go past it. 457 * count in case you need to temporarily go past it.
464 */ 458 */
465static int __btrfs_start_workers(struct btrfs_workers *workers, 459static int __btrfs_start_workers(struct btrfs_workers *workers)
466 int num_workers)
467{ 460{
468 struct btrfs_worker_thread *worker; 461 struct btrfs_worker_thread *worker;
469 int ret = 0; 462 int ret = 0;
470 int i;
471 463
472 for (i = 0; i < num_workers; i++) { 464 worker = kzalloc(sizeof(*worker), GFP_NOFS);
473 worker = kzalloc(sizeof(*worker), GFP_NOFS); 465 if (!worker) {
474 if (!worker) { 466 ret = -ENOMEM;
475 ret = -ENOMEM; 467 goto fail;
476 goto fail; 468 }
477 }
478 469
479 INIT_LIST_HEAD(&worker->pending); 470 INIT_LIST_HEAD(&worker->pending);
480 INIT_LIST_HEAD(&worker->prio_pending); 471 INIT_LIST_HEAD(&worker->prio_pending);
481 INIT_LIST_HEAD(&worker->worker_list); 472 INIT_LIST_HEAD(&worker->worker_list);
482 spin_lock_init(&worker->lock); 473 spin_lock_init(&worker->lock);
483 474
484 atomic_set(&worker->num_pending, 0); 475 atomic_set(&worker->num_pending, 0);
485 atomic_set(&worker->refs, 1); 476 atomic_set(&worker->refs, 1);
486 worker->workers = workers; 477 worker->workers = workers;
487 worker->task = kthread_run(worker_loop, worker, 478 worker->task = kthread_run(worker_loop, worker,
488 "btrfs-%s-%d", workers->name, 479 "btrfs-%s-%d", workers->name,
489 workers->num_workers + i); 480 workers->num_workers + 1);
490 if (IS_ERR(worker->task)) { 481 if (IS_ERR(worker->task)) {
491 ret = PTR_ERR(worker->task); 482 ret = PTR_ERR(worker->task);
492 kfree(worker); 483 kfree(worker);
493 goto fail; 484 goto fail;
494 }
495 spin_lock_irq(&workers->lock);
496 list_add_tail(&worker->worker_list, &workers->idle_list);
497 worker->idle = 1;
498 workers->num_workers++;
499 workers->num_workers_starting--;
500 WARN_ON(workers->num_workers_starting < 0);
501 spin_unlock_irq(&workers->lock);
502 } 485 }
486 spin_lock_irq(&workers->lock);
487 list_add_tail(&worker->worker_list, &workers->idle_list);
488 worker->idle = 1;
489 workers->num_workers++;
490 workers->num_workers_starting--;
491 WARN_ON(workers->num_workers_starting < 0);
492 spin_unlock_irq(&workers->lock);
493
503 return 0; 494 return 0;
504fail: 495fail:
505 btrfs_stop_workers(workers); 496 spin_lock_irq(&workers->lock);
497 workers->num_workers_starting--;
498 spin_unlock_irq(&workers->lock);
506 return ret; 499 return ret;
507} 500}
508 501
509int btrfs_start_workers(struct btrfs_workers *workers, int num_workers) 502int btrfs_start_workers(struct btrfs_workers *workers)
510{ 503{
511 spin_lock_irq(&workers->lock); 504 spin_lock_irq(&workers->lock);
512 workers->num_workers_starting += num_workers; 505 workers->num_workers_starting++;
513 spin_unlock_irq(&workers->lock); 506 spin_unlock_irq(&workers->lock);
514 return __btrfs_start_workers(workers, num_workers); 507 return __btrfs_start_workers(workers);
515} 508}
516 509
517/* 510/*
@@ -568,9 +561,10 @@ static struct btrfs_worker_thread *find_worker(struct btrfs_workers *workers)
568 struct btrfs_worker_thread *worker; 561 struct btrfs_worker_thread *worker;
569 unsigned long flags; 562 unsigned long flags;
570 struct list_head *fallback; 563 struct list_head *fallback;
564 int ret;
571 565
572again:
573 spin_lock_irqsave(&workers->lock, flags); 566 spin_lock_irqsave(&workers->lock, flags);
567again:
574 worker = next_worker(workers); 568 worker = next_worker(workers);
575 569
576 if (!worker) { 570 if (!worker) {
@@ -584,7 +578,10 @@ again:
584 workers->num_workers_starting++; 578 workers->num_workers_starting++;
585 spin_unlock_irqrestore(&workers->lock, flags); 579 spin_unlock_irqrestore(&workers->lock, flags);
586 /* we're below the limit, start another worker */ 580 /* we're below the limit, start another worker */
587 __btrfs_start_workers(workers, 1); 581 ret = __btrfs_start_workers(workers);
582 spin_lock_irqsave(&workers->lock, flags);
583 if (ret)
584 goto fallback;
588 goto again; 585 goto again;
589 } 586 }
590 } 587 }
@@ -665,7 +662,7 @@ void btrfs_set_work_high_prio(struct btrfs_work *work)
665/* 662/*
666 * places a struct btrfs_work into the pending queue of one of the kthreads 663 * places a struct btrfs_work into the pending queue of one of the kthreads
667 */ 664 */
668int btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work) 665void btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work)
669{ 666{
670 struct btrfs_worker_thread *worker; 667 struct btrfs_worker_thread *worker;
671 unsigned long flags; 668 unsigned long flags;
@@ -673,7 +670,7 @@ int btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work)
673 670
674 /* don't requeue something already on a list */ 671 /* don't requeue something already on a list */
675 if (test_and_set_bit(WORK_QUEUED_BIT, &work->flags)) 672 if (test_and_set_bit(WORK_QUEUED_BIT, &work->flags))
676 goto out; 673 return;
677 674
678 worker = find_worker(workers); 675 worker = find_worker(workers);
679 if (workers->ordered) { 676 if (workers->ordered) {
@@ -712,7 +709,4 @@ int btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work)
712 if (wake) 709 if (wake)
713 wake_up_process(worker->task); 710 wake_up_process(worker->task);
714 spin_unlock_irqrestore(&worker->lock, flags); 711 spin_unlock_irqrestore(&worker->lock, flags);
715
716out:
717 return 0;
718} 712}
diff --git a/fs/btrfs/async-thread.h b/fs/btrfs/async-thread.h
index 5077746cf85e..f34cc31fa3c9 100644
--- a/fs/btrfs/async-thread.h
+++ b/fs/btrfs/async-thread.h
@@ -109,8 +109,8 @@ struct btrfs_workers {
109 char *name; 109 char *name;
110}; 110};
111 111
112int btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work); 112void btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work);
113int btrfs_start_workers(struct btrfs_workers *workers, int num_workers); 113int btrfs_start_workers(struct btrfs_workers *workers);
114int btrfs_stop_workers(struct btrfs_workers *workers); 114int btrfs_stop_workers(struct btrfs_workers *workers);
115void btrfs_init_workers(struct btrfs_workers *workers, char *name, int max, 115void btrfs_init_workers(struct btrfs_workers *workers, char *name, int max,
116 struct btrfs_workers *async_starter); 116 struct btrfs_workers *async_starter);
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 50634abef9b4..67385033323d 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -2692,7 +2692,8 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);
2692int btrfs_readpage(struct file *file, struct page *page); 2692int btrfs_readpage(struct file *file, struct page *page);
2693void btrfs_evict_inode(struct inode *inode); 2693void btrfs_evict_inode(struct inode *inode);
2694int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc); 2694int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc);
2695void btrfs_dirty_inode(struct inode *inode, int flags); 2695int btrfs_dirty_inode(struct inode *inode);
2696int btrfs_update_time(struct file *file);
2696struct inode *btrfs_alloc_inode(struct super_block *sb); 2697struct inode *btrfs_alloc_inode(struct super_block *sb);
2697void btrfs_destroy_inode(struct inode *inode); 2698void btrfs_destroy_inode(struct inode *inode);
2698int btrfs_drop_inode(struct inode *inode); 2699int btrfs_drop_inode(struct inode *inode);
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index 5b163572e0ca..9c1eccc2c503 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -640,8 +640,8 @@ static int btrfs_delayed_inode_reserve_metadata(
640 * Now if src_rsv == delalloc_block_rsv we'll let it just steal since 640 * Now if src_rsv == delalloc_block_rsv we'll let it just steal since
641 * we're accounted for. 641 * we're accounted for.
642 */ 642 */
643 if (!trans->bytes_reserved && 643 if (!src_rsv || (!trans->bytes_reserved &&
644 src_rsv != &root->fs_info->delalloc_block_rsv) { 644 src_rsv != &root->fs_info->delalloc_block_rsv)) {
645 ret = btrfs_block_rsv_add_noflush(root, dst_rsv, num_bytes); 645 ret = btrfs_block_rsv_add_noflush(root, dst_rsv, num_bytes);
646 /* 646 /*
647 * Since we're under a transaction reserve_metadata_bytes could 647 * Since we're under a transaction reserve_metadata_bytes could
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 632f8f3cc9db..f44b3928dc2d 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -2194,19 +2194,27 @@ struct btrfs_root *open_ctree(struct super_block *sb,
2194 fs_info->endio_meta_write_workers.idle_thresh = 2; 2194 fs_info->endio_meta_write_workers.idle_thresh = 2;
2195 fs_info->readahead_workers.idle_thresh = 2; 2195 fs_info->readahead_workers.idle_thresh = 2;
2196 2196
2197 btrfs_start_workers(&fs_info->workers, 1); 2197 /*
2198 btrfs_start_workers(&fs_info->generic_worker, 1); 2198 * btrfs_start_workers can really only fail because of ENOMEM so just
2199 btrfs_start_workers(&fs_info->submit_workers, 1); 2199 * return -ENOMEM if any of these fail.
2200 btrfs_start_workers(&fs_info->delalloc_workers, 1); 2200 */
2201 btrfs_start_workers(&fs_info->fixup_workers, 1); 2201 ret = btrfs_start_workers(&fs_info->workers);
2202 btrfs_start_workers(&fs_info->endio_workers, 1); 2202 ret |= btrfs_start_workers(&fs_info->generic_worker);
2203 btrfs_start_workers(&fs_info->endio_meta_workers, 1); 2203 ret |= btrfs_start_workers(&fs_info->submit_workers);
2204 btrfs_start_workers(&fs_info->endio_meta_write_workers, 1); 2204 ret |= btrfs_start_workers(&fs_info->delalloc_workers);
2205 btrfs_start_workers(&fs_info->endio_write_workers, 1); 2205 ret |= btrfs_start_workers(&fs_info->fixup_workers);
2206 btrfs_start_workers(&fs_info->endio_freespace_worker, 1); 2206 ret |= btrfs_start_workers(&fs_info->endio_workers);
2207 btrfs_start_workers(&fs_info->delayed_workers, 1); 2207 ret |= btrfs_start_workers(&fs_info->endio_meta_workers);
2208 btrfs_start_workers(&fs_info->caching_workers, 1); 2208 ret |= btrfs_start_workers(&fs_info->endio_meta_write_workers);
2209 btrfs_start_workers(&fs_info->readahead_workers, 1); 2209 ret |= btrfs_start_workers(&fs_info->endio_write_workers);
2210 ret |= btrfs_start_workers(&fs_info->endio_freespace_worker);
2211 ret |= btrfs_start_workers(&fs_info->delayed_workers);
2212 ret |= btrfs_start_workers(&fs_info->caching_workers);
2213 ret |= btrfs_start_workers(&fs_info->readahead_workers);
2214 if (ret) {
2215 ret = -ENOMEM;
2216 goto fail_sb_buffer;
2217 }
2210 2218
2211 fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super); 2219 fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super);
2212 fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages, 2220 fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages,
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index f0d5718d2587..f5fbe576d2ba 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -2822,7 +2822,7 @@ out_free:
2822 btrfs_release_path(path); 2822 btrfs_release_path(path);
2823out: 2823out:
2824 spin_lock(&block_group->lock); 2824 spin_lock(&block_group->lock);
2825 if (!ret) 2825 if (!ret && dcs == BTRFS_DC_SETUP)
2826 block_group->cache_generation = trans->transid; 2826 block_group->cache_generation = trans->transid;
2827 block_group->disk_cache_state = dcs; 2827 block_group->disk_cache_state = dcs;
2828 spin_unlock(&block_group->lock); 2828 spin_unlock(&block_group->lock);
@@ -4204,12 +4204,17 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
4204 struct btrfs_root *root = BTRFS_I(inode)->root; 4204 struct btrfs_root *root = BTRFS_I(inode)->root;
4205 struct btrfs_block_rsv *block_rsv = &root->fs_info->delalloc_block_rsv; 4205 struct btrfs_block_rsv *block_rsv = &root->fs_info->delalloc_block_rsv;
4206 u64 to_reserve = 0; 4206 u64 to_reserve = 0;
4207 u64 csum_bytes;
4207 unsigned nr_extents = 0; 4208 unsigned nr_extents = 0;
4209 int extra_reserve = 0;
4208 int flush = 1; 4210 int flush = 1;
4209 int ret; 4211 int ret;
4210 4212
4213 /* Need to be holding the i_mutex here if we aren't free space cache */
4211 if (btrfs_is_free_space_inode(root, inode)) 4214 if (btrfs_is_free_space_inode(root, inode))
4212 flush = 0; 4215 flush = 0;
4216 else
4217 WARN_ON(!mutex_is_locked(&inode->i_mutex));
4213 4218
4214 if (flush && btrfs_transaction_in_commit(root->fs_info)) 4219 if (flush && btrfs_transaction_in_commit(root->fs_info))
4215 schedule_timeout(1); 4220 schedule_timeout(1);
@@ -4220,11 +4225,9 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
4220 BTRFS_I(inode)->outstanding_extents++; 4225 BTRFS_I(inode)->outstanding_extents++;
4221 4226
4222 if (BTRFS_I(inode)->outstanding_extents > 4227 if (BTRFS_I(inode)->outstanding_extents >
4223 BTRFS_I(inode)->reserved_extents) { 4228 BTRFS_I(inode)->reserved_extents)
4224 nr_extents = BTRFS_I(inode)->outstanding_extents - 4229 nr_extents = BTRFS_I(inode)->outstanding_extents -
4225 BTRFS_I(inode)->reserved_extents; 4230 BTRFS_I(inode)->reserved_extents;
4226 BTRFS_I(inode)->reserved_extents += nr_extents;
4227 }
4228 4231
4229 /* 4232 /*
4230 * Add an item to reserve for updating the inode when we complete the 4233 * Add an item to reserve for updating the inode when we complete the
@@ -4232,11 +4235,12 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
4232 */ 4235 */
4233 if (!BTRFS_I(inode)->delalloc_meta_reserved) { 4236 if (!BTRFS_I(inode)->delalloc_meta_reserved) {
4234 nr_extents++; 4237 nr_extents++;
4235 BTRFS_I(inode)->delalloc_meta_reserved = 1; 4238 extra_reserve = 1;
4236 } 4239 }
4237 4240
4238 to_reserve = btrfs_calc_trans_metadata_size(root, nr_extents); 4241 to_reserve = btrfs_calc_trans_metadata_size(root, nr_extents);
4239 to_reserve += calc_csum_metadata_size(inode, num_bytes, 1); 4242 to_reserve += calc_csum_metadata_size(inode, num_bytes, 1);
4243 csum_bytes = BTRFS_I(inode)->csum_bytes;
4240 spin_unlock(&BTRFS_I(inode)->lock); 4244 spin_unlock(&BTRFS_I(inode)->lock);
4241 4245
4242 ret = reserve_metadata_bytes(root, block_rsv, to_reserve, flush); 4246 ret = reserve_metadata_bytes(root, block_rsv, to_reserve, flush);
@@ -4246,22 +4250,35 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
4246 4250
4247 spin_lock(&BTRFS_I(inode)->lock); 4251 spin_lock(&BTRFS_I(inode)->lock);
4248 dropped = drop_outstanding_extent(inode); 4252 dropped = drop_outstanding_extent(inode);
4249 to_free = calc_csum_metadata_size(inode, num_bytes, 0);
4250 spin_unlock(&BTRFS_I(inode)->lock);
4251 to_free += btrfs_calc_trans_metadata_size(root, dropped);
4252
4253 /* 4253 /*
4254 * Somebody could have come in and twiddled with the 4254 * If the inodes csum_bytes is the same as the original
4255 * reservation, so if we have to free more than we would have 4255 * csum_bytes then we know we haven't raced with any free()ers
4256 * reserved from this reservation go ahead and release those 4256 * so we can just reduce our inodes csum bytes and carry on.
4257 * bytes. 4257 * Otherwise we have to do the normal free thing to account for
4258 * the case that the free side didn't free up its reserve
4259 * because of this outstanding reservation.
4258 */ 4260 */
4259 to_free -= to_reserve; 4261 if (BTRFS_I(inode)->csum_bytes == csum_bytes)
4262 calc_csum_metadata_size(inode, num_bytes, 0);
4263 else
4264 to_free = calc_csum_metadata_size(inode, num_bytes, 0);
4265 spin_unlock(&BTRFS_I(inode)->lock);
4266 if (dropped)
4267 to_free += btrfs_calc_trans_metadata_size(root, dropped);
4268
4260 if (to_free) 4269 if (to_free)
4261 btrfs_block_rsv_release(root, block_rsv, to_free); 4270 btrfs_block_rsv_release(root, block_rsv, to_free);
4262 return ret; 4271 return ret;
4263 } 4272 }
4264 4273
4274 spin_lock(&BTRFS_I(inode)->lock);
4275 if (extra_reserve) {
4276 BTRFS_I(inode)->delalloc_meta_reserved = 1;
4277 nr_extents--;
4278 }
4279 BTRFS_I(inode)->reserved_extents += nr_extents;
4280 spin_unlock(&BTRFS_I(inode)->lock);
4281
4265 block_rsv_add_bytes(block_rsv, to_reserve, 1); 4282 block_rsv_add_bytes(block_rsv, to_reserve, 1);
4266 4283
4267 return 0; 4284 return 0;
@@ -5107,11 +5124,11 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
5107 struct btrfs_root *root = orig_root->fs_info->extent_root; 5124 struct btrfs_root *root = orig_root->fs_info->extent_root;
5108 struct btrfs_free_cluster *last_ptr = NULL; 5125 struct btrfs_free_cluster *last_ptr = NULL;
5109 struct btrfs_block_group_cache *block_group = NULL; 5126 struct btrfs_block_group_cache *block_group = NULL;
5127 struct btrfs_block_group_cache *used_block_group;
5110 int empty_cluster = 2 * 1024 * 1024; 5128 int empty_cluster = 2 * 1024 * 1024;
5111 int allowed_chunk_alloc = 0; 5129 int allowed_chunk_alloc = 0;
5112 int done_chunk_alloc = 0; 5130 int done_chunk_alloc = 0;
5113 struct btrfs_space_info *space_info; 5131 struct btrfs_space_info *space_info;
5114 int last_ptr_loop = 0;
5115 int loop = 0; 5132 int loop = 0;
5116 int index = 0; 5133 int index = 0;
5117 int alloc_type = (data & BTRFS_BLOCK_GROUP_DATA) ? 5134 int alloc_type = (data & BTRFS_BLOCK_GROUP_DATA) ?
@@ -5173,6 +5190,7 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
5173ideal_cache: 5190ideal_cache:
5174 block_group = btrfs_lookup_block_group(root->fs_info, 5191 block_group = btrfs_lookup_block_group(root->fs_info,
5175 search_start); 5192 search_start);
5193 used_block_group = block_group;
5176 /* 5194 /*
5177 * we don't want to use the block group if it doesn't match our 5195 * we don't want to use the block group if it doesn't match our
5178 * allocation bits, or if its not cached. 5196 * allocation bits, or if its not cached.
@@ -5210,6 +5228,7 @@ search:
5210 u64 offset; 5228 u64 offset;
5211 int cached; 5229 int cached;
5212 5230
5231 used_block_group = block_group;
5213 btrfs_get_block_group(block_group); 5232 btrfs_get_block_group(block_group);
5214 search_start = block_group->key.objectid; 5233 search_start = block_group->key.objectid;
5215 5234
@@ -5286,71 +5305,62 @@ alloc:
5286 spin_unlock(&block_group->free_space_ctl->tree_lock); 5305 spin_unlock(&block_group->free_space_ctl->tree_lock);
5287 5306
5288 /* 5307 /*
5289 * Ok we want to try and use the cluster allocator, so lets look 5308 * Ok we want to try and use the cluster allocator, so
5290 * there, unless we are on LOOP_NO_EMPTY_SIZE, since we will 5309 * lets look there
5291 * have tried the cluster allocator plenty of times at this
5292 * point and not have found anything, so we are likely way too
5293 * fragmented for the clustering stuff to find anything, so lets
5294 * just skip it and let the allocator find whatever block it can
5295 * find
5296 */ 5310 */
5297 if (last_ptr && loop < LOOP_NO_EMPTY_SIZE) { 5311 if (last_ptr) {
5298 /* 5312 /*
5299 * the refill lock keeps out other 5313 * the refill lock keeps out other
5300 * people trying to start a new cluster 5314 * people trying to start a new cluster
5301 */ 5315 */
5302 spin_lock(&last_ptr->refill_lock); 5316 spin_lock(&last_ptr->refill_lock);
5303 if (!last_ptr->block_group || 5317 used_block_group = last_ptr->block_group;
5304 last_ptr->block_group->ro || 5318 if (used_block_group != block_group &&
5305 !block_group_bits(last_ptr->block_group, data)) 5319 (!used_block_group ||
5320 used_block_group->ro ||
5321 !block_group_bits(used_block_group, data))) {
5322 used_block_group = block_group;
5306 goto refill_cluster; 5323 goto refill_cluster;
5324 }
5325
5326 if (used_block_group != block_group)
5327 btrfs_get_block_group(used_block_group);
5307 5328
5308 offset = btrfs_alloc_from_cluster(block_group, last_ptr, 5329 offset = btrfs_alloc_from_cluster(used_block_group,
5309 num_bytes, search_start); 5330 last_ptr, num_bytes, used_block_group->key.objectid);
5310 if (offset) { 5331 if (offset) {
5311 /* we have a block, we're done */ 5332 /* we have a block, we're done */
5312 spin_unlock(&last_ptr->refill_lock); 5333 spin_unlock(&last_ptr->refill_lock);
5313 goto checks; 5334 goto checks;
5314 } 5335 }
5315 5336
5316 spin_lock(&last_ptr->lock); 5337 WARN_ON(last_ptr->block_group != used_block_group);
5317 /* 5338 if (used_block_group != block_group) {
5318 * whoops, this cluster doesn't actually point to 5339 btrfs_put_block_group(used_block_group);
5319 * this block group. Get a ref on the block 5340 used_block_group = block_group;
5320 * group is does point to and try again
5321 */
5322 if (!last_ptr_loop && last_ptr->block_group &&
5323 last_ptr->block_group != block_group &&
5324 index <=
5325 get_block_group_index(last_ptr->block_group)) {
5326
5327 btrfs_put_block_group(block_group);
5328 block_group = last_ptr->block_group;
5329 btrfs_get_block_group(block_group);
5330 spin_unlock(&last_ptr->lock);
5331 spin_unlock(&last_ptr->refill_lock);
5332
5333 last_ptr_loop = 1;
5334 search_start = block_group->key.objectid;
5335 /*
5336 * we know this block group is properly
5337 * in the list because
5338 * btrfs_remove_block_group, drops the
5339 * cluster before it removes the block
5340 * group from the list
5341 */
5342 goto have_block_group;
5343 } 5341 }
5344 spin_unlock(&last_ptr->lock);
5345refill_cluster: 5342refill_cluster:
5343 BUG_ON(used_block_group != block_group);
5344 /* If we are on LOOP_NO_EMPTY_SIZE, we can't
5345 * set up a new clusters, so lets just skip it
5346 * and let the allocator find whatever block
5347 * it can find. If we reach this point, we
5348 * will have tried the cluster allocator
5349 * plenty of times and not have found
5350 * anything, so we are likely way too
5351 * fragmented for the clustering stuff to find
5352 * anything. */
5353 if (loop >= LOOP_NO_EMPTY_SIZE) {
5354 spin_unlock(&last_ptr->refill_lock);
5355 goto unclustered_alloc;
5356 }
5357
5346 /* 5358 /*
5347 * this cluster didn't work out, free it and 5359 * this cluster didn't work out, free it and
5348 * start over 5360 * start over
5349 */ 5361 */
5350 btrfs_return_cluster_to_free_space(NULL, last_ptr); 5362 btrfs_return_cluster_to_free_space(NULL, last_ptr);
5351 5363
5352 last_ptr_loop = 0;
5353
5354 /* allocate a cluster in this block group */ 5364 /* allocate a cluster in this block group */
5355 ret = btrfs_find_space_cluster(trans, root, 5365 ret = btrfs_find_space_cluster(trans, root,
5356 block_group, last_ptr, 5366 block_group, last_ptr,
@@ -5390,6 +5400,7 @@ refill_cluster:
5390 goto loop; 5400 goto loop;
5391 } 5401 }
5392 5402
5403unclustered_alloc:
5393 offset = btrfs_find_space_for_alloc(block_group, search_start, 5404 offset = btrfs_find_space_for_alloc(block_group, search_start,
5394 num_bytes, empty_size); 5405 num_bytes, empty_size);
5395 /* 5406 /*
@@ -5416,14 +5427,14 @@ checks:
5416 search_start = stripe_align(root, offset); 5427 search_start = stripe_align(root, offset);
5417 /* move on to the next group */ 5428 /* move on to the next group */
5418 if (search_start + num_bytes >= search_end) { 5429 if (search_start + num_bytes >= search_end) {
5419 btrfs_add_free_space(block_group, offset, num_bytes); 5430 btrfs_add_free_space(used_block_group, offset, num_bytes);
5420 goto loop; 5431 goto loop;
5421 } 5432 }
5422 5433
5423 /* move on to the next group */ 5434 /* move on to the next group */
5424 if (search_start + num_bytes > 5435 if (search_start + num_bytes >
5425 block_group->key.objectid + block_group->key.offset) { 5436 used_block_group->key.objectid + used_block_group->key.offset) {
5426 btrfs_add_free_space(block_group, offset, num_bytes); 5437 btrfs_add_free_space(used_block_group, offset, num_bytes);
5427 goto loop; 5438 goto loop;
5428 } 5439 }
5429 5440
@@ -5431,14 +5442,14 @@ checks:
5431 ins->offset = num_bytes; 5442 ins->offset = num_bytes;
5432 5443
5433 if (offset < search_start) 5444 if (offset < search_start)
5434 btrfs_add_free_space(block_group, offset, 5445 btrfs_add_free_space(used_block_group, offset,
5435 search_start - offset); 5446 search_start - offset);
5436 BUG_ON(offset > search_start); 5447 BUG_ON(offset > search_start);
5437 5448
5438 ret = btrfs_update_reserved_bytes(block_group, num_bytes, 5449 ret = btrfs_update_reserved_bytes(used_block_group, num_bytes,
5439 alloc_type); 5450 alloc_type);
5440 if (ret == -EAGAIN) { 5451 if (ret == -EAGAIN) {
5441 btrfs_add_free_space(block_group, offset, num_bytes); 5452 btrfs_add_free_space(used_block_group, offset, num_bytes);
5442 goto loop; 5453 goto loop;
5443 } 5454 }
5444 5455
@@ -5447,15 +5458,19 @@ checks:
5447 ins->offset = num_bytes; 5458 ins->offset = num_bytes;
5448 5459
5449 if (offset < search_start) 5460 if (offset < search_start)
5450 btrfs_add_free_space(block_group, offset, 5461 btrfs_add_free_space(used_block_group, offset,
5451 search_start - offset); 5462 search_start - offset);
5452 BUG_ON(offset > search_start); 5463 BUG_ON(offset > search_start);
5464 if (used_block_group != block_group)
5465 btrfs_put_block_group(used_block_group);
5453 btrfs_put_block_group(block_group); 5466 btrfs_put_block_group(block_group);
5454 break; 5467 break;
5455loop: 5468loop:
5456 failed_cluster_refill = false; 5469 failed_cluster_refill = false;
5457 failed_alloc = false; 5470 failed_alloc = false;
5458 BUG_ON(index != get_block_group_index(block_group)); 5471 BUG_ON(index != get_block_group_index(block_group));
5472 if (used_block_group != block_group)
5473 btrfs_put_block_group(used_block_group);
5459 btrfs_put_block_group(block_group); 5474 btrfs_put_block_group(block_group);
5460 } 5475 }
5461 up_read(&space_info->groups_sem); 5476 up_read(&space_info->groups_sem);
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index be1bf627a14b..49f3c9dc09f4 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -935,8 +935,10 @@ again:
935 node = tree_search(tree, start); 935 node = tree_search(tree, start);
936 if (!node) { 936 if (!node) {
937 prealloc = alloc_extent_state_atomic(prealloc); 937 prealloc = alloc_extent_state_atomic(prealloc);
938 if (!prealloc) 938 if (!prealloc) {
939 return -ENOMEM; 939 err = -ENOMEM;
940 goto out;
941 }
940 err = insert_state(tree, prealloc, start, end, &bits); 942 err = insert_state(tree, prealloc, start, end, &bits);
941 prealloc = NULL; 943 prealloc = NULL;
942 BUG_ON(err == -EEXIST); 944 BUG_ON(err == -EEXIST);
@@ -992,8 +994,10 @@ hit_next:
992 */ 994 */
993 if (state->start < start) { 995 if (state->start < start) {
994 prealloc = alloc_extent_state_atomic(prealloc); 996 prealloc = alloc_extent_state_atomic(prealloc);
995 if (!prealloc) 997 if (!prealloc) {
996 return -ENOMEM; 998 err = -ENOMEM;
999 goto out;
1000 }
997 err = split_state(tree, state, prealloc, start); 1001 err = split_state(tree, state, prealloc, start);
998 BUG_ON(err == -EEXIST); 1002 BUG_ON(err == -EEXIST);
999 prealloc = NULL; 1003 prealloc = NULL;
@@ -1024,8 +1028,10 @@ hit_next:
1024 this_end = last_start - 1; 1028 this_end = last_start - 1;
1025 1029
1026 prealloc = alloc_extent_state_atomic(prealloc); 1030 prealloc = alloc_extent_state_atomic(prealloc);
1027 if (!prealloc) 1031 if (!prealloc) {
1028 return -ENOMEM; 1032 err = -ENOMEM;
1033 goto out;
1034 }
1029 1035
1030 /* 1036 /*
1031 * Avoid to free 'prealloc' if it can be merged with 1037 * Avoid to free 'prealloc' if it can be merged with
@@ -1051,8 +1057,10 @@ hit_next:
1051 */ 1057 */
1052 if (state->start <= end && state->end > end) { 1058 if (state->start <= end && state->end > end) {
1053 prealloc = alloc_extent_state_atomic(prealloc); 1059 prealloc = alloc_extent_state_atomic(prealloc);
1054 if (!prealloc) 1060 if (!prealloc) {
1055 return -ENOMEM; 1061 err = -ENOMEM;
1062 goto out;
1063 }
1056 1064
1057 err = split_state(tree, state, prealloc, end + 1); 1065 err = split_state(tree, state, prealloc, end + 1);
1058 BUG_ON(err == -EEXIST); 1066 BUG_ON(err == -EEXIST);
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index dafdfa059bf6..97fbe939c050 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1167,6 +1167,8 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
1167 nrptrs = min((iov_iter_count(i) + PAGE_CACHE_SIZE - 1) / 1167 nrptrs = min((iov_iter_count(i) + PAGE_CACHE_SIZE - 1) /
1168 PAGE_CACHE_SIZE, PAGE_CACHE_SIZE / 1168 PAGE_CACHE_SIZE, PAGE_CACHE_SIZE /
1169 (sizeof(struct page *))); 1169 (sizeof(struct page *)));
1170 nrptrs = min(nrptrs, current->nr_dirtied_pause - current->nr_dirtied);
1171 nrptrs = max(nrptrs, 8);
1170 pages = kmalloc(nrptrs * sizeof(struct page *), GFP_KERNEL); 1172 pages = kmalloc(nrptrs * sizeof(struct page *), GFP_KERNEL);
1171 if (!pages) 1173 if (!pages)
1172 return -ENOMEM; 1174 return -ENOMEM;
@@ -1387,7 +1389,11 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
1387 goto out; 1389 goto out;
1388 } 1390 }
1389 1391
1390 file_update_time(file); 1392 err = btrfs_update_time(file);
1393 if (err) {
1394 mutex_unlock(&inode->i_mutex);
1395 goto out;
1396 }
1391 BTRFS_I(inode)->sequence++; 1397 BTRFS_I(inode)->sequence++;
1392 1398
1393 start_pos = round_down(pos, root->sectorsize); 1399 start_pos = round_down(pos, root->sectorsize);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 2c984f7d4c2a..fd1a06df5bc6 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -38,6 +38,7 @@
38#include <linux/falloc.h> 38#include <linux/falloc.h>
39#include <linux/slab.h> 39#include <linux/slab.h>
40#include <linux/ratelimit.h> 40#include <linux/ratelimit.h>
41#include <linux/mount.h>
41#include "compat.h" 42#include "compat.h"
42#include "ctree.h" 43#include "ctree.h"
43#include "disk-io.h" 44#include "disk-io.h"
@@ -2031,7 +2032,7 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode)
2031 /* insert an orphan item to track this unlinked/truncated file */ 2032 /* insert an orphan item to track this unlinked/truncated file */
2032 if (insert >= 1) { 2033 if (insert >= 1) {
2033 ret = btrfs_insert_orphan_item(trans, root, btrfs_ino(inode)); 2034 ret = btrfs_insert_orphan_item(trans, root, btrfs_ino(inode));
2034 BUG_ON(ret); 2035 BUG_ON(ret && ret != -EEXIST);
2035 } 2036 }
2036 2037
2037 /* insert an orphan item to track subvolume contains orphan files */ 2038 /* insert an orphan item to track subvolume contains orphan files */
@@ -2158,6 +2159,38 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
2158 if (ret && ret != -ESTALE) 2159 if (ret && ret != -ESTALE)
2159 goto out; 2160 goto out;
2160 2161
2162 if (ret == -ESTALE && root == root->fs_info->tree_root) {
2163 struct btrfs_root *dead_root;
2164 struct btrfs_fs_info *fs_info = root->fs_info;
2165 int is_dead_root = 0;
2166
2167 /*
2168 * this is an orphan in the tree root. Currently these
2169 * could come from 2 sources:
2170 * a) a snapshot deletion in progress
2171 * b) a free space cache inode
2172 * We need to distinguish those two, as the snapshot
2173 * orphan must not get deleted.
2174 * find_dead_roots already ran before us, so if this
2175 * is a snapshot deletion, we should find the root
2176 * in the dead_roots list
2177 */
2178 spin_lock(&fs_info->trans_lock);
2179 list_for_each_entry(dead_root, &fs_info->dead_roots,
2180 root_list) {
2181 if (dead_root->root_key.objectid ==
2182 found_key.objectid) {
2183 is_dead_root = 1;
2184 break;
2185 }
2186 }
2187 spin_unlock(&fs_info->trans_lock);
2188 if (is_dead_root) {
2189 /* prevent this orphan from being found again */
2190 key.offset = found_key.objectid - 1;
2191 continue;
2192 }
2193 }
2161 /* 2194 /*
2162 * Inode is already gone but the orphan item is still there, 2195 * Inode is already gone but the orphan item is still there,
2163 * kill the orphan item. 2196 * kill the orphan item.
@@ -2191,7 +2224,14 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
2191 continue; 2224 continue;
2192 } 2225 }
2193 nr_truncate++; 2226 nr_truncate++;
2227 /*
2228 * Need to hold the imutex for reservation purposes, not
2229 * a huge deal here but I have a WARN_ON in
2230 * btrfs_delalloc_reserve_space to catch offenders.
2231 */
2232 mutex_lock(&inode->i_mutex);
2194 ret = btrfs_truncate(inode); 2233 ret = btrfs_truncate(inode);
2234 mutex_unlock(&inode->i_mutex);
2195 } else { 2235 } else {
2196 nr_unlink++; 2236 nr_unlink++;
2197 } 2237 }
@@ -3327,7 +3367,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
3327 u64 hint_byte = 0; 3367 u64 hint_byte = 0;
3328 hole_size = last_byte - cur_offset; 3368 hole_size = last_byte - cur_offset;
3329 3369
3330 trans = btrfs_start_transaction(root, 2); 3370 trans = btrfs_start_transaction(root, 3);
3331 if (IS_ERR(trans)) { 3371 if (IS_ERR(trans)) {
3332 err = PTR_ERR(trans); 3372 err = PTR_ERR(trans);
3333 break; 3373 break;
@@ -3337,6 +3377,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
3337 cur_offset + hole_size, 3377 cur_offset + hole_size,
3338 &hint_byte, 1); 3378 &hint_byte, 1);
3339 if (err) { 3379 if (err) {
3380 btrfs_update_inode(trans, root, inode);
3340 btrfs_end_transaction(trans, root); 3381 btrfs_end_transaction(trans, root);
3341 break; 3382 break;
3342 } 3383 }
@@ -3346,6 +3387,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
3346 0, hole_size, 0, hole_size, 3387 0, hole_size, 0, hole_size,
3347 0, 0, 0); 3388 0, 0, 0);
3348 if (err) { 3389 if (err) {
3390 btrfs_update_inode(trans, root, inode);
3349 btrfs_end_transaction(trans, root); 3391 btrfs_end_transaction(trans, root);
3350 break; 3392 break;
3351 } 3393 }
@@ -3353,6 +3395,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
3353 btrfs_drop_extent_cache(inode, hole_start, 3395 btrfs_drop_extent_cache(inode, hole_start,
3354 last_byte - 1, 0); 3396 last_byte - 1, 0);
3355 3397
3398 btrfs_update_inode(trans, root, inode);
3356 btrfs_end_transaction(trans, root); 3399 btrfs_end_transaction(trans, root);
3357 } 3400 }
3358 free_extent_map(em); 3401 free_extent_map(em);
@@ -3370,6 +3413,8 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
3370 3413
3371static int btrfs_setsize(struct inode *inode, loff_t newsize) 3414static int btrfs_setsize(struct inode *inode, loff_t newsize)
3372{ 3415{
3416 struct btrfs_root *root = BTRFS_I(inode)->root;
3417 struct btrfs_trans_handle *trans;
3373 loff_t oldsize = i_size_read(inode); 3418 loff_t oldsize = i_size_read(inode);
3374 int ret; 3419 int ret;
3375 3420
@@ -3377,16 +3422,19 @@ static int btrfs_setsize(struct inode *inode, loff_t newsize)
3377 return 0; 3422 return 0;
3378 3423
3379 if (newsize > oldsize) { 3424 if (newsize > oldsize) {
3380 i_size_write(inode, newsize);
3381 btrfs_ordered_update_i_size(inode, i_size_read(inode), NULL);
3382 truncate_pagecache(inode, oldsize, newsize); 3425 truncate_pagecache(inode, oldsize, newsize);
3383 ret = btrfs_cont_expand(inode, oldsize, newsize); 3426 ret = btrfs_cont_expand(inode, oldsize, newsize);
3384 if (ret) { 3427 if (ret)
3385 btrfs_setsize(inode, oldsize);
3386 return ret; 3428 return ret;
3387 }
3388 3429
3389 mark_inode_dirty(inode); 3430 trans = btrfs_start_transaction(root, 1);
3431 if (IS_ERR(trans))
3432 return PTR_ERR(trans);
3433
3434 i_size_write(inode, newsize);
3435 btrfs_ordered_update_i_size(inode, i_size_read(inode), NULL);
3436 ret = btrfs_update_inode(trans, root, inode);
3437 btrfs_end_transaction_throttle(trans, root);
3390 } else { 3438 } else {
3391 3439
3392 /* 3440 /*
@@ -3426,9 +3474,9 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr)
3426 3474
3427 if (attr->ia_valid) { 3475 if (attr->ia_valid) {
3428 setattr_copy(inode, attr); 3476 setattr_copy(inode, attr);
3429 mark_inode_dirty(inode); 3477 err = btrfs_dirty_inode(inode);
3430 3478
3431 if (attr->ia_valid & ATTR_MODE) 3479 if (!err && attr->ia_valid & ATTR_MODE)
3432 err = btrfs_acl_chmod(inode); 3480 err = btrfs_acl_chmod(inode);
3433 } 3481 }
3434 3482
@@ -4204,42 +4252,80 @@ int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc)
4204 * FIXME, needs more benchmarking...there are no reasons other than performance 4252 * FIXME, needs more benchmarking...there are no reasons other than performance
4205 * to keep or drop this code. 4253 * to keep or drop this code.
4206 */ 4254 */
4207void btrfs_dirty_inode(struct inode *inode, int flags) 4255int btrfs_dirty_inode(struct inode *inode)
4208{ 4256{
4209 struct btrfs_root *root = BTRFS_I(inode)->root; 4257 struct btrfs_root *root = BTRFS_I(inode)->root;
4210 struct btrfs_trans_handle *trans; 4258 struct btrfs_trans_handle *trans;
4211 int ret; 4259 int ret;
4212 4260
4213 if (BTRFS_I(inode)->dummy_inode) 4261 if (BTRFS_I(inode)->dummy_inode)
4214 return; 4262 return 0;
4215 4263
4216 trans = btrfs_join_transaction(root); 4264 trans = btrfs_join_transaction(root);
4217 BUG_ON(IS_ERR(trans)); 4265 if (IS_ERR(trans))
4266 return PTR_ERR(trans);
4218 4267
4219 ret = btrfs_update_inode(trans, root, inode); 4268 ret = btrfs_update_inode(trans, root, inode);
4220 if (ret && ret == -ENOSPC) { 4269 if (ret && ret == -ENOSPC) {
4221 /* whoops, lets try again with the full transaction */ 4270 /* whoops, lets try again with the full transaction */
4222 btrfs_end_transaction(trans, root); 4271 btrfs_end_transaction(trans, root);
4223 trans = btrfs_start_transaction(root, 1); 4272 trans = btrfs_start_transaction(root, 1);
4224 if (IS_ERR(trans)) { 4273 if (IS_ERR(trans))
4225 printk_ratelimited(KERN_ERR "btrfs: fail to " 4274 return PTR_ERR(trans);
4226 "dirty inode %llu error %ld\n",
4227 (unsigned long long)btrfs_ino(inode),
4228 PTR_ERR(trans));
4229 return;
4230 }
4231 4275
4232 ret = btrfs_update_inode(trans, root, inode); 4276 ret = btrfs_update_inode(trans, root, inode);
4233 if (ret) {
4234 printk_ratelimited(KERN_ERR "btrfs: fail to "
4235 "dirty inode %llu error %d\n",
4236 (unsigned long long)btrfs_ino(inode),
4237 ret);
4238 }
4239 } 4277 }
4240 btrfs_end_transaction(trans, root); 4278 btrfs_end_transaction(trans, root);
4241 if (BTRFS_I(inode)->delayed_node) 4279 if (BTRFS_I(inode)->delayed_node)
4242 btrfs_balance_delayed_items(root); 4280 btrfs_balance_delayed_items(root);
4281
4282 return ret;
4283}
4284
4285/*
4286 * This is a copy of file_update_time. We need this so we can return error on
4287 * ENOSPC for updating the inode in the case of file write and mmap writes.
4288 */
4289int btrfs_update_time(struct file *file)
4290{
4291 struct inode *inode = file->f_path.dentry->d_inode;
4292 struct timespec now;
4293 int ret;
4294 enum { S_MTIME = 1, S_CTIME = 2, S_VERSION = 4 } sync_it = 0;
4295
4296 /* First try to exhaust all avenues to not sync */
4297 if (IS_NOCMTIME(inode))
4298 return 0;
4299
4300 now = current_fs_time(inode->i_sb);
4301 if (!timespec_equal(&inode->i_mtime, &now))
4302 sync_it = S_MTIME;
4303
4304 if (!timespec_equal(&inode->i_ctime, &now))
4305 sync_it |= S_CTIME;
4306
4307 if (IS_I_VERSION(inode))
4308 sync_it |= S_VERSION;
4309
4310 if (!sync_it)
4311 return 0;
4312
4313 /* Finally allowed to write? Takes lock. */
4314 if (mnt_want_write_file(file))
4315 return 0;
4316
4317 /* Only change inode inside the lock region */
4318 if (sync_it & S_VERSION)
4319 inode_inc_iversion(inode);
4320 if (sync_it & S_CTIME)
4321 inode->i_ctime = now;
4322 if (sync_it & S_MTIME)
4323 inode->i_mtime = now;
4324 ret = btrfs_dirty_inode(inode);
4325 if (!ret)
4326 mark_inode_dirty_sync(inode);
4327 mnt_drop_write(file->f_path.mnt);
4328 return ret;
4243} 4329}
4244 4330
4245/* 4331/*
@@ -4504,10 +4590,6 @@ static int btrfs_add_nondir(struct btrfs_trans_handle *trans,
4504 int err = btrfs_add_link(trans, dir, inode, 4590 int err = btrfs_add_link(trans, dir, inode,
4505 dentry->d_name.name, dentry->d_name.len, 4591 dentry->d_name.name, dentry->d_name.len,
4506 backref, index); 4592 backref, index);
4507 if (!err) {
4508 d_instantiate(dentry, inode);
4509 return 0;
4510 }
4511 if (err > 0) 4593 if (err > 0)
4512 err = -EEXIST; 4594 err = -EEXIST;
4513 return err; 4595 return err;
@@ -4555,13 +4637,21 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
4555 goto out_unlock; 4637 goto out_unlock;
4556 } 4638 }
4557 4639
4640 /*
4641 * If the active LSM wants to access the inode during
4642 * d_instantiate it needs these. Smack checks to see
4643 * if the filesystem supports xattrs by looking at the
4644 * ops vector.
4645 */
4646
4647 inode->i_op = &btrfs_special_inode_operations;
4558 err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index); 4648 err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index);
4559 if (err) 4649 if (err)
4560 drop_inode = 1; 4650 drop_inode = 1;
4561 else { 4651 else {
4562 inode->i_op = &btrfs_special_inode_operations;
4563 init_special_inode(inode, inode->i_mode, rdev); 4652 init_special_inode(inode, inode->i_mode, rdev);
4564 btrfs_update_inode(trans, root, inode); 4653 btrfs_update_inode(trans, root, inode);
4654 d_instantiate(dentry, inode);
4565 } 4655 }
4566out_unlock: 4656out_unlock:
4567 nr = trans->blocks_used; 4657 nr = trans->blocks_used;
@@ -4613,15 +4703,23 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry,
4613 goto out_unlock; 4703 goto out_unlock;
4614 } 4704 }
4615 4705
4706 /*
4707 * If the active LSM wants to access the inode during
4708 * d_instantiate it needs these. Smack checks to see
4709 * if the filesystem supports xattrs by looking at the
4710 * ops vector.
4711 */
4712 inode->i_fop = &btrfs_file_operations;
4713 inode->i_op = &btrfs_file_inode_operations;
4714
4616 err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index); 4715 err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index);
4617 if (err) 4716 if (err)
4618 drop_inode = 1; 4717 drop_inode = 1;
4619 else { 4718 else {
4620 inode->i_mapping->a_ops = &btrfs_aops; 4719 inode->i_mapping->a_ops = &btrfs_aops;
4621 inode->i_mapping->backing_dev_info = &root->fs_info->bdi; 4720 inode->i_mapping->backing_dev_info = &root->fs_info->bdi;
4622 inode->i_fop = &btrfs_file_operations;
4623 inode->i_op = &btrfs_file_inode_operations;
4624 BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; 4721 BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
4722 d_instantiate(dentry, inode);
4625 } 4723 }
4626out_unlock: 4724out_unlock:
4627 nr = trans->blocks_used; 4725 nr = trans->blocks_used;
@@ -4679,6 +4777,7 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
4679 struct dentry *parent = dentry->d_parent; 4777 struct dentry *parent = dentry->d_parent;
4680 err = btrfs_update_inode(trans, root, inode); 4778 err = btrfs_update_inode(trans, root, inode);
4681 BUG_ON(err); 4779 BUG_ON(err);
4780 d_instantiate(dentry, inode);
4682 btrfs_log_new_name(trans, inode, NULL, parent); 4781 btrfs_log_new_name(trans, inode, NULL, parent);
4683 } 4782 }
4684 4783
@@ -6303,7 +6402,12 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
6303 u64 page_start; 6402 u64 page_start;
6304 u64 page_end; 6403 u64 page_end;
6305 6404
6405 /* Need this to keep space reservations serialized */
6406 mutex_lock(&inode->i_mutex);
6306 ret = btrfs_delalloc_reserve_space(inode, PAGE_CACHE_SIZE); 6407 ret = btrfs_delalloc_reserve_space(inode, PAGE_CACHE_SIZE);
6408 mutex_unlock(&inode->i_mutex);
6409 if (!ret)
6410 ret = btrfs_update_time(vma->vm_file);
6307 if (ret) { 6411 if (ret) {
6308 if (ret == -ENOMEM) 6412 if (ret == -ENOMEM)
6309 ret = VM_FAULT_OOM; 6413 ret = VM_FAULT_OOM;
@@ -6515,8 +6619,9 @@ static int btrfs_truncate(struct inode *inode)
6515 /* Just need the 1 for updating the inode */ 6619 /* Just need the 1 for updating the inode */
6516 trans = btrfs_start_transaction(root, 1); 6620 trans = btrfs_start_transaction(root, 1);
6517 if (IS_ERR(trans)) { 6621 if (IS_ERR(trans)) {
6518 err = PTR_ERR(trans); 6622 ret = err = PTR_ERR(trans);
6519 goto out; 6623 trans = NULL;
6624 break;
6520 } 6625 }
6521 } 6626 }
6522 6627
@@ -7076,14 +7181,21 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
7076 goto out_unlock; 7181 goto out_unlock;
7077 } 7182 }
7078 7183
7184 /*
7185 * If the active LSM wants to access the inode during
7186 * d_instantiate it needs these. Smack checks to see
7187 * if the filesystem supports xattrs by looking at the
7188 * ops vector.
7189 */
7190 inode->i_fop = &btrfs_file_operations;
7191 inode->i_op = &btrfs_file_inode_operations;
7192
7079 err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index); 7193 err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index);
7080 if (err) 7194 if (err)
7081 drop_inode = 1; 7195 drop_inode = 1;
7082 else { 7196 else {
7083 inode->i_mapping->a_ops = &btrfs_aops; 7197 inode->i_mapping->a_ops = &btrfs_aops;
7084 inode->i_mapping->backing_dev_info = &root->fs_info->bdi; 7198 inode->i_mapping->backing_dev_info = &root->fs_info->bdi;
7085 inode->i_fop = &btrfs_file_operations;
7086 inode->i_op = &btrfs_file_inode_operations;
7087 BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; 7199 BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
7088 } 7200 }
7089 if (drop_inode) 7201 if (drop_inode)
@@ -7132,6 +7244,8 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
7132 drop_inode = 1; 7244 drop_inode = 1;
7133 7245
7134out_unlock: 7246out_unlock:
7247 if (!err)
7248 d_instantiate(dentry, inode);
7135 nr = trans->blocks_used; 7249 nr = trans->blocks_used;
7136 btrfs_end_transaction_throttle(trans, root); 7250 btrfs_end_transaction_throttle(trans, root);
7137 if (drop_inode) { 7251 if (drop_inode) {
@@ -7353,6 +7467,7 @@ static const struct inode_operations btrfs_symlink_inode_operations = {
7353 .follow_link = page_follow_link_light, 7467 .follow_link = page_follow_link_light,
7354 .put_link = page_put_link, 7468 .put_link = page_put_link,
7355 .getattr = btrfs_getattr, 7469 .getattr = btrfs_getattr,
7470 .setattr = btrfs_setattr,
7356 .permission = btrfs_permission, 7471 .permission = btrfs_permission,
7357 .setxattr = btrfs_setxattr, 7472 .setxattr = btrfs_setxattr,
7358 .getxattr = btrfs_getxattr, 7473 .getxattr = btrfs_getxattr,
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 72d461656f60..c04f02c7d5bb 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -252,11 +252,11 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
252 trans = btrfs_join_transaction(root); 252 trans = btrfs_join_transaction(root);
253 BUG_ON(IS_ERR(trans)); 253 BUG_ON(IS_ERR(trans));
254 254
255 btrfs_update_iflags(inode);
256 inode->i_ctime = CURRENT_TIME;
255 ret = btrfs_update_inode(trans, root, inode); 257 ret = btrfs_update_inode(trans, root, inode);
256 BUG_ON(ret); 258 BUG_ON(ret);
257 259
258 btrfs_update_iflags(inode);
259 inode->i_ctime = CURRENT_TIME;
260 btrfs_end_transaction(trans, root); 260 btrfs_end_transaction(trans, root);
261 261
262 mnt_drop_write(file->f_path.mnt); 262 mnt_drop_write(file->f_path.mnt);
@@ -858,8 +858,10 @@ static int cluster_pages_for_defrag(struct inode *inode,
858 return 0; 858 return 0;
859 file_end = (isize - 1) >> PAGE_CACHE_SHIFT; 859 file_end = (isize - 1) >> PAGE_CACHE_SHIFT;
860 860
861 mutex_lock(&inode->i_mutex);
861 ret = btrfs_delalloc_reserve_space(inode, 862 ret = btrfs_delalloc_reserve_space(inode,
862 num_pages << PAGE_CACHE_SHIFT); 863 num_pages << PAGE_CACHE_SHIFT);
864 mutex_unlock(&inode->i_mutex);
863 if (ret) 865 if (ret)
864 return ret; 866 return ret;
865again: 867again:
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index dff29d5e151a..cfb55434a469 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -2947,7 +2947,9 @@ static int relocate_file_extent_cluster(struct inode *inode,
2947 index = (cluster->start - offset) >> PAGE_CACHE_SHIFT; 2947 index = (cluster->start - offset) >> PAGE_CACHE_SHIFT;
2948 last_index = (cluster->end - offset) >> PAGE_CACHE_SHIFT; 2948 last_index = (cluster->end - offset) >> PAGE_CACHE_SHIFT;
2949 while (index <= last_index) { 2949 while (index <= last_index) {
2950 mutex_lock(&inode->i_mutex);
2950 ret = btrfs_delalloc_reserve_metadata(inode, PAGE_CACHE_SIZE); 2951 ret = btrfs_delalloc_reserve_metadata(inode, PAGE_CACHE_SIZE);
2952 mutex_unlock(&inode->i_mutex);
2951 if (ret) 2953 if (ret)
2952 goto out; 2954 goto out;
2953 2955
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index c27bcb67f330..ddf2c90d3fc0 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -1535,18 +1535,22 @@ static noinline_for_stack int scrub_supers(struct scrub_dev *sdev)
1535static noinline_for_stack int scrub_workers_get(struct btrfs_root *root) 1535static noinline_for_stack int scrub_workers_get(struct btrfs_root *root)
1536{ 1536{
1537 struct btrfs_fs_info *fs_info = root->fs_info; 1537 struct btrfs_fs_info *fs_info = root->fs_info;
1538 int ret = 0;
1538 1539
1539 mutex_lock(&fs_info->scrub_lock); 1540 mutex_lock(&fs_info->scrub_lock);
1540 if (fs_info->scrub_workers_refcnt == 0) { 1541 if (fs_info->scrub_workers_refcnt == 0) {
1541 btrfs_init_workers(&fs_info->scrub_workers, "scrub", 1542 btrfs_init_workers(&fs_info->scrub_workers, "scrub",
1542 fs_info->thread_pool_size, &fs_info->generic_worker); 1543 fs_info->thread_pool_size, &fs_info->generic_worker);
1543 fs_info->scrub_workers.idle_thresh = 4; 1544 fs_info->scrub_workers.idle_thresh = 4;
1544 btrfs_start_workers(&fs_info->scrub_workers, 1); 1545 ret = btrfs_start_workers(&fs_info->scrub_workers);
1546 if (ret)
1547 goto out;
1545 } 1548 }
1546 ++fs_info->scrub_workers_refcnt; 1549 ++fs_info->scrub_workers_refcnt;
1550out:
1547 mutex_unlock(&fs_info->scrub_lock); 1551 mutex_unlock(&fs_info->scrub_lock);
1548 1552
1549 return 0; 1553 return ret;
1550} 1554}
1551 1555
1552static noinline_for_stack void scrub_workers_put(struct btrfs_root *root) 1556static noinline_for_stack void scrub_workers_put(struct btrfs_root *root)
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index e28ad4baf483..200f63bc6675 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -41,6 +41,7 @@
41#include <linux/slab.h> 41#include <linux/slab.h>
42#include <linux/cleancache.h> 42#include <linux/cleancache.h>
43#include <linux/mnt_namespace.h> 43#include <linux/mnt_namespace.h>
44#include <linux/ratelimit.h>
44#include "compat.h" 45#include "compat.h"
45#include "delayed-inode.h" 46#include "delayed-inode.h"
46#include "ctree.h" 47#include "ctree.h"
@@ -1053,7 +1054,7 @@ static int btrfs_calc_avail_data_space(struct btrfs_root *root, u64 *free_bytes)
1053 u64 avail_space; 1054 u64 avail_space;
1054 u64 used_space; 1055 u64 used_space;
1055 u64 min_stripe_size; 1056 u64 min_stripe_size;
1056 int min_stripes = 1; 1057 int min_stripes = 1, num_stripes = 1;
1057 int i = 0, nr_devices; 1058 int i = 0, nr_devices;
1058 int ret; 1059 int ret;
1059 1060
@@ -1067,12 +1068,16 @@ static int btrfs_calc_avail_data_space(struct btrfs_root *root, u64 *free_bytes)
1067 1068
1068 /* calc min stripe number for data space alloction */ 1069 /* calc min stripe number for data space alloction */
1069 type = btrfs_get_alloc_profile(root, 1); 1070 type = btrfs_get_alloc_profile(root, 1);
1070 if (type & BTRFS_BLOCK_GROUP_RAID0) 1071 if (type & BTRFS_BLOCK_GROUP_RAID0) {
1071 min_stripes = 2; 1072 min_stripes = 2;
1072 else if (type & BTRFS_BLOCK_GROUP_RAID1) 1073 num_stripes = nr_devices;
1074 } else if (type & BTRFS_BLOCK_GROUP_RAID1) {
1073 min_stripes = 2; 1075 min_stripes = 2;
1074 else if (type & BTRFS_BLOCK_GROUP_RAID10) 1076 num_stripes = 2;
1077 } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
1075 min_stripes = 4; 1078 min_stripes = 4;
1079 num_stripes = 4;
1080 }
1076 1081
1077 if (type & BTRFS_BLOCK_GROUP_DUP) 1082 if (type & BTRFS_BLOCK_GROUP_DUP)
1078 min_stripe_size = 2 * BTRFS_STRIPE_LEN; 1083 min_stripe_size = 2 * BTRFS_STRIPE_LEN;
@@ -1141,13 +1146,16 @@ static int btrfs_calc_avail_data_space(struct btrfs_root *root, u64 *free_bytes)
1141 i = nr_devices - 1; 1146 i = nr_devices - 1;
1142 avail_space = 0; 1147 avail_space = 0;
1143 while (nr_devices >= min_stripes) { 1148 while (nr_devices >= min_stripes) {
1149 if (num_stripes > nr_devices)
1150 num_stripes = nr_devices;
1151
1144 if (devices_info[i].max_avail >= min_stripe_size) { 1152 if (devices_info[i].max_avail >= min_stripe_size) {
1145 int j; 1153 int j;
1146 u64 alloc_size; 1154 u64 alloc_size;
1147 1155
1148 avail_space += devices_info[i].max_avail * min_stripes; 1156 avail_space += devices_info[i].max_avail * num_stripes;
1149 alloc_size = devices_info[i].max_avail; 1157 alloc_size = devices_info[i].max_avail;
1150 for (j = i + 1 - min_stripes; j <= i; j++) 1158 for (j = i + 1 - num_stripes; j <= i; j++)
1151 devices_info[j].max_avail -= alloc_size; 1159 devices_info[j].max_avail -= alloc_size;
1152 } 1160 }
1153 i--; 1161 i--;
@@ -1264,6 +1272,16 @@ static int btrfs_unfreeze(struct super_block *sb)
1264 return 0; 1272 return 0;
1265} 1273}
1266 1274
1275static void btrfs_fs_dirty_inode(struct inode *inode, int flags)
1276{
1277 int ret;
1278
1279 ret = btrfs_dirty_inode(inode);
1280 if (ret)
1281 printk_ratelimited(KERN_ERR "btrfs: fail to dirty inode %Lu "
1282 "error %d\n", btrfs_ino(inode), ret);
1283}
1284
1267static const struct super_operations btrfs_super_ops = { 1285static const struct super_operations btrfs_super_ops = {
1268 .drop_inode = btrfs_drop_inode, 1286 .drop_inode = btrfs_drop_inode,
1269 .evict_inode = btrfs_evict_inode, 1287 .evict_inode = btrfs_evict_inode,
@@ -1271,7 +1289,7 @@ static const struct super_operations btrfs_super_ops = {
1271 .sync_fs = btrfs_sync_fs, 1289 .sync_fs = btrfs_sync_fs,
1272 .show_options = btrfs_show_options, 1290 .show_options = btrfs_show_options,
1273 .write_inode = btrfs_write_inode, 1291 .write_inode = btrfs_write_inode,
1274 .dirty_inode = btrfs_dirty_inode, 1292 .dirty_inode = btrfs_fs_dirty_inode,
1275 .alloc_inode = btrfs_alloc_inode, 1293 .alloc_inode = btrfs_alloc_inode,
1276 .destroy_inode = btrfs_destroy_inode, 1294 .destroy_inode = btrfs_destroy_inode,
1277 .statfs = btrfs_statfs, 1295 .statfs = btrfs_statfs,
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index c37433d3cd82..f4b839fd3c9d 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -295,6 +295,12 @@ loop_lock:
295 btrfs_requeue_work(&device->work); 295 btrfs_requeue_work(&device->work);
296 goto done; 296 goto done;
297 } 297 }
298 /* unplug every 64 requests just for good measure */
299 if (batch_run % 64 == 0) {
300 blk_finish_plug(&plug);
301 blk_start_plug(&plug);
302 sync_pending = 0;
303 }
298 } 304 }
299 305
300 cond_resched(); 306 cond_resched();
@@ -1611,7 +1617,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
1611 if ((sb->s_flags & MS_RDONLY) && !root->fs_info->fs_devices->seeding) 1617 if ((sb->s_flags & MS_RDONLY) && !root->fs_info->fs_devices->seeding)
1612 return -EINVAL; 1618 return -EINVAL;
1613 1619
1614 bdev = blkdev_get_by_path(device_path, FMODE_EXCL, 1620 bdev = blkdev_get_by_path(device_path, FMODE_WRITE | FMODE_EXCL,
1615 root->fs_info->bdev_holder); 1621 root->fs_info->bdev_holder);
1616 if (IS_ERR(bdev)) 1622 if (IS_ERR(bdev))
1617 return PTR_ERR(bdev); 1623 return PTR_ERR(bdev);
@@ -3258,7 +3264,7 @@ static void btrfs_end_bio(struct bio *bio, int err)
3258 */ 3264 */
3259 if (atomic_read(&bbio->error) > bbio->max_errors) { 3265 if (atomic_read(&bbio->error) > bbio->max_errors) {
3260 err = -EIO; 3266 err = -EIO;
3261 } else if (err) { 3267 } else {
3262 /* 3268 /*
3263 * this bio is actually up to date, we didn't 3269 * this bio is actually up to date, we didn't
3264 * go over the max number of errors 3270 * go over the max number of errors
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 4144caf2f9d3..173b1d22e59b 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -87,7 +87,7 @@ static int ceph_set_page_dirty(struct page *page)
87 snapc = ceph_get_snap_context(ci->i_snap_realm->cached_context); 87 snapc = ceph_get_snap_context(ci->i_snap_realm->cached_context);
88 88
89 /* dirty the head */ 89 /* dirty the head */
90 spin_lock(&inode->i_lock); 90 spin_lock(&ci->i_ceph_lock);
91 if (ci->i_head_snapc == NULL) 91 if (ci->i_head_snapc == NULL)
92 ci->i_head_snapc = ceph_get_snap_context(snapc); 92 ci->i_head_snapc = ceph_get_snap_context(snapc);
93 ++ci->i_wrbuffer_ref_head; 93 ++ci->i_wrbuffer_ref_head;
@@ -100,7 +100,7 @@ static int ceph_set_page_dirty(struct page *page)
100 ci->i_wrbuffer_ref-1, ci->i_wrbuffer_ref_head-1, 100 ci->i_wrbuffer_ref-1, ci->i_wrbuffer_ref_head-1,
101 ci->i_wrbuffer_ref, ci->i_wrbuffer_ref_head, 101 ci->i_wrbuffer_ref, ci->i_wrbuffer_ref_head,
102 snapc, snapc->seq, snapc->num_snaps); 102 snapc, snapc->seq, snapc->num_snaps);
103 spin_unlock(&inode->i_lock); 103 spin_unlock(&ci->i_ceph_lock);
104 104
105 /* now adjust page */ 105 /* now adjust page */
106 spin_lock_irq(&mapping->tree_lock); 106 spin_lock_irq(&mapping->tree_lock);
@@ -391,7 +391,7 @@ static struct ceph_snap_context *get_oldest_context(struct inode *inode,
391 struct ceph_snap_context *snapc = NULL; 391 struct ceph_snap_context *snapc = NULL;
392 struct ceph_cap_snap *capsnap = NULL; 392 struct ceph_cap_snap *capsnap = NULL;
393 393
394 spin_lock(&inode->i_lock); 394 spin_lock(&ci->i_ceph_lock);
395 list_for_each_entry(capsnap, &ci->i_cap_snaps, ci_item) { 395 list_for_each_entry(capsnap, &ci->i_cap_snaps, ci_item) {
396 dout(" cap_snap %p snapc %p has %d dirty pages\n", capsnap, 396 dout(" cap_snap %p snapc %p has %d dirty pages\n", capsnap,
397 capsnap->context, capsnap->dirty_pages); 397 capsnap->context, capsnap->dirty_pages);
@@ -407,7 +407,7 @@ static struct ceph_snap_context *get_oldest_context(struct inode *inode,
407 dout(" head snapc %p has %d dirty pages\n", 407 dout(" head snapc %p has %d dirty pages\n",
408 snapc, ci->i_wrbuffer_ref_head); 408 snapc, ci->i_wrbuffer_ref_head);
409 } 409 }
410 spin_unlock(&inode->i_lock); 410 spin_unlock(&ci->i_ceph_lock);
411 return snapc; 411 return snapc;
412} 412}
413 413
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 0f327c6c9679..8b53193e4f7c 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -309,7 +309,7 @@ void ceph_reservation_status(struct ceph_fs_client *fsc,
309/* 309/*
310 * Find ceph_cap for given mds, if any. 310 * Find ceph_cap for given mds, if any.
311 * 311 *
312 * Called with i_lock held. 312 * Called with i_ceph_lock held.
313 */ 313 */
314static struct ceph_cap *__get_cap_for_mds(struct ceph_inode_info *ci, int mds) 314static struct ceph_cap *__get_cap_for_mds(struct ceph_inode_info *ci, int mds)
315{ 315{
@@ -332,9 +332,9 @@ struct ceph_cap *ceph_get_cap_for_mds(struct ceph_inode_info *ci, int mds)
332{ 332{
333 struct ceph_cap *cap; 333 struct ceph_cap *cap;
334 334
335 spin_lock(&ci->vfs_inode.i_lock); 335 spin_lock(&ci->i_ceph_lock);
336 cap = __get_cap_for_mds(ci, mds); 336 cap = __get_cap_for_mds(ci, mds);
337 spin_unlock(&ci->vfs_inode.i_lock); 337 spin_unlock(&ci->i_ceph_lock);
338 return cap; 338 return cap;
339} 339}
340 340
@@ -361,15 +361,16 @@ static int __ceph_get_cap_mds(struct ceph_inode_info *ci)
361 361
362int ceph_get_cap_mds(struct inode *inode) 362int ceph_get_cap_mds(struct inode *inode)
363{ 363{
364 struct ceph_inode_info *ci = ceph_inode(inode);
364 int mds; 365 int mds;
365 spin_lock(&inode->i_lock); 366 spin_lock(&ci->i_ceph_lock);
366 mds = __ceph_get_cap_mds(ceph_inode(inode)); 367 mds = __ceph_get_cap_mds(ceph_inode(inode));
367 spin_unlock(&inode->i_lock); 368 spin_unlock(&ci->i_ceph_lock);
368 return mds; 369 return mds;
369} 370}
370 371
371/* 372/*
372 * Called under i_lock. 373 * Called under i_ceph_lock.
373 */ 374 */
374static void __insert_cap_node(struct ceph_inode_info *ci, 375static void __insert_cap_node(struct ceph_inode_info *ci,
375 struct ceph_cap *new) 376 struct ceph_cap *new)
@@ -415,7 +416,7 @@ static void __cap_set_timeouts(struct ceph_mds_client *mdsc,
415 * 416 *
416 * If I_FLUSH is set, leave the inode at the front of the list. 417 * If I_FLUSH is set, leave the inode at the front of the list.
417 * 418 *
418 * Caller holds i_lock 419 * Caller holds i_ceph_lock
419 * -> we take mdsc->cap_delay_lock 420 * -> we take mdsc->cap_delay_lock
420 */ 421 */
421static void __cap_delay_requeue(struct ceph_mds_client *mdsc, 422static void __cap_delay_requeue(struct ceph_mds_client *mdsc,
@@ -457,7 +458,7 @@ static void __cap_delay_requeue_front(struct ceph_mds_client *mdsc,
457/* 458/*
458 * Cancel delayed work on cap. 459 * Cancel delayed work on cap.
459 * 460 *
460 * Caller must hold i_lock. 461 * Caller must hold i_ceph_lock.
461 */ 462 */
462static void __cap_delay_cancel(struct ceph_mds_client *mdsc, 463static void __cap_delay_cancel(struct ceph_mds_client *mdsc,
463 struct ceph_inode_info *ci) 464 struct ceph_inode_info *ci)
@@ -532,14 +533,14 @@ int ceph_add_cap(struct inode *inode,
532 wanted |= ceph_caps_for_mode(fmode); 533 wanted |= ceph_caps_for_mode(fmode);
533 534
534retry: 535retry:
535 spin_lock(&inode->i_lock); 536 spin_lock(&ci->i_ceph_lock);
536 cap = __get_cap_for_mds(ci, mds); 537 cap = __get_cap_for_mds(ci, mds);
537 if (!cap) { 538 if (!cap) {
538 if (new_cap) { 539 if (new_cap) {
539 cap = new_cap; 540 cap = new_cap;
540 new_cap = NULL; 541 new_cap = NULL;
541 } else { 542 } else {
542 spin_unlock(&inode->i_lock); 543 spin_unlock(&ci->i_ceph_lock);
543 new_cap = get_cap(mdsc, caps_reservation); 544 new_cap = get_cap(mdsc, caps_reservation);
544 if (new_cap == NULL) 545 if (new_cap == NULL)
545 return -ENOMEM; 546 return -ENOMEM;
@@ -625,7 +626,7 @@ retry:
625 626
626 if (fmode >= 0) 627 if (fmode >= 0)
627 __ceph_get_fmode(ci, fmode); 628 __ceph_get_fmode(ci, fmode);
628 spin_unlock(&inode->i_lock); 629 spin_unlock(&ci->i_ceph_lock);
629 wake_up_all(&ci->i_cap_wq); 630 wake_up_all(&ci->i_cap_wq);
630 return 0; 631 return 0;
631} 632}
@@ -792,7 +793,7 @@ int ceph_caps_revoking(struct ceph_inode_info *ci, int mask)
792 struct rb_node *p; 793 struct rb_node *p;
793 int ret = 0; 794 int ret = 0;
794 795
795 spin_lock(&inode->i_lock); 796 spin_lock(&ci->i_ceph_lock);
796 for (p = rb_first(&ci->i_caps); p; p = rb_next(p)) { 797 for (p = rb_first(&ci->i_caps); p; p = rb_next(p)) {
797 cap = rb_entry(p, struct ceph_cap, ci_node); 798 cap = rb_entry(p, struct ceph_cap, ci_node);
798 if (__cap_is_valid(cap) && 799 if (__cap_is_valid(cap) &&
@@ -801,7 +802,7 @@ int ceph_caps_revoking(struct ceph_inode_info *ci, int mask)
801 break; 802 break;
802 } 803 }
803 } 804 }
804 spin_unlock(&inode->i_lock); 805 spin_unlock(&ci->i_ceph_lock);
805 dout("ceph_caps_revoking %p %s = %d\n", inode, 806 dout("ceph_caps_revoking %p %s = %d\n", inode,
806 ceph_cap_string(mask), ret); 807 ceph_cap_string(mask), ret);
807 return ret; 808 return ret;
@@ -855,7 +856,7 @@ int __ceph_caps_mds_wanted(struct ceph_inode_info *ci)
855} 856}
856 857
857/* 858/*
858 * called under i_lock 859 * called under i_ceph_lock
859 */ 860 */
860static int __ceph_is_any_caps(struct ceph_inode_info *ci) 861static int __ceph_is_any_caps(struct ceph_inode_info *ci)
861{ 862{
@@ -865,7 +866,7 @@ static int __ceph_is_any_caps(struct ceph_inode_info *ci)
865/* 866/*
866 * Remove a cap. Take steps to deal with a racing iterate_session_caps. 867 * Remove a cap. Take steps to deal with a racing iterate_session_caps.
867 * 868 *
868 * caller should hold i_lock. 869 * caller should hold i_ceph_lock.
869 * caller will not hold session s_mutex if called from destroy_inode. 870 * caller will not hold session s_mutex if called from destroy_inode.
870 */ 871 */
871void __ceph_remove_cap(struct ceph_cap *cap) 872void __ceph_remove_cap(struct ceph_cap *cap)
@@ -1028,7 +1029,7 @@ static void __queue_cap_release(struct ceph_mds_session *session,
1028 1029
1029/* 1030/*
1030 * Queue cap releases when an inode is dropped from our cache. Since 1031 * Queue cap releases when an inode is dropped from our cache. Since
1031 * inode is about to be destroyed, there is no need for i_lock. 1032 * inode is about to be destroyed, there is no need for i_ceph_lock.
1032 */ 1033 */
1033void ceph_queue_caps_release(struct inode *inode) 1034void ceph_queue_caps_release(struct inode *inode)
1034{ 1035{
@@ -1049,7 +1050,7 @@ void ceph_queue_caps_release(struct inode *inode)
1049 1050
1050/* 1051/*
1051 * Send a cap msg on the given inode. Update our caps state, then 1052 * Send a cap msg on the given inode. Update our caps state, then
1052 * drop i_lock and send the message. 1053 * drop i_ceph_lock and send the message.
1053 * 1054 *
1054 * Make note of max_size reported/requested from mds, revoked caps 1055 * Make note of max_size reported/requested from mds, revoked caps
1055 * that have now been implemented. 1056 * that have now been implemented.
@@ -1061,13 +1062,13 @@ void ceph_queue_caps_release(struct inode *inode)
1061 * Return non-zero if delayed release, or we experienced an error 1062 * Return non-zero if delayed release, or we experienced an error
1062 * such that the caller should requeue + retry later. 1063 * such that the caller should requeue + retry later.
1063 * 1064 *
1064 * called with i_lock, then drops it. 1065 * called with i_ceph_lock, then drops it.
1065 * caller should hold snap_rwsem (read), s_mutex. 1066 * caller should hold snap_rwsem (read), s_mutex.
1066 */ 1067 */
1067static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap, 1068static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap,
1068 int op, int used, int want, int retain, int flushing, 1069 int op, int used, int want, int retain, int flushing,
1069 unsigned *pflush_tid) 1070 unsigned *pflush_tid)
1070 __releases(cap->ci->vfs_inode->i_lock) 1071 __releases(cap->ci->i_ceph_lock)
1071{ 1072{
1072 struct ceph_inode_info *ci = cap->ci; 1073 struct ceph_inode_info *ci = cap->ci;
1073 struct inode *inode = &ci->vfs_inode; 1074 struct inode *inode = &ci->vfs_inode;
@@ -1170,7 +1171,7 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap,
1170 xattr_version = ci->i_xattrs.version; 1171 xattr_version = ci->i_xattrs.version;
1171 } 1172 }
1172 1173
1173 spin_unlock(&inode->i_lock); 1174 spin_unlock(&ci->i_ceph_lock);
1174 1175
1175 ret = send_cap_msg(session, ceph_vino(inode).ino, cap_id, 1176 ret = send_cap_msg(session, ceph_vino(inode).ino, cap_id,
1176 op, keep, want, flushing, seq, flush_tid, issue_seq, mseq, 1177 op, keep, want, flushing, seq, flush_tid, issue_seq, mseq,
@@ -1198,13 +1199,13 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap,
1198 * Unless @again is true, skip cap_snaps that were already sent to 1199 * Unless @again is true, skip cap_snaps that were already sent to
1199 * the MDS (i.e., during this session). 1200 * the MDS (i.e., during this session).
1200 * 1201 *
1201 * Called under i_lock. Takes s_mutex as needed. 1202 * Called under i_ceph_lock. Takes s_mutex as needed.
1202 */ 1203 */
1203void __ceph_flush_snaps(struct ceph_inode_info *ci, 1204void __ceph_flush_snaps(struct ceph_inode_info *ci,
1204 struct ceph_mds_session **psession, 1205 struct ceph_mds_session **psession,
1205 int again) 1206 int again)
1206 __releases(ci->vfs_inode->i_lock) 1207 __releases(ci->i_ceph_lock)
1207 __acquires(ci->vfs_inode->i_lock) 1208 __acquires(ci->i_ceph_lock)
1208{ 1209{
1209 struct inode *inode = &ci->vfs_inode; 1210 struct inode *inode = &ci->vfs_inode;
1210 int mds; 1211 int mds;
@@ -1261,7 +1262,7 @@ retry:
1261 session = NULL; 1262 session = NULL;
1262 } 1263 }
1263 if (!session) { 1264 if (!session) {
1264 spin_unlock(&inode->i_lock); 1265 spin_unlock(&ci->i_ceph_lock);
1265 mutex_lock(&mdsc->mutex); 1266 mutex_lock(&mdsc->mutex);
1266 session = __ceph_lookup_mds_session(mdsc, mds); 1267 session = __ceph_lookup_mds_session(mdsc, mds);
1267 mutex_unlock(&mdsc->mutex); 1268 mutex_unlock(&mdsc->mutex);
@@ -1275,7 +1276,7 @@ retry:
1275 * deletion or migration. retry, and we'll 1276 * deletion or migration. retry, and we'll
1276 * get a better @mds value next time. 1277 * get a better @mds value next time.
1277 */ 1278 */
1278 spin_lock(&inode->i_lock); 1279 spin_lock(&ci->i_ceph_lock);
1279 goto retry; 1280 goto retry;
1280 } 1281 }
1281 1282
@@ -1285,7 +1286,7 @@ retry:
1285 list_del_init(&capsnap->flushing_item); 1286 list_del_init(&capsnap->flushing_item);
1286 list_add_tail(&capsnap->flushing_item, 1287 list_add_tail(&capsnap->flushing_item,
1287 &session->s_cap_snaps_flushing); 1288 &session->s_cap_snaps_flushing);
1288 spin_unlock(&inode->i_lock); 1289 spin_unlock(&ci->i_ceph_lock);
1289 1290
1290 dout("flush_snaps %p cap_snap %p follows %lld tid %llu\n", 1291 dout("flush_snaps %p cap_snap %p follows %lld tid %llu\n",
1291 inode, capsnap, capsnap->follows, capsnap->flush_tid); 1292 inode, capsnap, capsnap->follows, capsnap->flush_tid);
@@ -1302,7 +1303,7 @@ retry:
1302 next_follows = capsnap->follows + 1; 1303 next_follows = capsnap->follows + 1;
1303 ceph_put_cap_snap(capsnap); 1304 ceph_put_cap_snap(capsnap);
1304 1305
1305 spin_lock(&inode->i_lock); 1306 spin_lock(&ci->i_ceph_lock);
1306 goto retry; 1307 goto retry;
1307 } 1308 }
1308 1309
@@ -1322,11 +1323,9 @@ out:
1322 1323
1323static void ceph_flush_snaps(struct ceph_inode_info *ci) 1324static void ceph_flush_snaps(struct ceph_inode_info *ci)
1324{ 1325{
1325 struct inode *inode = &ci->vfs_inode; 1326 spin_lock(&ci->i_ceph_lock);
1326
1327 spin_lock(&inode->i_lock);
1328 __ceph_flush_snaps(ci, NULL, 0); 1327 __ceph_flush_snaps(ci, NULL, 0);
1329 spin_unlock(&inode->i_lock); 1328 spin_unlock(&ci->i_ceph_lock);
1330} 1329}
1331 1330
1332/* 1331/*
@@ -1373,7 +1372,7 @@ int __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask)
1373 * Add dirty inode to the flushing list. Assigned a seq number so we 1372 * Add dirty inode to the flushing list. Assigned a seq number so we
1374 * can wait for caps to flush without starving. 1373 * can wait for caps to flush without starving.
1375 * 1374 *
1376 * Called under i_lock. 1375 * Called under i_ceph_lock.
1377 */ 1376 */
1378static int __mark_caps_flushing(struct inode *inode, 1377static int __mark_caps_flushing(struct inode *inode,
1379 struct ceph_mds_session *session) 1378 struct ceph_mds_session *session)
@@ -1421,9 +1420,9 @@ static int try_nonblocking_invalidate(struct inode *inode)
1421 struct ceph_inode_info *ci = ceph_inode(inode); 1420 struct ceph_inode_info *ci = ceph_inode(inode);
1422 u32 invalidating_gen = ci->i_rdcache_gen; 1421 u32 invalidating_gen = ci->i_rdcache_gen;
1423 1422
1424 spin_unlock(&inode->i_lock); 1423 spin_unlock(&ci->i_ceph_lock);
1425 invalidate_mapping_pages(&inode->i_data, 0, -1); 1424 invalidate_mapping_pages(&inode->i_data, 0, -1);
1426 spin_lock(&inode->i_lock); 1425 spin_lock(&ci->i_ceph_lock);
1427 1426
1428 if (inode->i_data.nrpages == 0 && 1427 if (inode->i_data.nrpages == 0 &&
1429 invalidating_gen == ci->i_rdcache_gen) { 1428 invalidating_gen == ci->i_rdcache_gen) {
@@ -1470,7 +1469,7 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags,
1470 if (mdsc->stopping) 1469 if (mdsc->stopping)
1471 is_delayed = 1; 1470 is_delayed = 1;
1472 1471
1473 spin_lock(&inode->i_lock); 1472 spin_lock(&ci->i_ceph_lock);
1474 1473
1475 if (ci->i_ceph_flags & CEPH_I_FLUSH) 1474 if (ci->i_ceph_flags & CEPH_I_FLUSH)
1476 flags |= CHECK_CAPS_FLUSH; 1475 flags |= CHECK_CAPS_FLUSH;
@@ -1480,7 +1479,7 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags,
1480 __ceph_flush_snaps(ci, &session, 0); 1479 __ceph_flush_snaps(ci, &session, 0);
1481 goto retry_locked; 1480 goto retry_locked;
1482retry: 1481retry:
1483 spin_lock(&inode->i_lock); 1482 spin_lock(&ci->i_ceph_lock);
1484retry_locked: 1483retry_locked:
1485 file_wanted = __ceph_caps_file_wanted(ci); 1484 file_wanted = __ceph_caps_file_wanted(ci);
1486 used = __ceph_caps_used(ci); 1485 used = __ceph_caps_used(ci);
@@ -1634,7 +1633,7 @@ ack:
1634 if (mutex_trylock(&session->s_mutex) == 0) { 1633 if (mutex_trylock(&session->s_mutex) == 0) {
1635 dout("inverting session/ino locks on %p\n", 1634 dout("inverting session/ino locks on %p\n",
1636 session); 1635 session);
1637 spin_unlock(&inode->i_lock); 1636 spin_unlock(&ci->i_ceph_lock);
1638 if (took_snap_rwsem) { 1637 if (took_snap_rwsem) {
1639 up_read(&mdsc->snap_rwsem); 1638 up_read(&mdsc->snap_rwsem);
1640 took_snap_rwsem = 0; 1639 took_snap_rwsem = 0;
@@ -1648,7 +1647,7 @@ ack:
1648 if (down_read_trylock(&mdsc->snap_rwsem) == 0) { 1647 if (down_read_trylock(&mdsc->snap_rwsem) == 0) {
1649 dout("inverting snap/in locks on %p\n", 1648 dout("inverting snap/in locks on %p\n",
1650 inode); 1649 inode);
1651 spin_unlock(&inode->i_lock); 1650 spin_unlock(&ci->i_ceph_lock);
1652 down_read(&mdsc->snap_rwsem); 1651 down_read(&mdsc->snap_rwsem);
1653 took_snap_rwsem = 1; 1652 took_snap_rwsem = 1;
1654 goto retry; 1653 goto retry;
@@ -1664,10 +1663,10 @@ ack:
1664 mds = cap->mds; /* remember mds, so we don't repeat */ 1663 mds = cap->mds; /* remember mds, so we don't repeat */
1665 sent++; 1664 sent++;
1666 1665
1667 /* __send_cap drops i_lock */ 1666 /* __send_cap drops i_ceph_lock */
1668 delayed += __send_cap(mdsc, cap, CEPH_CAP_OP_UPDATE, used, want, 1667 delayed += __send_cap(mdsc, cap, CEPH_CAP_OP_UPDATE, used, want,
1669 retain, flushing, NULL); 1668 retain, flushing, NULL);
1670 goto retry; /* retake i_lock and restart our cap scan. */ 1669 goto retry; /* retake i_ceph_lock and restart our cap scan. */
1671 } 1670 }
1672 1671
1673 /* 1672 /*
@@ -1681,7 +1680,7 @@ ack:
1681 else if (!is_delayed || force_requeue) 1680 else if (!is_delayed || force_requeue)
1682 __cap_delay_requeue(mdsc, ci); 1681 __cap_delay_requeue(mdsc, ci);
1683 1682
1684 spin_unlock(&inode->i_lock); 1683 spin_unlock(&ci->i_ceph_lock);
1685 1684
1686 if (queue_invalidate) 1685 if (queue_invalidate)
1687 ceph_queue_invalidate(inode); 1686 ceph_queue_invalidate(inode);
@@ -1704,7 +1703,7 @@ static int try_flush_caps(struct inode *inode, struct ceph_mds_session *session,
1704 int flushing = 0; 1703 int flushing = 0;
1705 1704
1706retry: 1705retry:
1707 spin_lock(&inode->i_lock); 1706 spin_lock(&ci->i_ceph_lock);
1708 if (ci->i_ceph_flags & CEPH_I_NOFLUSH) { 1707 if (ci->i_ceph_flags & CEPH_I_NOFLUSH) {
1709 dout("try_flush_caps skipping %p I_NOFLUSH set\n", inode); 1708 dout("try_flush_caps skipping %p I_NOFLUSH set\n", inode);
1710 goto out; 1709 goto out;
@@ -1716,7 +1715,7 @@ retry:
1716 int delayed; 1715 int delayed;
1717 1716
1718 if (!session) { 1717 if (!session) {
1719 spin_unlock(&inode->i_lock); 1718 spin_unlock(&ci->i_ceph_lock);
1720 session = cap->session; 1719 session = cap->session;
1721 mutex_lock(&session->s_mutex); 1720 mutex_lock(&session->s_mutex);
1722 goto retry; 1721 goto retry;
@@ -1727,18 +1726,18 @@ retry:
1727 1726
1728 flushing = __mark_caps_flushing(inode, session); 1727 flushing = __mark_caps_flushing(inode, session);
1729 1728
1730 /* __send_cap drops i_lock */ 1729 /* __send_cap drops i_ceph_lock */
1731 delayed = __send_cap(mdsc, cap, CEPH_CAP_OP_FLUSH, used, want, 1730 delayed = __send_cap(mdsc, cap, CEPH_CAP_OP_FLUSH, used, want,
1732 cap->issued | cap->implemented, flushing, 1731 cap->issued | cap->implemented, flushing,
1733 flush_tid); 1732 flush_tid);
1734 if (!delayed) 1733 if (!delayed)
1735 goto out_unlocked; 1734 goto out_unlocked;
1736 1735
1737 spin_lock(&inode->i_lock); 1736 spin_lock(&ci->i_ceph_lock);
1738 __cap_delay_requeue(mdsc, ci); 1737 __cap_delay_requeue(mdsc, ci);
1739 } 1738 }
1740out: 1739out:
1741 spin_unlock(&inode->i_lock); 1740 spin_unlock(&ci->i_ceph_lock);
1742out_unlocked: 1741out_unlocked:
1743 if (session && unlock_session) 1742 if (session && unlock_session)
1744 mutex_unlock(&session->s_mutex); 1743 mutex_unlock(&session->s_mutex);
@@ -1753,7 +1752,7 @@ static int caps_are_flushed(struct inode *inode, unsigned tid)
1753 struct ceph_inode_info *ci = ceph_inode(inode); 1752 struct ceph_inode_info *ci = ceph_inode(inode);
1754 int i, ret = 1; 1753 int i, ret = 1;
1755 1754
1756 spin_lock(&inode->i_lock); 1755 spin_lock(&ci->i_ceph_lock);
1757 for (i = 0; i < CEPH_CAP_BITS; i++) 1756 for (i = 0; i < CEPH_CAP_BITS; i++)
1758 if ((ci->i_flushing_caps & (1 << i)) && 1757 if ((ci->i_flushing_caps & (1 << i)) &&
1759 ci->i_cap_flush_tid[i] <= tid) { 1758 ci->i_cap_flush_tid[i] <= tid) {
@@ -1761,7 +1760,7 @@ static int caps_are_flushed(struct inode *inode, unsigned tid)
1761 ret = 0; 1760 ret = 0;
1762 break; 1761 break;
1763 } 1762 }
1764 spin_unlock(&inode->i_lock); 1763 spin_unlock(&ci->i_ceph_lock);
1765 return ret; 1764 return ret;
1766} 1765}
1767 1766
@@ -1868,10 +1867,10 @@ int ceph_write_inode(struct inode *inode, struct writeback_control *wbc)
1868 struct ceph_mds_client *mdsc = 1867 struct ceph_mds_client *mdsc =
1869 ceph_sb_to_client(inode->i_sb)->mdsc; 1868 ceph_sb_to_client(inode->i_sb)->mdsc;
1870 1869
1871 spin_lock(&inode->i_lock); 1870 spin_lock(&ci->i_ceph_lock);
1872 if (__ceph_caps_dirty(ci)) 1871 if (__ceph_caps_dirty(ci))
1873 __cap_delay_requeue_front(mdsc, ci); 1872 __cap_delay_requeue_front(mdsc, ci);
1874 spin_unlock(&inode->i_lock); 1873 spin_unlock(&ci->i_ceph_lock);
1875 } 1874 }
1876 return err; 1875 return err;
1877} 1876}
@@ -1894,7 +1893,7 @@ static void kick_flushing_capsnaps(struct ceph_mds_client *mdsc,
1894 struct inode *inode = &ci->vfs_inode; 1893 struct inode *inode = &ci->vfs_inode;
1895 struct ceph_cap *cap; 1894 struct ceph_cap *cap;
1896 1895
1897 spin_lock(&inode->i_lock); 1896 spin_lock(&ci->i_ceph_lock);
1898 cap = ci->i_auth_cap; 1897 cap = ci->i_auth_cap;
1899 if (cap && cap->session == session) { 1898 if (cap && cap->session == session) {
1900 dout("kick_flushing_caps %p cap %p capsnap %p\n", inode, 1899 dout("kick_flushing_caps %p cap %p capsnap %p\n", inode,
@@ -1904,7 +1903,7 @@ static void kick_flushing_capsnaps(struct ceph_mds_client *mdsc,
1904 pr_err("%p auth cap %p not mds%d ???\n", inode, 1903 pr_err("%p auth cap %p not mds%d ???\n", inode,
1905 cap, session->s_mds); 1904 cap, session->s_mds);
1906 } 1905 }
1907 spin_unlock(&inode->i_lock); 1906 spin_unlock(&ci->i_ceph_lock);
1908 } 1907 }
1909} 1908}
1910 1909
@@ -1921,7 +1920,7 @@ void ceph_kick_flushing_caps(struct ceph_mds_client *mdsc,
1921 struct ceph_cap *cap; 1920 struct ceph_cap *cap;
1922 int delayed = 0; 1921 int delayed = 0;
1923 1922
1924 spin_lock(&inode->i_lock); 1923 spin_lock(&ci->i_ceph_lock);
1925 cap = ci->i_auth_cap; 1924 cap = ci->i_auth_cap;
1926 if (cap && cap->session == session) { 1925 if (cap && cap->session == session) {
1927 dout("kick_flushing_caps %p cap %p %s\n", inode, 1926 dout("kick_flushing_caps %p cap %p %s\n", inode,
@@ -1932,14 +1931,14 @@ void ceph_kick_flushing_caps(struct ceph_mds_client *mdsc,
1932 cap->issued | cap->implemented, 1931 cap->issued | cap->implemented,
1933 ci->i_flushing_caps, NULL); 1932 ci->i_flushing_caps, NULL);
1934 if (delayed) { 1933 if (delayed) {
1935 spin_lock(&inode->i_lock); 1934 spin_lock(&ci->i_ceph_lock);
1936 __cap_delay_requeue(mdsc, ci); 1935 __cap_delay_requeue(mdsc, ci);
1937 spin_unlock(&inode->i_lock); 1936 spin_unlock(&ci->i_ceph_lock);
1938 } 1937 }
1939 } else { 1938 } else {
1940 pr_err("%p auth cap %p not mds%d ???\n", inode, 1939 pr_err("%p auth cap %p not mds%d ???\n", inode,
1941 cap, session->s_mds); 1940 cap, session->s_mds);
1942 spin_unlock(&inode->i_lock); 1941 spin_unlock(&ci->i_ceph_lock);
1943 } 1942 }
1944 } 1943 }
1945} 1944}
@@ -1952,7 +1951,7 @@ static void kick_flushing_inode_caps(struct ceph_mds_client *mdsc,
1952 struct ceph_cap *cap; 1951 struct ceph_cap *cap;
1953 int delayed = 0; 1952 int delayed = 0;
1954 1953
1955 spin_lock(&inode->i_lock); 1954 spin_lock(&ci->i_ceph_lock);
1956 cap = ci->i_auth_cap; 1955 cap = ci->i_auth_cap;
1957 dout("kick_flushing_inode_caps %p flushing %s flush_seq %lld\n", inode, 1956 dout("kick_flushing_inode_caps %p flushing %s flush_seq %lld\n", inode,
1958 ceph_cap_string(ci->i_flushing_caps), ci->i_cap_flush_seq); 1957 ceph_cap_string(ci->i_flushing_caps), ci->i_cap_flush_seq);
@@ -1964,12 +1963,12 @@ static void kick_flushing_inode_caps(struct ceph_mds_client *mdsc,
1964 cap->issued | cap->implemented, 1963 cap->issued | cap->implemented,
1965 ci->i_flushing_caps, NULL); 1964 ci->i_flushing_caps, NULL);
1966 if (delayed) { 1965 if (delayed) {
1967 spin_lock(&inode->i_lock); 1966 spin_lock(&ci->i_ceph_lock);
1968 __cap_delay_requeue(mdsc, ci); 1967 __cap_delay_requeue(mdsc, ci);
1969 spin_unlock(&inode->i_lock); 1968 spin_unlock(&ci->i_ceph_lock);
1970 } 1969 }
1971 } else { 1970 } else {
1972 spin_unlock(&inode->i_lock); 1971 spin_unlock(&ci->i_ceph_lock);
1973 } 1972 }
1974} 1973}
1975 1974
@@ -1978,7 +1977,7 @@ static void kick_flushing_inode_caps(struct ceph_mds_client *mdsc,
1978 * Take references to capabilities we hold, so that we don't release 1977 * Take references to capabilities we hold, so that we don't release
1979 * them to the MDS prematurely. 1978 * them to the MDS prematurely.
1980 * 1979 *
1981 * Protected by i_lock. 1980 * Protected by i_ceph_lock.
1982 */ 1981 */
1983static void __take_cap_refs(struct ceph_inode_info *ci, int got) 1982static void __take_cap_refs(struct ceph_inode_info *ci, int got)
1984{ 1983{
@@ -2016,7 +2015,7 @@ static int try_get_cap_refs(struct ceph_inode_info *ci, int need, int want,
2016 2015
2017 dout("get_cap_refs %p need %s want %s\n", inode, 2016 dout("get_cap_refs %p need %s want %s\n", inode,
2018 ceph_cap_string(need), ceph_cap_string(want)); 2017 ceph_cap_string(need), ceph_cap_string(want));
2019 spin_lock(&inode->i_lock); 2018 spin_lock(&ci->i_ceph_lock);
2020 2019
2021 /* make sure file is actually open */ 2020 /* make sure file is actually open */
2022 file_wanted = __ceph_caps_file_wanted(ci); 2021 file_wanted = __ceph_caps_file_wanted(ci);
@@ -2077,7 +2076,7 @@ static int try_get_cap_refs(struct ceph_inode_info *ci, int need, int want,
2077 ceph_cap_string(have), ceph_cap_string(need)); 2076 ceph_cap_string(have), ceph_cap_string(need));
2078 } 2077 }
2079out: 2078out:
2080 spin_unlock(&inode->i_lock); 2079 spin_unlock(&ci->i_ceph_lock);
2081 dout("get_cap_refs %p ret %d got %s\n", inode, 2080 dout("get_cap_refs %p ret %d got %s\n", inode,
2082 ret, ceph_cap_string(*got)); 2081 ret, ceph_cap_string(*got));
2083 return ret; 2082 return ret;
@@ -2094,7 +2093,7 @@ static void check_max_size(struct inode *inode, loff_t endoff)
2094 int check = 0; 2093 int check = 0;
2095 2094
2096 /* do we need to explicitly request a larger max_size? */ 2095 /* do we need to explicitly request a larger max_size? */
2097 spin_lock(&inode->i_lock); 2096 spin_lock(&ci->i_ceph_lock);
2098 if ((endoff >= ci->i_max_size || 2097 if ((endoff >= ci->i_max_size ||
2099 endoff > (inode->i_size << 1)) && 2098 endoff > (inode->i_size << 1)) &&
2100 endoff > ci->i_wanted_max_size) { 2099 endoff > ci->i_wanted_max_size) {
@@ -2103,7 +2102,7 @@ static void check_max_size(struct inode *inode, loff_t endoff)
2103 ci->i_wanted_max_size = endoff; 2102 ci->i_wanted_max_size = endoff;
2104 check = 1; 2103 check = 1;
2105 } 2104 }
2106 spin_unlock(&inode->i_lock); 2105 spin_unlock(&ci->i_ceph_lock);
2107 if (check) 2106 if (check)
2108 ceph_check_caps(ci, CHECK_CAPS_AUTHONLY, NULL); 2107 ceph_check_caps(ci, CHECK_CAPS_AUTHONLY, NULL);
2109} 2108}
@@ -2140,9 +2139,9 @@ retry:
2140 */ 2139 */
2141void ceph_get_cap_refs(struct ceph_inode_info *ci, int caps) 2140void ceph_get_cap_refs(struct ceph_inode_info *ci, int caps)
2142{ 2141{
2143 spin_lock(&ci->vfs_inode.i_lock); 2142 spin_lock(&ci->i_ceph_lock);
2144 __take_cap_refs(ci, caps); 2143 __take_cap_refs(ci, caps);
2145 spin_unlock(&ci->vfs_inode.i_lock); 2144 spin_unlock(&ci->i_ceph_lock);
2146} 2145}
2147 2146
2148/* 2147/*
@@ -2160,7 +2159,7 @@ void ceph_put_cap_refs(struct ceph_inode_info *ci, int had)
2160 int last = 0, put = 0, flushsnaps = 0, wake = 0; 2159 int last = 0, put = 0, flushsnaps = 0, wake = 0;
2161 struct ceph_cap_snap *capsnap; 2160 struct ceph_cap_snap *capsnap;
2162 2161
2163 spin_lock(&inode->i_lock); 2162 spin_lock(&ci->i_ceph_lock);
2164 if (had & CEPH_CAP_PIN) 2163 if (had & CEPH_CAP_PIN)
2165 --ci->i_pin_ref; 2164 --ci->i_pin_ref;
2166 if (had & CEPH_CAP_FILE_RD) 2165 if (had & CEPH_CAP_FILE_RD)
@@ -2193,7 +2192,7 @@ void ceph_put_cap_refs(struct ceph_inode_info *ci, int had)
2193 } 2192 }
2194 } 2193 }
2195 } 2194 }
2196 spin_unlock(&inode->i_lock); 2195 spin_unlock(&ci->i_ceph_lock);
2197 2196
2198 dout("put_cap_refs %p had %s%s%s\n", inode, ceph_cap_string(had), 2197 dout("put_cap_refs %p had %s%s%s\n", inode, ceph_cap_string(had),
2199 last ? " last" : "", put ? " put" : ""); 2198 last ? " last" : "", put ? " put" : "");
@@ -2225,7 +2224,7 @@ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
2225 int found = 0; 2224 int found = 0;
2226 struct ceph_cap_snap *capsnap = NULL; 2225 struct ceph_cap_snap *capsnap = NULL;
2227 2226
2228 spin_lock(&inode->i_lock); 2227 spin_lock(&ci->i_ceph_lock);
2229 ci->i_wrbuffer_ref -= nr; 2228 ci->i_wrbuffer_ref -= nr;
2230 last = !ci->i_wrbuffer_ref; 2229 last = !ci->i_wrbuffer_ref;
2231 2230
@@ -2274,7 +2273,7 @@ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
2274 } 2273 }
2275 } 2274 }
2276 2275
2277 spin_unlock(&inode->i_lock); 2276 spin_unlock(&ci->i_ceph_lock);
2278 2277
2279 if (last) { 2278 if (last) {
2280 ceph_check_caps(ci, CHECK_CAPS_AUTHONLY, NULL); 2279 ceph_check_caps(ci, CHECK_CAPS_AUTHONLY, NULL);
@@ -2291,7 +2290,7 @@ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
2291 * Handle a cap GRANT message from the MDS. (Note that a GRANT may 2290 * Handle a cap GRANT message from the MDS. (Note that a GRANT may
2292 * actually be a revocation if it specifies a smaller cap set.) 2291 * actually be a revocation if it specifies a smaller cap set.)
2293 * 2292 *
2294 * caller holds s_mutex and i_lock, we drop both. 2293 * caller holds s_mutex and i_ceph_lock, we drop both.
2295 * 2294 *
2296 * return value: 2295 * return value:
2297 * 0 - ok 2296 * 0 - ok
@@ -2302,7 +2301,7 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
2302 struct ceph_mds_session *session, 2301 struct ceph_mds_session *session,
2303 struct ceph_cap *cap, 2302 struct ceph_cap *cap,
2304 struct ceph_buffer *xattr_buf) 2303 struct ceph_buffer *xattr_buf)
2305 __releases(inode->i_lock) 2304 __releases(ci->i_ceph_lock)
2306{ 2305{
2307 struct ceph_inode_info *ci = ceph_inode(inode); 2306 struct ceph_inode_info *ci = ceph_inode(inode);
2308 int mds = session->s_mds; 2307 int mds = session->s_mds;
@@ -2453,7 +2452,7 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
2453 } 2452 }
2454 BUG_ON(cap->issued & ~cap->implemented); 2453 BUG_ON(cap->issued & ~cap->implemented);
2455 2454
2456 spin_unlock(&inode->i_lock); 2455 spin_unlock(&ci->i_ceph_lock);
2457 if (writeback) 2456 if (writeback)
2458 /* 2457 /*
2459 * queue inode for writeback: we can't actually call 2458 * queue inode for writeback: we can't actually call
@@ -2483,7 +2482,7 @@ static void handle_cap_flush_ack(struct inode *inode, u64 flush_tid,
2483 struct ceph_mds_caps *m, 2482 struct ceph_mds_caps *m,
2484 struct ceph_mds_session *session, 2483 struct ceph_mds_session *session,
2485 struct ceph_cap *cap) 2484 struct ceph_cap *cap)
2486 __releases(inode->i_lock) 2485 __releases(ci->i_ceph_lock)
2487{ 2486{
2488 struct ceph_inode_info *ci = ceph_inode(inode); 2487 struct ceph_inode_info *ci = ceph_inode(inode);
2489 struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; 2488 struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
@@ -2539,7 +2538,7 @@ static void handle_cap_flush_ack(struct inode *inode, u64 flush_tid,
2539 wake_up_all(&ci->i_cap_wq); 2538 wake_up_all(&ci->i_cap_wq);
2540 2539
2541out: 2540out:
2542 spin_unlock(&inode->i_lock); 2541 spin_unlock(&ci->i_ceph_lock);
2543 if (drop) 2542 if (drop)
2544 iput(inode); 2543 iput(inode);
2545} 2544}
@@ -2562,7 +2561,7 @@ static void handle_cap_flushsnap_ack(struct inode *inode, u64 flush_tid,
2562 dout("handle_cap_flushsnap_ack inode %p ci %p mds%d follows %lld\n", 2561 dout("handle_cap_flushsnap_ack inode %p ci %p mds%d follows %lld\n",
2563 inode, ci, session->s_mds, follows); 2562 inode, ci, session->s_mds, follows);
2564 2563
2565 spin_lock(&inode->i_lock); 2564 spin_lock(&ci->i_ceph_lock);
2566 list_for_each_entry(capsnap, &ci->i_cap_snaps, ci_item) { 2565 list_for_each_entry(capsnap, &ci->i_cap_snaps, ci_item) {
2567 if (capsnap->follows == follows) { 2566 if (capsnap->follows == follows) {
2568 if (capsnap->flush_tid != flush_tid) { 2567 if (capsnap->flush_tid != flush_tid) {
@@ -2585,7 +2584,7 @@ static void handle_cap_flushsnap_ack(struct inode *inode, u64 flush_tid,
2585 capsnap, capsnap->follows); 2584 capsnap, capsnap->follows);
2586 } 2585 }
2587 } 2586 }
2588 spin_unlock(&inode->i_lock); 2587 spin_unlock(&ci->i_ceph_lock);
2589 if (drop) 2588 if (drop)
2590 iput(inode); 2589 iput(inode);
2591} 2590}
@@ -2598,7 +2597,7 @@ static void handle_cap_flushsnap_ack(struct inode *inode, u64 flush_tid,
2598static void handle_cap_trunc(struct inode *inode, 2597static void handle_cap_trunc(struct inode *inode,
2599 struct ceph_mds_caps *trunc, 2598 struct ceph_mds_caps *trunc,
2600 struct ceph_mds_session *session) 2599 struct ceph_mds_session *session)
2601 __releases(inode->i_lock) 2600 __releases(ci->i_ceph_lock)
2602{ 2601{
2603 struct ceph_inode_info *ci = ceph_inode(inode); 2602 struct ceph_inode_info *ci = ceph_inode(inode);
2604 int mds = session->s_mds; 2603 int mds = session->s_mds;
@@ -2617,7 +2616,7 @@ static void handle_cap_trunc(struct inode *inode,
2617 inode, mds, seq, truncate_size, truncate_seq); 2616 inode, mds, seq, truncate_size, truncate_seq);
2618 queue_trunc = ceph_fill_file_size(inode, issued, 2617 queue_trunc = ceph_fill_file_size(inode, issued,
2619 truncate_seq, truncate_size, size); 2618 truncate_seq, truncate_size, size);
2620 spin_unlock(&inode->i_lock); 2619 spin_unlock(&ci->i_ceph_lock);
2621 2620
2622 if (queue_trunc) 2621 if (queue_trunc)
2623 ceph_queue_vmtruncate(inode); 2622 ceph_queue_vmtruncate(inode);
@@ -2646,7 +2645,7 @@ static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex,
2646 dout("handle_cap_export inode %p ci %p mds%d mseq %d\n", 2645 dout("handle_cap_export inode %p ci %p mds%d mseq %d\n",
2647 inode, ci, mds, mseq); 2646 inode, ci, mds, mseq);
2648 2647
2649 spin_lock(&inode->i_lock); 2648 spin_lock(&ci->i_ceph_lock);
2650 2649
2651 /* make sure we haven't seen a higher mseq */ 2650 /* make sure we haven't seen a higher mseq */
2652 for (p = rb_first(&ci->i_caps); p; p = rb_next(p)) { 2651 for (p = rb_first(&ci->i_caps); p; p = rb_next(p)) {
@@ -2690,7 +2689,7 @@ static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex,
2690 } 2689 }
2691 /* else, we already released it */ 2690 /* else, we already released it */
2692 2691
2693 spin_unlock(&inode->i_lock); 2692 spin_unlock(&ci->i_ceph_lock);
2694} 2693}
2695 2694
2696/* 2695/*
@@ -2745,9 +2744,9 @@ static void handle_cap_import(struct ceph_mds_client *mdsc,
2745 up_read(&mdsc->snap_rwsem); 2744 up_read(&mdsc->snap_rwsem);
2746 2745
2747 /* make sure we re-request max_size, if necessary */ 2746 /* make sure we re-request max_size, if necessary */
2748 spin_lock(&inode->i_lock); 2747 spin_lock(&ci->i_ceph_lock);
2749 ci->i_requested_max_size = 0; 2748 ci->i_requested_max_size = 0;
2750 spin_unlock(&inode->i_lock); 2749 spin_unlock(&ci->i_ceph_lock);
2751} 2750}
2752 2751
2753/* 2752/*
@@ -2762,6 +2761,7 @@ void ceph_handle_caps(struct ceph_mds_session *session,
2762 struct ceph_mds_client *mdsc = session->s_mdsc; 2761 struct ceph_mds_client *mdsc = session->s_mdsc;
2763 struct super_block *sb = mdsc->fsc->sb; 2762 struct super_block *sb = mdsc->fsc->sb;
2764 struct inode *inode; 2763 struct inode *inode;
2764 struct ceph_inode_info *ci;
2765 struct ceph_cap *cap; 2765 struct ceph_cap *cap;
2766 struct ceph_mds_caps *h; 2766 struct ceph_mds_caps *h;
2767 int mds = session->s_mds; 2767 int mds = session->s_mds;
@@ -2815,6 +2815,7 @@ void ceph_handle_caps(struct ceph_mds_session *session,
2815 2815
2816 /* lookup ino */ 2816 /* lookup ino */
2817 inode = ceph_find_inode(sb, vino); 2817 inode = ceph_find_inode(sb, vino);
2818 ci = ceph_inode(inode);
2818 dout(" op %s ino %llx.%llx inode %p\n", ceph_cap_op_name(op), vino.ino, 2819 dout(" op %s ino %llx.%llx inode %p\n", ceph_cap_op_name(op), vino.ino,
2819 vino.snap, inode); 2820 vino.snap, inode);
2820 if (!inode) { 2821 if (!inode) {
@@ -2844,16 +2845,16 @@ void ceph_handle_caps(struct ceph_mds_session *session,
2844 } 2845 }
2845 2846
2846 /* the rest require a cap */ 2847 /* the rest require a cap */
2847 spin_lock(&inode->i_lock); 2848 spin_lock(&ci->i_ceph_lock);
2848 cap = __get_cap_for_mds(ceph_inode(inode), mds); 2849 cap = __get_cap_for_mds(ceph_inode(inode), mds);
2849 if (!cap) { 2850 if (!cap) {
2850 dout(" no cap on %p ino %llx.%llx from mds%d\n", 2851 dout(" no cap on %p ino %llx.%llx from mds%d\n",
2851 inode, ceph_ino(inode), ceph_snap(inode), mds); 2852 inode, ceph_ino(inode), ceph_snap(inode), mds);
2852 spin_unlock(&inode->i_lock); 2853 spin_unlock(&ci->i_ceph_lock);
2853 goto flush_cap_releases; 2854 goto flush_cap_releases;
2854 } 2855 }
2855 2856
2856 /* note that each of these drops i_lock for us */ 2857 /* note that each of these drops i_ceph_lock for us */
2857 switch (op) { 2858 switch (op) {
2858 case CEPH_CAP_OP_REVOKE: 2859 case CEPH_CAP_OP_REVOKE:
2859 case CEPH_CAP_OP_GRANT: 2860 case CEPH_CAP_OP_GRANT:
@@ -2869,7 +2870,7 @@ void ceph_handle_caps(struct ceph_mds_session *session,
2869 break; 2870 break;
2870 2871
2871 default: 2872 default:
2872 spin_unlock(&inode->i_lock); 2873 spin_unlock(&ci->i_ceph_lock);
2873 pr_err("ceph_handle_caps: unknown cap op %d %s\n", op, 2874 pr_err("ceph_handle_caps: unknown cap op %d %s\n", op,
2874 ceph_cap_op_name(op)); 2875 ceph_cap_op_name(op));
2875 } 2876 }
@@ -2962,13 +2963,13 @@ void ceph_put_fmode(struct ceph_inode_info *ci, int fmode)
2962 struct inode *inode = &ci->vfs_inode; 2963 struct inode *inode = &ci->vfs_inode;
2963 int last = 0; 2964 int last = 0;
2964 2965
2965 spin_lock(&inode->i_lock); 2966 spin_lock(&ci->i_ceph_lock);
2966 dout("put_fmode %p fmode %d %d -> %d\n", inode, fmode, 2967 dout("put_fmode %p fmode %d %d -> %d\n", inode, fmode,
2967 ci->i_nr_by_mode[fmode], ci->i_nr_by_mode[fmode]-1); 2968 ci->i_nr_by_mode[fmode], ci->i_nr_by_mode[fmode]-1);
2968 BUG_ON(ci->i_nr_by_mode[fmode] == 0); 2969 BUG_ON(ci->i_nr_by_mode[fmode] == 0);
2969 if (--ci->i_nr_by_mode[fmode] == 0) 2970 if (--ci->i_nr_by_mode[fmode] == 0)
2970 last++; 2971 last++;
2971 spin_unlock(&inode->i_lock); 2972 spin_unlock(&ci->i_ceph_lock);
2972 2973
2973 if (last && ci->i_vino.snap == CEPH_NOSNAP) 2974 if (last && ci->i_vino.snap == CEPH_NOSNAP)
2974 ceph_check_caps(ci, 0, NULL); 2975 ceph_check_caps(ci, 0, NULL);
@@ -2991,7 +2992,7 @@ int ceph_encode_inode_release(void **p, struct inode *inode,
2991 int used, dirty; 2992 int used, dirty;
2992 int ret = 0; 2993 int ret = 0;
2993 2994
2994 spin_lock(&inode->i_lock); 2995 spin_lock(&ci->i_ceph_lock);
2995 used = __ceph_caps_used(ci); 2996 used = __ceph_caps_used(ci);
2996 dirty = __ceph_caps_dirty(ci); 2997 dirty = __ceph_caps_dirty(ci);
2997 2998
@@ -3046,7 +3047,7 @@ int ceph_encode_inode_release(void **p, struct inode *inode,
3046 inode, cap, ceph_cap_string(cap->issued)); 3047 inode, cap, ceph_cap_string(cap->issued));
3047 } 3048 }
3048 } 3049 }
3049 spin_unlock(&inode->i_lock); 3050 spin_unlock(&ci->i_ceph_lock);
3050 return ret; 3051 return ret;
3051} 3052}
3052 3053
@@ -3061,7 +3062,7 @@ int ceph_encode_dentry_release(void **p, struct dentry *dentry,
3061 3062
3062 /* 3063 /*
3063 * force an record for the directory caps if we have a dentry lease. 3064 * force an record for the directory caps if we have a dentry lease.
3064 * this is racy (can't take i_lock and d_lock together), but it 3065 * this is racy (can't take i_ceph_lock and d_lock together), but it
3065 * doesn't have to be perfect; the mds will revoke anything we don't 3066 * doesn't have to be perfect; the mds will revoke anything we don't
3066 * release. 3067 * release.
3067 */ 3068 */
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index bca3948e9dbf..3eeb97661262 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -281,18 +281,18 @@ static int ceph_readdir(struct file *filp, void *dirent, filldir_t filldir)
281 } 281 }
282 282
283 /* can we use the dcache? */ 283 /* can we use the dcache? */
284 spin_lock(&inode->i_lock); 284 spin_lock(&ci->i_ceph_lock);
285 if ((filp->f_pos == 2 || fi->dentry) && 285 if ((filp->f_pos == 2 || fi->dentry) &&
286 !ceph_test_mount_opt(fsc, NOASYNCREADDIR) && 286 !ceph_test_mount_opt(fsc, NOASYNCREADDIR) &&
287 ceph_snap(inode) != CEPH_SNAPDIR && 287 ceph_snap(inode) != CEPH_SNAPDIR &&
288 ceph_dir_test_complete(inode) && 288 ceph_dir_test_complete(inode) &&
289 __ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1)) { 289 __ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1)) {
290 spin_unlock(&inode->i_lock); 290 spin_unlock(&ci->i_ceph_lock);
291 err = __dcache_readdir(filp, dirent, filldir); 291 err = __dcache_readdir(filp, dirent, filldir);
292 if (err != -EAGAIN) 292 if (err != -EAGAIN)
293 return err; 293 return err;
294 } else { 294 } else {
295 spin_unlock(&inode->i_lock); 295 spin_unlock(&ci->i_ceph_lock);
296 } 296 }
297 if (fi->dentry) { 297 if (fi->dentry) {
298 err = note_last_dentry(fi, fi->dentry->d_name.name, 298 err = note_last_dentry(fi, fi->dentry->d_name.name,
@@ -428,12 +428,12 @@ more:
428 * were released during the whole readdir, and we should have 428 * were released during the whole readdir, and we should have
429 * the complete dir contents in our cache. 429 * the complete dir contents in our cache.
430 */ 430 */
431 spin_lock(&inode->i_lock); 431 spin_lock(&ci->i_ceph_lock);
432 if (ci->i_release_count == fi->dir_release_count) { 432 if (ci->i_release_count == fi->dir_release_count) {
433 ceph_dir_set_complete(inode); 433 ceph_dir_set_complete(inode);
434 ci->i_max_offset = filp->f_pos; 434 ci->i_max_offset = filp->f_pos;
435 } 435 }
436 spin_unlock(&inode->i_lock); 436 spin_unlock(&ci->i_ceph_lock);
437 437
438 dout("readdir %p filp %p done.\n", inode, filp); 438 dout("readdir %p filp %p done.\n", inode, filp);
439 return 0; 439 return 0;
@@ -607,7 +607,7 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry,
607 struct ceph_inode_info *ci = ceph_inode(dir); 607 struct ceph_inode_info *ci = ceph_inode(dir);
608 struct ceph_dentry_info *di = ceph_dentry(dentry); 608 struct ceph_dentry_info *di = ceph_dentry(dentry);
609 609
610 spin_lock(&dir->i_lock); 610 spin_lock(&ci->i_ceph_lock);
611 dout(" dir %p flags are %d\n", dir, ci->i_ceph_flags); 611 dout(" dir %p flags are %d\n", dir, ci->i_ceph_flags);
612 if (strncmp(dentry->d_name.name, 612 if (strncmp(dentry->d_name.name,
613 fsc->mount_options->snapdir_name, 613 fsc->mount_options->snapdir_name,
@@ -615,13 +615,13 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry,
615 !is_root_ceph_dentry(dir, dentry) && 615 !is_root_ceph_dentry(dir, dentry) &&
616 ceph_dir_test_complete(dir) && 616 ceph_dir_test_complete(dir) &&
617 (__ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1))) { 617 (__ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1))) {
618 spin_unlock(&dir->i_lock); 618 spin_unlock(&ci->i_ceph_lock);
619 dout(" dir %p complete, -ENOENT\n", dir); 619 dout(" dir %p complete, -ENOENT\n", dir);
620 d_add(dentry, NULL); 620 d_add(dentry, NULL);
621 di->lease_shared_gen = ci->i_shared_gen; 621 di->lease_shared_gen = ci->i_shared_gen;
622 return NULL; 622 return NULL;
623 } 623 }
624 spin_unlock(&dir->i_lock); 624 spin_unlock(&ci->i_ceph_lock);
625 } 625 }
626 626
627 op = ceph_snap(dir) == CEPH_SNAPDIR ? 627 op = ceph_snap(dir) == CEPH_SNAPDIR ?
@@ -841,12 +841,12 @@ static int drop_caps_for_unlink(struct inode *inode)
841 struct ceph_inode_info *ci = ceph_inode(inode); 841 struct ceph_inode_info *ci = ceph_inode(inode);
842 int drop = CEPH_CAP_LINK_SHARED | CEPH_CAP_LINK_EXCL; 842 int drop = CEPH_CAP_LINK_SHARED | CEPH_CAP_LINK_EXCL;
843 843
844 spin_lock(&inode->i_lock); 844 spin_lock(&ci->i_ceph_lock);
845 if (inode->i_nlink == 1) { 845 if (inode->i_nlink == 1) {
846 drop |= ~(__ceph_caps_wanted(ci) | CEPH_CAP_PIN); 846 drop |= ~(__ceph_caps_wanted(ci) | CEPH_CAP_PIN);
847 ci->i_ceph_flags |= CEPH_I_NODELAY; 847 ci->i_ceph_flags |= CEPH_I_NODELAY;
848 } 848 }
849 spin_unlock(&inode->i_lock); 849 spin_unlock(&ci->i_ceph_lock);
850 return drop; 850 return drop;
851} 851}
852 852
@@ -1015,10 +1015,10 @@ static int dir_lease_is_valid(struct inode *dir, struct dentry *dentry)
1015 struct ceph_dentry_info *di = ceph_dentry(dentry); 1015 struct ceph_dentry_info *di = ceph_dentry(dentry);
1016 int valid = 0; 1016 int valid = 0;
1017 1017
1018 spin_lock(&dir->i_lock); 1018 spin_lock(&ci->i_ceph_lock);
1019 if (ci->i_shared_gen == di->lease_shared_gen) 1019 if (ci->i_shared_gen == di->lease_shared_gen)
1020 valid = __ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1); 1020 valid = __ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1);
1021 spin_unlock(&dir->i_lock); 1021 spin_unlock(&ci->i_ceph_lock);
1022 dout("dir_lease_is_valid dir %p v%u dentry %p v%u = %d\n", 1022 dout("dir_lease_is_valid dir %p v%u dentry %p v%u = %d\n",
1023 dir, (unsigned)ci->i_shared_gen, dentry, 1023 dir, (unsigned)ci->i_shared_gen, dentry,
1024 (unsigned)di->lease_shared_gen, valid); 1024 (unsigned)di->lease_shared_gen, valid);
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index ce549d31eeb7..ed72428d9c75 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -147,9 +147,9 @@ int ceph_open(struct inode *inode, struct file *file)
147 147
148 /* trivially open snapdir */ 148 /* trivially open snapdir */
149 if (ceph_snap(inode) == CEPH_SNAPDIR) { 149 if (ceph_snap(inode) == CEPH_SNAPDIR) {
150 spin_lock(&inode->i_lock); 150 spin_lock(&ci->i_ceph_lock);
151 __ceph_get_fmode(ci, fmode); 151 __ceph_get_fmode(ci, fmode);
152 spin_unlock(&inode->i_lock); 152 spin_unlock(&ci->i_ceph_lock);
153 return ceph_init_file(inode, file, fmode); 153 return ceph_init_file(inode, file, fmode);
154 } 154 }
155 155
@@ -158,7 +158,7 @@ int ceph_open(struct inode *inode, struct file *file)
158 * write) or any MDS (for read). Update wanted set 158 * write) or any MDS (for read). Update wanted set
159 * asynchronously. 159 * asynchronously.
160 */ 160 */
161 spin_lock(&inode->i_lock); 161 spin_lock(&ci->i_ceph_lock);
162 if (__ceph_is_any_real_caps(ci) && 162 if (__ceph_is_any_real_caps(ci) &&
163 (((fmode & CEPH_FILE_MODE_WR) == 0) || ci->i_auth_cap)) { 163 (((fmode & CEPH_FILE_MODE_WR) == 0) || ci->i_auth_cap)) {
164 int mds_wanted = __ceph_caps_mds_wanted(ci); 164 int mds_wanted = __ceph_caps_mds_wanted(ci);
@@ -168,7 +168,7 @@ int ceph_open(struct inode *inode, struct file *file)
168 inode, fmode, ceph_cap_string(wanted), 168 inode, fmode, ceph_cap_string(wanted),
169 ceph_cap_string(issued)); 169 ceph_cap_string(issued));
170 __ceph_get_fmode(ci, fmode); 170 __ceph_get_fmode(ci, fmode);
171 spin_unlock(&inode->i_lock); 171 spin_unlock(&ci->i_ceph_lock);
172 172
173 /* adjust wanted? */ 173 /* adjust wanted? */
174 if ((issued & wanted) != wanted && 174 if ((issued & wanted) != wanted &&
@@ -180,10 +180,10 @@ int ceph_open(struct inode *inode, struct file *file)
180 } else if (ceph_snap(inode) != CEPH_NOSNAP && 180 } else if (ceph_snap(inode) != CEPH_NOSNAP &&
181 (ci->i_snap_caps & wanted) == wanted) { 181 (ci->i_snap_caps & wanted) == wanted) {
182 __ceph_get_fmode(ci, fmode); 182 __ceph_get_fmode(ci, fmode);
183 spin_unlock(&inode->i_lock); 183 spin_unlock(&ci->i_ceph_lock);
184 return ceph_init_file(inode, file, fmode); 184 return ceph_init_file(inode, file, fmode);
185 } 185 }
186 spin_unlock(&inode->i_lock); 186 spin_unlock(&ci->i_ceph_lock);
187 187
188 dout("open fmode %d wants %s\n", fmode, ceph_cap_string(wanted)); 188 dout("open fmode %d wants %s\n", fmode, ceph_cap_string(wanted));
189 req = prepare_open_request(inode->i_sb, flags, 0); 189 req = prepare_open_request(inode->i_sb, flags, 0);
@@ -743,9 +743,9 @@ retry_snap:
743 */ 743 */
744 int dirty; 744 int dirty;
745 745
746 spin_lock(&inode->i_lock); 746 spin_lock(&ci->i_ceph_lock);
747 dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR); 747 dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR);
748 spin_unlock(&inode->i_lock); 748 spin_unlock(&ci->i_ceph_lock);
749 ceph_put_cap_refs(ci, got); 749 ceph_put_cap_refs(ci, got);
750 750
751 ret = generic_file_aio_write(iocb, iov, nr_segs, pos); 751 ret = generic_file_aio_write(iocb, iov, nr_segs, pos);
@@ -764,9 +764,9 @@ retry_snap:
764 764
765 if (ret >= 0) { 765 if (ret >= 0) {
766 int dirty; 766 int dirty;
767 spin_lock(&inode->i_lock); 767 spin_lock(&ci->i_ceph_lock);
768 dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR); 768 dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR);
769 spin_unlock(&inode->i_lock); 769 spin_unlock(&ci->i_ceph_lock);
770 if (dirty) 770 if (dirty)
771 __mark_inode_dirty(inode, dirty); 771 __mark_inode_dirty(inode, dirty);
772 } 772 }
@@ -797,7 +797,8 @@ static loff_t ceph_llseek(struct file *file, loff_t offset, int origin)
797 797
798 mutex_lock(&inode->i_mutex); 798 mutex_lock(&inode->i_mutex);
799 __ceph_do_pending_vmtruncate(inode); 799 __ceph_do_pending_vmtruncate(inode);
800 if (origin != SEEK_CUR || origin != SEEK_SET) { 800
801 if (origin == SEEK_END || origin == SEEK_DATA || origin == SEEK_HOLE) {
801 ret = ceph_do_getattr(inode, CEPH_STAT_CAP_SIZE); 802 ret = ceph_do_getattr(inode, CEPH_STAT_CAP_SIZE);
802 if (ret < 0) { 803 if (ret < 0) {
803 offset = ret; 804 offset = ret;
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 116f36502f17..87fb132fb330 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -297,6 +297,8 @@ struct inode *ceph_alloc_inode(struct super_block *sb)
297 297
298 dout("alloc_inode %p\n", &ci->vfs_inode); 298 dout("alloc_inode %p\n", &ci->vfs_inode);
299 299
300 spin_lock_init(&ci->i_ceph_lock);
301
300 ci->i_version = 0; 302 ci->i_version = 0;
301 ci->i_time_warp_seq = 0; 303 ci->i_time_warp_seq = 0;
302 ci->i_ceph_flags = 0; 304 ci->i_ceph_flags = 0;
@@ -583,7 +585,7 @@ static int fill_inode(struct inode *inode,
583 iinfo->xattr_len); 585 iinfo->xattr_len);
584 } 586 }
585 587
586 spin_lock(&inode->i_lock); 588 spin_lock(&ci->i_ceph_lock);
587 589
588 /* 590 /*
589 * provided version will be odd if inode value is projected, 591 * provided version will be odd if inode value is projected,
@@ -680,7 +682,7 @@ static int fill_inode(struct inode *inode,
680 char *sym; 682 char *sym;
681 683
682 BUG_ON(symlen != inode->i_size); 684 BUG_ON(symlen != inode->i_size);
683 spin_unlock(&inode->i_lock); 685 spin_unlock(&ci->i_ceph_lock);
684 686
685 err = -ENOMEM; 687 err = -ENOMEM;
686 sym = kmalloc(symlen+1, GFP_NOFS); 688 sym = kmalloc(symlen+1, GFP_NOFS);
@@ -689,7 +691,7 @@ static int fill_inode(struct inode *inode,
689 memcpy(sym, iinfo->symlink, symlen); 691 memcpy(sym, iinfo->symlink, symlen);
690 sym[symlen] = 0; 692 sym[symlen] = 0;
691 693
692 spin_lock(&inode->i_lock); 694 spin_lock(&ci->i_ceph_lock);
693 if (!ci->i_symlink) 695 if (!ci->i_symlink)
694 ci->i_symlink = sym; 696 ci->i_symlink = sym;
695 else 697 else
@@ -715,7 +717,7 @@ static int fill_inode(struct inode *inode,
715 } 717 }
716 718
717no_change: 719no_change:
718 spin_unlock(&inode->i_lock); 720 spin_unlock(&ci->i_ceph_lock);
719 721
720 /* queue truncate if we saw i_size decrease */ 722 /* queue truncate if we saw i_size decrease */
721 if (queue_trunc) 723 if (queue_trunc)
@@ -750,13 +752,13 @@ no_change:
750 info->cap.flags, 752 info->cap.flags,
751 caps_reservation); 753 caps_reservation);
752 } else { 754 } else {
753 spin_lock(&inode->i_lock); 755 spin_lock(&ci->i_ceph_lock);
754 dout(" %p got snap_caps %s\n", inode, 756 dout(" %p got snap_caps %s\n", inode,
755 ceph_cap_string(le32_to_cpu(info->cap.caps))); 757 ceph_cap_string(le32_to_cpu(info->cap.caps)));
756 ci->i_snap_caps |= le32_to_cpu(info->cap.caps); 758 ci->i_snap_caps |= le32_to_cpu(info->cap.caps);
757 if (cap_fmode >= 0) 759 if (cap_fmode >= 0)
758 __ceph_get_fmode(ci, cap_fmode); 760 __ceph_get_fmode(ci, cap_fmode);
759 spin_unlock(&inode->i_lock); 761 spin_unlock(&ci->i_ceph_lock);
760 } 762 }
761 } else if (cap_fmode >= 0) { 763 } else if (cap_fmode >= 0) {
762 pr_warning("mds issued no caps on %llx.%llx\n", 764 pr_warning("mds issued no caps on %llx.%llx\n",
@@ -849,19 +851,20 @@ static void ceph_set_dentry_offset(struct dentry *dn)
849{ 851{
850 struct dentry *dir = dn->d_parent; 852 struct dentry *dir = dn->d_parent;
851 struct inode *inode = dir->d_inode; 853 struct inode *inode = dir->d_inode;
854 struct ceph_inode_info *ci = ceph_inode(inode);
852 struct ceph_dentry_info *di; 855 struct ceph_dentry_info *di;
853 856
854 BUG_ON(!inode); 857 BUG_ON(!inode);
855 858
856 di = ceph_dentry(dn); 859 di = ceph_dentry(dn);
857 860
858 spin_lock(&inode->i_lock); 861 spin_lock(&ci->i_ceph_lock);
859 if (!ceph_dir_test_complete(inode)) { 862 if (!ceph_dir_test_complete(inode)) {
860 spin_unlock(&inode->i_lock); 863 spin_unlock(&ci->i_ceph_lock);
861 return; 864 return;
862 } 865 }
863 di->offset = ceph_inode(inode)->i_max_offset++; 866 di->offset = ceph_inode(inode)->i_max_offset++;
864 spin_unlock(&inode->i_lock); 867 spin_unlock(&ci->i_ceph_lock);
865 868
866 spin_lock(&dir->d_lock); 869 spin_lock(&dir->d_lock);
867 spin_lock_nested(&dn->d_lock, DENTRY_D_LOCK_NESTED); 870 spin_lock_nested(&dn->d_lock, DENTRY_D_LOCK_NESTED);
@@ -1308,7 +1311,7 @@ int ceph_inode_set_size(struct inode *inode, loff_t size)
1308 struct ceph_inode_info *ci = ceph_inode(inode); 1311 struct ceph_inode_info *ci = ceph_inode(inode);
1309 int ret = 0; 1312 int ret = 0;
1310 1313
1311 spin_lock(&inode->i_lock); 1314 spin_lock(&ci->i_ceph_lock);
1312 dout("set_size %p %llu -> %llu\n", inode, inode->i_size, size); 1315 dout("set_size %p %llu -> %llu\n", inode, inode->i_size, size);
1313 inode->i_size = size; 1316 inode->i_size = size;
1314 inode->i_blocks = (size + (1 << 9) - 1) >> 9; 1317 inode->i_blocks = (size + (1 << 9) - 1) >> 9;
@@ -1318,7 +1321,7 @@ int ceph_inode_set_size(struct inode *inode, loff_t size)
1318 (ci->i_reported_size << 1) < ci->i_max_size) 1321 (ci->i_reported_size << 1) < ci->i_max_size)
1319 ret = 1; 1322 ret = 1;
1320 1323
1321 spin_unlock(&inode->i_lock); 1324 spin_unlock(&ci->i_ceph_lock);
1322 return ret; 1325 return ret;
1323} 1326}
1324 1327
@@ -1376,20 +1379,20 @@ static void ceph_invalidate_work(struct work_struct *work)
1376 u32 orig_gen; 1379 u32 orig_gen;
1377 int check = 0; 1380 int check = 0;
1378 1381
1379 spin_lock(&inode->i_lock); 1382 spin_lock(&ci->i_ceph_lock);
1380 dout("invalidate_pages %p gen %d revoking %d\n", inode, 1383 dout("invalidate_pages %p gen %d revoking %d\n", inode,
1381 ci->i_rdcache_gen, ci->i_rdcache_revoking); 1384 ci->i_rdcache_gen, ci->i_rdcache_revoking);
1382 if (ci->i_rdcache_revoking != ci->i_rdcache_gen) { 1385 if (ci->i_rdcache_revoking != ci->i_rdcache_gen) {
1383 /* nevermind! */ 1386 /* nevermind! */
1384 spin_unlock(&inode->i_lock); 1387 spin_unlock(&ci->i_ceph_lock);
1385 goto out; 1388 goto out;
1386 } 1389 }
1387 orig_gen = ci->i_rdcache_gen; 1390 orig_gen = ci->i_rdcache_gen;
1388 spin_unlock(&inode->i_lock); 1391 spin_unlock(&ci->i_ceph_lock);
1389 1392
1390 truncate_inode_pages(&inode->i_data, 0); 1393 truncate_inode_pages(&inode->i_data, 0);
1391 1394
1392 spin_lock(&inode->i_lock); 1395 spin_lock(&ci->i_ceph_lock);
1393 if (orig_gen == ci->i_rdcache_gen && 1396 if (orig_gen == ci->i_rdcache_gen &&
1394 orig_gen == ci->i_rdcache_revoking) { 1397 orig_gen == ci->i_rdcache_revoking) {
1395 dout("invalidate_pages %p gen %d successful\n", inode, 1398 dout("invalidate_pages %p gen %d successful\n", inode,
@@ -1401,7 +1404,7 @@ static void ceph_invalidate_work(struct work_struct *work)
1401 inode, orig_gen, ci->i_rdcache_gen, 1404 inode, orig_gen, ci->i_rdcache_gen,
1402 ci->i_rdcache_revoking); 1405 ci->i_rdcache_revoking);
1403 } 1406 }
1404 spin_unlock(&inode->i_lock); 1407 spin_unlock(&ci->i_ceph_lock);
1405 1408
1406 if (check) 1409 if (check)
1407 ceph_check_caps(ci, 0, NULL); 1410 ceph_check_caps(ci, 0, NULL);
@@ -1460,10 +1463,10 @@ void __ceph_do_pending_vmtruncate(struct inode *inode)
1460 int wrbuffer_refs, wake = 0; 1463 int wrbuffer_refs, wake = 0;
1461 1464
1462retry: 1465retry:
1463 spin_lock(&inode->i_lock); 1466 spin_lock(&ci->i_ceph_lock);
1464 if (ci->i_truncate_pending == 0) { 1467 if (ci->i_truncate_pending == 0) {
1465 dout("__do_pending_vmtruncate %p none pending\n", inode); 1468 dout("__do_pending_vmtruncate %p none pending\n", inode);
1466 spin_unlock(&inode->i_lock); 1469 spin_unlock(&ci->i_ceph_lock);
1467 return; 1470 return;
1468 } 1471 }
1469 1472
@@ -1474,7 +1477,7 @@ retry:
1474 if (ci->i_wrbuffer_ref_head < ci->i_wrbuffer_ref) { 1477 if (ci->i_wrbuffer_ref_head < ci->i_wrbuffer_ref) {
1475 dout("__do_pending_vmtruncate %p flushing snaps first\n", 1478 dout("__do_pending_vmtruncate %p flushing snaps first\n",
1476 inode); 1479 inode);
1477 spin_unlock(&inode->i_lock); 1480 spin_unlock(&ci->i_ceph_lock);
1478 filemap_write_and_wait_range(&inode->i_data, 0, 1481 filemap_write_and_wait_range(&inode->i_data, 0,
1479 inode->i_sb->s_maxbytes); 1482 inode->i_sb->s_maxbytes);
1480 goto retry; 1483 goto retry;
@@ -1484,15 +1487,15 @@ retry:
1484 wrbuffer_refs = ci->i_wrbuffer_ref; 1487 wrbuffer_refs = ci->i_wrbuffer_ref;
1485 dout("__do_pending_vmtruncate %p (%d) to %lld\n", inode, 1488 dout("__do_pending_vmtruncate %p (%d) to %lld\n", inode,
1486 ci->i_truncate_pending, to); 1489 ci->i_truncate_pending, to);
1487 spin_unlock(&inode->i_lock); 1490 spin_unlock(&ci->i_ceph_lock);
1488 1491
1489 truncate_inode_pages(inode->i_mapping, to); 1492 truncate_inode_pages(inode->i_mapping, to);
1490 1493
1491 spin_lock(&inode->i_lock); 1494 spin_lock(&ci->i_ceph_lock);
1492 ci->i_truncate_pending--; 1495 ci->i_truncate_pending--;
1493 if (ci->i_truncate_pending == 0) 1496 if (ci->i_truncate_pending == 0)
1494 wake = 1; 1497 wake = 1;
1495 spin_unlock(&inode->i_lock); 1498 spin_unlock(&ci->i_ceph_lock);
1496 1499
1497 if (wrbuffer_refs == 0) 1500 if (wrbuffer_refs == 0)
1498 ceph_check_caps(ci, CHECK_CAPS_AUTHONLY, NULL); 1501 ceph_check_caps(ci, CHECK_CAPS_AUTHONLY, NULL);
@@ -1547,7 +1550,7 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr)
1547 if (IS_ERR(req)) 1550 if (IS_ERR(req))
1548 return PTR_ERR(req); 1551 return PTR_ERR(req);
1549 1552
1550 spin_lock(&inode->i_lock); 1553 spin_lock(&ci->i_ceph_lock);
1551 issued = __ceph_caps_issued(ci, NULL); 1554 issued = __ceph_caps_issued(ci, NULL);
1552 dout("setattr %p issued %s\n", inode, ceph_cap_string(issued)); 1555 dout("setattr %p issued %s\n", inode, ceph_cap_string(issued));
1553 1556
@@ -1695,7 +1698,7 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr)
1695 } 1698 }
1696 1699
1697 release &= issued; 1700 release &= issued;
1698 spin_unlock(&inode->i_lock); 1701 spin_unlock(&ci->i_ceph_lock);
1699 1702
1700 if (inode_dirty_flags) 1703 if (inode_dirty_flags)
1701 __mark_inode_dirty(inode, inode_dirty_flags); 1704 __mark_inode_dirty(inode, inode_dirty_flags);
@@ -1717,7 +1720,7 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr)
1717 __ceph_do_pending_vmtruncate(inode); 1720 __ceph_do_pending_vmtruncate(inode);
1718 return err; 1721 return err;
1719out: 1722out:
1720 spin_unlock(&inode->i_lock); 1723 spin_unlock(&ci->i_ceph_lock);
1721 ceph_mdsc_put_request(req); 1724 ceph_mdsc_put_request(req);
1722 return err; 1725 return err;
1723} 1726}
diff --git a/fs/ceph/ioctl.c b/fs/ceph/ioctl.c
index 5a14c29cbba6..790914a598dd 100644
--- a/fs/ceph/ioctl.c
+++ b/fs/ceph/ioctl.c
@@ -241,11 +241,11 @@ static long ceph_ioctl_lazyio(struct file *file)
241 struct ceph_inode_info *ci = ceph_inode(inode); 241 struct ceph_inode_info *ci = ceph_inode(inode);
242 242
243 if ((fi->fmode & CEPH_FILE_MODE_LAZY) == 0) { 243 if ((fi->fmode & CEPH_FILE_MODE_LAZY) == 0) {
244 spin_lock(&inode->i_lock); 244 spin_lock(&ci->i_ceph_lock);
245 ci->i_nr_by_mode[fi->fmode]--; 245 ci->i_nr_by_mode[fi->fmode]--;
246 fi->fmode |= CEPH_FILE_MODE_LAZY; 246 fi->fmode |= CEPH_FILE_MODE_LAZY;
247 ci->i_nr_by_mode[fi->fmode]++; 247 ci->i_nr_by_mode[fi->fmode]++;
248 spin_unlock(&inode->i_lock); 248 spin_unlock(&ci->i_ceph_lock);
249 dout("ioctl_layzio: file %p marked lazy\n", file); 249 dout("ioctl_layzio: file %p marked lazy\n", file);
250 250
251 ceph_check_caps(ci, 0, NULL); 251 ceph_check_caps(ci, 0, NULL);
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 264ab701154f..6203d805eb45 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -732,21 +732,21 @@ static int __choose_mds(struct ceph_mds_client *mdsc,
732 } 732 }
733 } 733 }
734 734
735 spin_lock(&inode->i_lock); 735 spin_lock(&ci->i_ceph_lock);
736 cap = NULL; 736 cap = NULL;
737 if (mode == USE_AUTH_MDS) 737 if (mode == USE_AUTH_MDS)
738 cap = ci->i_auth_cap; 738 cap = ci->i_auth_cap;
739 if (!cap && !RB_EMPTY_ROOT(&ci->i_caps)) 739 if (!cap && !RB_EMPTY_ROOT(&ci->i_caps))
740 cap = rb_entry(rb_first(&ci->i_caps), struct ceph_cap, ci_node); 740 cap = rb_entry(rb_first(&ci->i_caps), struct ceph_cap, ci_node);
741 if (!cap) { 741 if (!cap) {
742 spin_unlock(&inode->i_lock); 742 spin_unlock(&ci->i_ceph_lock);
743 goto random; 743 goto random;
744 } 744 }
745 mds = cap->session->s_mds; 745 mds = cap->session->s_mds;
746 dout("choose_mds %p %llx.%llx mds%d (%scap %p)\n", 746 dout("choose_mds %p %llx.%llx mds%d (%scap %p)\n",
747 inode, ceph_vinop(inode), mds, 747 inode, ceph_vinop(inode), mds,
748 cap == ci->i_auth_cap ? "auth " : "", cap); 748 cap == ci->i_auth_cap ? "auth " : "", cap);
749 spin_unlock(&inode->i_lock); 749 spin_unlock(&ci->i_ceph_lock);
750 return mds; 750 return mds;
751 751
752random: 752random:
@@ -951,7 +951,7 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
951 951
952 dout("removing cap %p, ci is %p, inode is %p\n", 952 dout("removing cap %p, ci is %p, inode is %p\n",
953 cap, ci, &ci->vfs_inode); 953 cap, ci, &ci->vfs_inode);
954 spin_lock(&inode->i_lock); 954 spin_lock(&ci->i_ceph_lock);
955 __ceph_remove_cap(cap); 955 __ceph_remove_cap(cap);
956 if (!__ceph_is_any_real_caps(ci)) { 956 if (!__ceph_is_any_real_caps(ci)) {
957 struct ceph_mds_client *mdsc = 957 struct ceph_mds_client *mdsc =
@@ -984,7 +984,7 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
984 } 984 }
985 spin_unlock(&mdsc->cap_dirty_lock); 985 spin_unlock(&mdsc->cap_dirty_lock);
986 } 986 }
987 spin_unlock(&inode->i_lock); 987 spin_unlock(&ci->i_ceph_lock);
988 while (drop--) 988 while (drop--)
989 iput(inode); 989 iput(inode);
990 return 0; 990 return 0;
@@ -1015,10 +1015,10 @@ static int wake_up_session_cb(struct inode *inode, struct ceph_cap *cap,
1015 1015
1016 wake_up_all(&ci->i_cap_wq); 1016 wake_up_all(&ci->i_cap_wq);
1017 if (arg) { 1017 if (arg) {
1018 spin_lock(&inode->i_lock); 1018 spin_lock(&ci->i_ceph_lock);
1019 ci->i_wanted_max_size = 0; 1019 ci->i_wanted_max_size = 0;
1020 ci->i_requested_max_size = 0; 1020 ci->i_requested_max_size = 0;
1021 spin_unlock(&inode->i_lock); 1021 spin_unlock(&ci->i_ceph_lock);
1022 } 1022 }
1023 return 0; 1023 return 0;
1024} 1024}
@@ -1151,7 +1151,7 @@ static int trim_caps_cb(struct inode *inode, struct ceph_cap *cap, void *arg)
1151 if (session->s_trim_caps <= 0) 1151 if (session->s_trim_caps <= 0)
1152 return -1; 1152 return -1;
1153 1153
1154 spin_lock(&inode->i_lock); 1154 spin_lock(&ci->i_ceph_lock);
1155 mine = cap->issued | cap->implemented; 1155 mine = cap->issued | cap->implemented;
1156 used = __ceph_caps_used(ci); 1156 used = __ceph_caps_used(ci);
1157 oissued = __ceph_caps_issued_other(ci, cap); 1157 oissued = __ceph_caps_issued_other(ci, cap);
@@ -1170,7 +1170,7 @@ static int trim_caps_cb(struct inode *inode, struct ceph_cap *cap, void *arg)
1170 __ceph_remove_cap(cap); 1170 __ceph_remove_cap(cap);
1171 } else { 1171 } else {
1172 /* try to drop referring dentries */ 1172 /* try to drop referring dentries */
1173 spin_unlock(&inode->i_lock); 1173 spin_unlock(&ci->i_ceph_lock);
1174 d_prune_aliases(inode); 1174 d_prune_aliases(inode);
1175 dout("trim_caps_cb %p cap %p pruned, count now %d\n", 1175 dout("trim_caps_cb %p cap %p pruned, count now %d\n",
1176 inode, cap, atomic_read(&inode->i_count)); 1176 inode, cap, atomic_read(&inode->i_count));
@@ -1178,7 +1178,7 @@ static int trim_caps_cb(struct inode *inode, struct ceph_cap *cap, void *arg)
1178 } 1178 }
1179 1179
1180out: 1180out:
1181 spin_unlock(&inode->i_lock); 1181 spin_unlock(&ci->i_ceph_lock);
1182 return 0; 1182 return 0;
1183} 1183}
1184 1184
@@ -1296,7 +1296,7 @@ static int check_cap_flush(struct ceph_mds_client *mdsc, u64 want_flush_seq)
1296 i_flushing_item); 1296 i_flushing_item);
1297 struct inode *inode = &ci->vfs_inode; 1297 struct inode *inode = &ci->vfs_inode;
1298 1298
1299 spin_lock(&inode->i_lock); 1299 spin_lock(&ci->i_ceph_lock);
1300 if (ci->i_cap_flush_seq <= want_flush_seq) { 1300 if (ci->i_cap_flush_seq <= want_flush_seq) {
1301 dout("check_cap_flush still flushing %p " 1301 dout("check_cap_flush still flushing %p "
1302 "seq %lld <= %lld to mds%d\n", inode, 1302 "seq %lld <= %lld to mds%d\n", inode,
@@ -1304,7 +1304,7 @@ static int check_cap_flush(struct ceph_mds_client *mdsc, u64 want_flush_seq)
1304 session->s_mds); 1304 session->s_mds);
1305 ret = 0; 1305 ret = 0;
1306 } 1306 }
1307 spin_unlock(&inode->i_lock); 1307 spin_unlock(&ci->i_ceph_lock);
1308 } 1308 }
1309 mutex_unlock(&session->s_mutex); 1309 mutex_unlock(&session->s_mutex);
1310 ceph_put_mds_session(session); 1310 ceph_put_mds_session(session);
@@ -1495,6 +1495,7 @@ retry:
1495 pos, temp); 1495 pos, temp);
1496 } else if (stop_on_nosnap && inode && 1496 } else if (stop_on_nosnap && inode &&
1497 ceph_snap(inode) == CEPH_NOSNAP) { 1497 ceph_snap(inode) == CEPH_NOSNAP) {
1498 spin_unlock(&temp->d_lock);
1498 break; 1499 break;
1499 } else { 1500 } else {
1500 pos -= temp->d_name.len; 1501 pos -= temp->d_name.len;
@@ -2011,10 +2012,10 @@ void ceph_invalidate_dir_request(struct ceph_mds_request *req)
2011 struct ceph_inode_info *ci = ceph_inode(inode); 2012 struct ceph_inode_info *ci = ceph_inode(inode);
2012 2013
2013 dout("invalidate_dir_request %p (D_COMPLETE, lease(s))\n", inode); 2014 dout("invalidate_dir_request %p (D_COMPLETE, lease(s))\n", inode);
2014 spin_lock(&inode->i_lock); 2015 spin_lock(&ci->i_ceph_lock);
2015 ceph_dir_clear_complete(inode); 2016 ceph_dir_clear_complete(inode);
2016 ci->i_release_count++; 2017 ci->i_release_count++;
2017 spin_unlock(&inode->i_lock); 2018 spin_unlock(&ci->i_ceph_lock);
2018 2019
2019 if (req->r_dentry) 2020 if (req->r_dentry)
2020 ceph_invalidate_dentry_lease(req->r_dentry); 2021 ceph_invalidate_dentry_lease(req->r_dentry);
@@ -2422,7 +2423,7 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap,
2422 if (err) 2423 if (err)
2423 goto out_free; 2424 goto out_free;
2424 2425
2425 spin_lock(&inode->i_lock); 2426 spin_lock(&ci->i_ceph_lock);
2426 cap->seq = 0; /* reset cap seq */ 2427 cap->seq = 0; /* reset cap seq */
2427 cap->issue_seq = 0; /* and issue_seq */ 2428 cap->issue_seq = 0; /* and issue_seq */
2428 2429
@@ -2445,7 +2446,7 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap,
2445 rec.v1.pathbase = cpu_to_le64(pathbase); 2446 rec.v1.pathbase = cpu_to_le64(pathbase);
2446 reclen = sizeof(rec.v1); 2447 reclen = sizeof(rec.v1);
2447 } 2448 }
2448 spin_unlock(&inode->i_lock); 2449 spin_unlock(&ci->i_ceph_lock);
2449 2450
2450 if (recon_state->flock) { 2451 if (recon_state->flock) {
2451 int num_fcntl_locks, num_flock_locks; 2452 int num_fcntl_locks, num_flock_locks;
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index 4bb239921dbd..a50ca0e39475 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -20,7 +20,7 @@
20 * 20 *
21 * mdsc->snap_rwsem 21 * mdsc->snap_rwsem
22 * 22 *
23 * inode->i_lock 23 * ci->i_ceph_lock
24 * mdsc->snap_flush_lock 24 * mdsc->snap_flush_lock
25 * mdsc->cap_delay_lock 25 * mdsc->cap_delay_lock
26 * 26 *
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
index e26437191333..a559c80f127a 100644
--- a/fs/ceph/snap.c
+++ b/fs/ceph/snap.c
@@ -446,7 +446,7 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci)
446 return; 446 return;
447 } 447 }
448 448
449 spin_lock(&inode->i_lock); 449 spin_lock(&ci->i_ceph_lock);
450 used = __ceph_caps_used(ci); 450 used = __ceph_caps_used(ci);
451 dirty = __ceph_caps_dirty(ci); 451 dirty = __ceph_caps_dirty(ci);
452 452
@@ -528,7 +528,7 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci)
528 kfree(capsnap); 528 kfree(capsnap);
529 } 529 }
530 530
531 spin_unlock(&inode->i_lock); 531 spin_unlock(&ci->i_ceph_lock);
532} 532}
533 533
534/* 534/*
@@ -537,7 +537,7 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci)
537 * 537 *
538 * If capsnap can now be flushed, add to snap_flush list, and return 1. 538 * If capsnap can now be flushed, add to snap_flush list, and return 1.
539 * 539 *
540 * Caller must hold i_lock. 540 * Caller must hold i_ceph_lock.
541 */ 541 */
542int __ceph_finish_cap_snap(struct ceph_inode_info *ci, 542int __ceph_finish_cap_snap(struct ceph_inode_info *ci,
543 struct ceph_cap_snap *capsnap) 543 struct ceph_cap_snap *capsnap)
@@ -739,9 +739,9 @@ static void flush_snaps(struct ceph_mds_client *mdsc)
739 inode = &ci->vfs_inode; 739 inode = &ci->vfs_inode;
740 ihold(inode); 740 ihold(inode);
741 spin_unlock(&mdsc->snap_flush_lock); 741 spin_unlock(&mdsc->snap_flush_lock);
742 spin_lock(&inode->i_lock); 742 spin_lock(&ci->i_ceph_lock);
743 __ceph_flush_snaps(ci, &session, 0); 743 __ceph_flush_snaps(ci, &session, 0);
744 spin_unlock(&inode->i_lock); 744 spin_unlock(&ci->i_ceph_lock);
745 iput(inode); 745 iput(inode);
746 spin_lock(&mdsc->snap_flush_lock); 746 spin_lock(&mdsc->snap_flush_lock);
747 } 747 }
@@ -847,7 +847,7 @@ void ceph_handle_snap(struct ceph_mds_client *mdsc,
847 continue; 847 continue;
848 ci = ceph_inode(inode); 848 ci = ceph_inode(inode);
849 849
850 spin_lock(&inode->i_lock); 850 spin_lock(&ci->i_ceph_lock);
851 if (!ci->i_snap_realm) 851 if (!ci->i_snap_realm)
852 goto skip_inode; 852 goto skip_inode;
853 /* 853 /*
@@ -876,7 +876,7 @@ void ceph_handle_snap(struct ceph_mds_client *mdsc,
876 oldrealm = ci->i_snap_realm; 876 oldrealm = ci->i_snap_realm;
877 ci->i_snap_realm = realm; 877 ci->i_snap_realm = realm;
878 spin_unlock(&realm->inodes_with_caps_lock); 878 spin_unlock(&realm->inodes_with_caps_lock);
879 spin_unlock(&inode->i_lock); 879 spin_unlock(&ci->i_ceph_lock);
880 880
881 ceph_get_snap_realm(mdsc, realm); 881 ceph_get_snap_realm(mdsc, realm);
882 ceph_put_snap_realm(mdsc, oldrealm); 882 ceph_put_snap_realm(mdsc, oldrealm);
@@ -885,7 +885,7 @@ void ceph_handle_snap(struct ceph_mds_client *mdsc,
885 continue; 885 continue;
886 886
887skip_inode: 887skip_inode:
888 spin_unlock(&inode->i_lock); 888 spin_unlock(&ci->i_ceph_lock);
889 iput(inode); 889 iput(inode);
890 } 890 }
891 891
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index 8dc73a594a90..b48f15f101a0 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -383,7 +383,7 @@ static int ceph_show_options(struct seq_file *m, struct vfsmount *mnt)
383 if (fsopt->rsize != CEPH_RSIZE_DEFAULT) 383 if (fsopt->rsize != CEPH_RSIZE_DEFAULT)
384 seq_printf(m, ",rsize=%d", fsopt->rsize); 384 seq_printf(m, ",rsize=%d", fsopt->rsize);
385 if (fsopt->rasize != CEPH_RASIZE_DEFAULT) 385 if (fsopt->rasize != CEPH_RASIZE_DEFAULT)
386 seq_printf(m, ",rasize=%d", fsopt->rsize); 386 seq_printf(m, ",rasize=%d", fsopt->rasize);
387 if (fsopt->congestion_kb != default_congestion_kb()) 387 if (fsopt->congestion_kb != default_congestion_kb())
388 seq_printf(m, ",write_congestion_kb=%d", fsopt->congestion_kb); 388 seq_printf(m, ",write_congestion_kb=%d", fsopt->congestion_kb);
389 if (fsopt->caps_wanted_delay_min != CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT) 389 if (fsopt->caps_wanted_delay_min != CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT)
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 01bf189e08a9..edcbf3774a56 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -220,7 +220,7 @@ struct ceph_dentry_info {
220 * The locking for D_COMPLETE is a bit odd: 220 * The locking for D_COMPLETE is a bit odd:
221 * - we can clear it at almost any time (see ceph_d_prune) 221 * - we can clear it at almost any time (see ceph_d_prune)
222 * - it is only meaningful if: 222 * - it is only meaningful if:
223 * - we hold dir inode i_lock 223 * - we hold dir inode i_ceph_lock
224 * - we hold dir FILE_SHARED caps 224 * - we hold dir FILE_SHARED caps
225 * - the dentry D_COMPLETE is set 225 * - the dentry D_COMPLETE is set
226 */ 226 */
@@ -250,6 +250,8 @@ struct ceph_inode_xattrs_info {
250struct ceph_inode_info { 250struct ceph_inode_info {
251 struct ceph_vino i_vino; /* ceph ino + snap */ 251 struct ceph_vino i_vino; /* ceph ino + snap */
252 252
253 spinlock_t i_ceph_lock;
254
253 u64 i_version; 255 u64 i_version;
254 u32 i_time_warp_seq; 256 u32 i_time_warp_seq;
255 257
@@ -271,7 +273,7 @@ struct ceph_inode_info {
271 273
272 struct ceph_inode_xattrs_info i_xattrs; 274 struct ceph_inode_xattrs_info i_xattrs;
273 275
274 /* capabilities. protected _both_ by i_lock and cap->session's 276 /* capabilities. protected _both_ by i_ceph_lock and cap->session's
275 * s_mutex. */ 277 * s_mutex. */
276 struct rb_root i_caps; /* cap list */ 278 struct rb_root i_caps; /* cap list */
277 struct ceph_cap *i_auth_cap; /* authoritative cap, if any */ 279 struct ceph_cap *i_auth_cap; /* authoritative cap, if any */
@@ -437,18 +439,18 @@ static inline void ceph_i_clear(struct inode *inode, unsigned mask)
437{ 439{
438 struct ceph_inode_info *ci = ceph_inode(inode); 440 struct ceph_inode_info *ci = ceph_inode(inode);
439 441
440 spin_lock(&inode->i_lock); 442 spin_lock(&ci->i_ceph_lock);
441 ci->i_ceph_flags &= ~mask; 443 ci->i_ceph_flags &= ~mask;
442 spin_unlock(&inode->i_lock); 444 spin_unlock(&ci->i_ceph_lock);
443} 445}
444 446
445static inline void ceph_i_set(struct inode *inode, unsigned mask) 447static inline void ceph_i_set(struct inode *inode, unsigned mask)
446{ 448{
447 struct ceph_inode_info *ci = ceph_inode(inode); 449 struct ceph_inode_info *ci = ceph_inode(inode);
448 450
449 spin_lock(&inode->i_lock); 451 spin_lock(&ci->i_ceph_lock);
450 ci->i_ceph_flags |= mask; 452 ci->i_ceph_flags |= mask;
451 spin_unlock(&inode->i_lock); 453 spin_unlock(&ci->i_ceph_lock);
452} 454}
453 455
454static inline bool ceph_i_test(struct inode *inode, unsigned mask) 456static inline bool ceph_i_test(struct inode *inode, unsigned mask)
@@ -456,9 +458,9 @@ static inline bool ceph_i_test(struct inode *inode, unsigned mask)
456 struct ceph_inode_info *ci = ceph_inode(inode); 458 struct ceph_inode_info *ci = ceph_inode(inode);
457 bool r; 459 bool r;
458 460
459 spin_lock(&inode->i_lock); 461 spin_lock(&ci->i_ceph_lock);
460 r = (ci->i_ceph_flags & mask) == mask; 462 r = (ci->i_ceph_flags & mask) == mask;
461 spin_unlock(&inode->i_lock); 463 spin_unlock(&ci->i_ceph_lock);
462 return r; 464 return r;
463} 465}
464 466
@@ -508,9 +510,9 @@ extern int __ceph_caps_issued_other(struct ceph_inode_info *ci,
508static inline int ceph_caps_issued(struct ceph_inode_info *ci) 510static inline int ceph_caps_issued(struct ceph_inode_info *ci)
509{ 511{
510 int issued; 512 int issued;
511 spin_lock(&ci->vfs_inode.i_lock); 513 spin_lock(&ci->i_ceph_lock);
512 issued = __ceph_caps_issued(ci, NULL); 514 issued = __ceph_caps_issued(ci, NULL);
513 spin_unlock(&ci->vfs_inode.i_lock); 515 spin_unlock(&ci->i_ceph_lock);
514 return issued; 516 return issued;
515} 517}
516 518
@@ -518,9 +520,9 @@ static inline int ceph_caps_issued_mask(struct ceph_inode_info *ci, int mask,
518 int touch) 520 int touch)
519{ 521{
520 int r; 522 int r;
521 spin_lock(&ci->vfs_inode.i_lock); 523 spin_lock(&ci->i_ceph_lock);
522 r = __ceph_caps_issued_mask(ci, mask, touch); 524 r = __ceph_caps_issued_mask(ci, mask, touch);
523 spin_unlock(&ci->vfs_inode.i_lock); 525 spin_unlock(&ci->i_ceph_lock);
524 return r; 526 return r;
525} 527}
526 528
@@ -743,10 +745,9 @@ extern int ceph_add_cap(struct inode *inode,
743extern void __ceph_remove_cap(struct ceph_cap *cap); 745extern void __ceph_remove_cap(struct ceph_cap *cap);
744static inline void ceph_remove_cap(struct ceph_cap *cap) 746static inline void ceph_remove_cap(struct ceph_cap *cap)
745{ 747{
746 struct inode *inode = &cap->ci->vfs_inode; 748 spin_lock(&cap->ci->i_ceph_lock);
747 spin_lock(&inode->i_lock);
748 __ceph_remove_cap(cap); 749 __ceph_remove_cap(cap);
749 spin_unlock(&inode->i_lock); 750 spin_unlock(&cap->ci->i_ceph_lock);
750} 751}
751extern void ceph_put_cap(struct ceph_mds_client *mdsc, 752extern void ceph_put_cap(struct ceph_mds_client *mdsc,
752 struct ceph_cap *cap); 753 struct ceph_cap *cap);
diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c
index 96c6739a0280..a5e36e4488a7 100644
--- a/fs/ceph/xattr.c
+++ b/fs/ceph/xattr.c
@@ -343,8 +343,8 @@ void __ceph_destroy_xattrs(struct ceph_inode_info *ci)
343} 343}
344 344
345static int __build_xattrs(struct inode *inode) 345static int __build_xattrs(struct inode *inode)
346 __releases(inode->i_lock) 346 __releases(ci->i_ceph_lock)
347 __acquires(inode->i_lock) 347 __acquires(ci->i_ceph_lock)
348{ 348{
349 u32 namelen; 349 u32 namelen;
350 u32 numattr = 0; 350 u32 numattr = 0;
@@ -372,7 +372,7 @@ start:
372 end = p + ci->i_xattrs.blob->vec.iov_len; 372 end = p + ci->i_xattrs.blob->vec.iov_len;
373 ceph_decode_32_safe(&p, end, numattr, bad); 373 ceph_decode_32_safe(&p, end, numattr, bad);
374 xattr_version = ci->i_xattrs.version; 374 xattr_version = ci->i_xattrs.version;
375 spin_unlock(&inode->i_lock); 375 spin_unlock(&ci->i_ceph_lock);
376 376
377 xattrs = kcalloc(numattr, sizeof(struct ceph_xattr *), 377 xattrs = kcalloc(numattr, sizeof(struct ceph_xattr *),
378 GFP_NOFS); 378 GFP_NOFS);
@@ -387,7 +387,7 @@ start:
387 goto bad_lock; 387 goto bad_lock;
388 } 388 }
389 389
390 spin_lock(&inode->i_lock); 390 spin_lock(&ci->i_ceph_lock);
391 if (ci->i_xattrs.version != xattr_version) { 391 if (ci->i_xattrs.version != xattr_version) {
392 /* lost a race, retry */ 392 /* lost a race, retry */
393 for (i = 0; i < numattr; i++) 393 for (i = 0; i < numattr; i++)
@@ -418,7 +418,7 @@ start:
418 418
419 return err; 419 return err;
420bad_lock: 420bad_lock:
421 spin_lock(&inode->i_lock); 421 spin_lock(&ci->i_ceph_lock);
422bad: 422bad:
423 if (xattrs) { 423 if (xattrs) {
424 for (i = 0; i < numattr; i++) 424 for (i = 0; i < numattr; i++)
@@ -512,7 +512,7 @@ ssize_t ceph_getxattr(struct dentry *dentry, const char *name, void *value,
512 if (vxattrs) 512 if (vxattrs)
513 vxattr = ceph_match_vxattr(vxattrs, name); 513 vxattr = ceph_match_vxattr(vxattrs, name);
514 514
515 spin_lock(&inode->i_lock); 515 spin_lock(&ci->i_ceph_lock);
516 dout("getxattr %p ver=%lld index_ver=%lld\n", inode, 516 dout("getxattr %p ver=%lld index_ver=%lld\n", inode,
517 ci->i_xattrs.version, ci->i_xattrs.index_version); 517 ci->i_xattrs.version, ci->i_xattrs.index_version);
518 518
@@ -520,14 +520,14 @@ ssize_t ceph_getxattr(struct dentry *dentry, const char *name, void *value,
520 (ci->i_xattrs.index_version >= ci->i_xattrs.version)) { 520 (ci->i_xattrs.index_version >= ci->i_xattrs.version)) {
521 goto get_xattr; 521 goto get_xattr;
522 } else { 522 } else {
523 spin_unlock(&inode->i_lock); 523 spin_unlock(&ci->i_ceph_lock);
524 /* get xattrs from mds (if we don't already have them) */ 524 /* get xattrs from mds (if we don't already have them) */
525 err = ceph_do_getattr(inode, CEPH_STAT_CAP_XATTR); 525 err = ceph_do_getattr(inode, CEPH_STAT_CAP_XATTR);
526 if (err) 526 if (err)
527 return err; 527 return err;
528 } 528 }
529 529
530 spin_lock(&inode->i_lock); 530 spin_lock(&ci->i_ceph_lock);
531 531
532 if (vxattr && vxattr->readonly) { 532 if (vxattr && vxattr->readonly) {
533 err = vxattr->getxattr_cb(ci, value, size); 533 err = vxattr->getxattr_cb(ci, value, size);
@@ -558,7 +558,7 @@ get_xattr:
558 memcpy(value, xattr->val, xattr->val_len); 558 memcpy(value, xattr->val, xattr->val_len);
559 559
560out: 560out:
561 spin_unlock(&inode->i_lock); 561 spin_unlock(&ci->i_ceph_lock);
562 return err; 562 return err;
563} 563}
564 564
@@ -573,7 +573,7 @@ ssize_t ceph_listxattr(struct dentry *dentry, char *names, size_t size)
573 u32 len; 573 u32 len;
574 int i; 574 int i;
575 575
576 spin_lock(&inode->i_lock); 576 spin_lock(&ci->i_ceph_lock);
577 dout("listxattr %p ver=%lld index_ver=%lld\n", inode, 577 dout("listxattr %p ver=%lld index_ver=%lld\n", inode,
578 ci->i_xattrs.version, ci->i_xattrs.index_version); 578 ci->i_xattrs.version, ci->i_xattrs.index_version);
579 579
@@ -581,13 +581,13 @@ ssize_t ceph_listxattr(struct dentry *dentry, char *names, size_t size)
581 (ci->i_xattrs.index_version >= ci->i_xattrs.version)) { 581 (ci->i_xattrs.index_version >= ci->i_xattrs.version)) {
582 goto list_xattr; 582 goto list_xattr;
583 } else { 583 } else {
584 spin_unlock(&inode->i_lock); 584 spin_unlock(&ci->i_ceph_lock);
585 err = ceph_do_getattr(inode, CEPH_STAT_CAP_XATTR); 585 err = ceph_do_getattr(inode, CEPH_STAT_CAP_XATTR);
586 if (err) 586 if (err)
587 return err; 587 return err;
588 } 588 }
589 589
590 spin_lock(&inode->i_lock); 590 spin_lock(&ci->i_ceph_lock);
591 591
592 err = __build_xattrs(inode); 592 err = __build_xattrs(inode);
593 if (err < 0) 593 if (err < 0)
@@ -619,7 +619,7 @@ list_xattr:
619 } 619 }
620 620
621out: 621out:
622 spin_unlock(&inode->i_lock); 622 spin_unlock(&ci->i_ceph_lock);
623 return err; 623 return err;
624} 624}
625 625
@@ -739,7 +739,7 @@ int ceph_setxattr(struct dentry *dentry, const char *name,
739 if (!xattr) 739 if (!xattr)
740 goto out; 740 goto out;
741 741
742 spin_lock(&inode->i_lock); 742 spin_lock(&ci->i_ceph_lock);
743retry: 743retry:
744 issued = __ceph_caps_issued(ci, NULL); 744 issued = __ceph_caps_issued(ci, NULL);
745 if (!(issued & CEPH_CAP_XATTR_EXCL)) 745 if (!(issued & CEPH_CAP_XATTR_EXCL))
@@ -752,12 +752,12 @@ retry:
752 required_blob_size > ci->i_xattrs.prealloc_blob->alloc_len) { 752 required_blob_size > ci->i_xattrs.prealloc_blob->alloc_len) {
753 struct ceph_buffer *blob = NULL; 753 struct ceph_buffer *blob = NULL;
754 754
755 spin_unlock(&inode->i_lock); 755 spin_unlock(&ci->i_ceph_lock);
756 dout(" preaallocating new blob size=%d\n", required_blob_size); 756 dout(" preaallocating new blob size=%d\n", required_blob_size);
757 blob = ceph_buffer_new(required_blob_size, GFP_NOFS); 757 blob = ceph_buffer_new(required_blob_size, GFP_NOFS);
758 if (!blob) 758 if (!blob)
759 goto out; 759 goto out;
760 spin_lock(&inode->i_lock); 760 spin_lock(&ci->i_ceph_lock);
761 if (ci->i_xattrs.prealloc_blob) 761 if (ci->i_xattrs.prealloc_blob)
762 ceph_buffer_put(ci->i_xattrs.prealloc_blob); 762 ceph_buffer_put(ci->i_xattrs.prealloc_blob);
763 ci->i_xattrs.prealloc_blob = blob; 763 ci->i_xattrs.prealloc_blob = blob;
@@ -770,13 +770,13 @@ retry:
770 dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_XATTR_EXCL); 770 dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_XATTR_EXCL);
771 ci->i_xattrs.dirty = true; 771 ci->i_xattrs.dirty = true;
772 inode->i_ctime = CURRENT_TIME; 772 inode->i_ctime = CURRENT_TIME;
773 spin_unlock(&inode->i_lock); 773 spin_unlock(&ci->i_ceph_lock);
774 if (dirty) 774 if (dirty)
775 __mark_inode_dirty(inode, dirty); 775 __mark_inode_dirty(inode, dirty);
776 return err; 776 return err;
777 777
778do_sync: 778do_sync:
779 spin_unlock(&inode->i_lock); 779 spin_unlock(&ci->i_ceph_lock);
780 err = ceph_sync_setxattr(dentry, name, value, size, flags); 780 err = ceph_sync_setxattr(dentry, name, value, size, flags);
781out: 781out:
782 kfree(newname); 782 kfree(newname);
@@ -833,7 +833,7 @@ int ceph_removexattr(struct dentry *dentry, const char *name)
833 return -EOPNOTSUPP; 833 return -EOPNOTSUPP;
834 } 834 }
835 835
836 spin_lock(&inode->i_lock); 836 spin_lock(&ci->i_ceph_lock);
837 __build_xattrs(inode); 837 __build_xattrs(inode);
838 issued = __ceph_caps_issued(ci, NULL); 838 issued = __ceph_caps_issued(ci, NULL);
839 dout("removexattr %p issued %s\n", inode, ceph_cap_string(issued)); 839 dout("removexattr %p issued %s\n", inode, ceph_cap_string(issued));
@@ -846,12 +846,12 @@ int ceph_removexattr(struct dentry *dentry, const char *name)
846 ci->i_xattrs.dirty = true; 846 ci->i_xattrs.dirty = true;
847 inode->i_ctime = CURRENT_TIME; 847 inode->i_ctime = CURRENT_TIME;
848 848
849 spin_unlock(&inode->i_lock); 849 spin_unlock(&ci->i_ceph_lock);
850 if (dirty) 850 if (dirty)
851 __mark_inode_dirty(inode, dirty); 851 __mark_inode_dirty(inode, dirty);
852 return err; 852 return err;
853do_sync: 853do_sync:
854 spin_unlock(&inode->i_lock); 854 spin_unlock(&ci->i_ceph_lock);
855 err = ceph_send_removexattr(dentry, name); 855 err = ceph_send_removexattr(dentry, name);
856 return err; 856 return err;
857} 857}
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index d6a972df0338..8cd4b52d4217 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -441,6 +441,8 @@ cifs_readv_from_socket(struct TCP_Server_Info *server, struct kvec *iov_orig,
441 smb_msg.msg_controllen = 0; 441 smb_msg.msg_controllen = 0;
442 442
443 for (total_read = 0; to_read; total_read += length, to_read -= length) { 443 for (total_read = 0; to_read; total_read += length, to_read -= length) {
444 try_to_freeze();
445
444 if (server_unresponsive(server)) { 446 if (server_unresponsive(server)) {
445 total_read = -EAGAIN; 447 total_read = -EAGAIN;
446 break; 448 break;
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index cf0b1539b321..4dd9283885e7 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -702,6 +702,13 @@ cifs_find_lock_conflict(struct cifsInodeInfo *cinode, struct cifsLockInfo *lock,
702 lock->type, lock->netfid, conf_lock); 702 lock->type, lock->netfid, conf_lock);
703} 703}
704 704
705/*
706 * Check if there is another lock that prevents us to set the lock (mandatory
707 * style). If such a lock exists, update the flock structure with its
708 * properties. Otherwise, set the flock type to F_UNLCK if we can cache brlocks
709 * or leave it the same if we can't. Returns 0 if we don't need to request to
710 * the server or 1 otherwise.
711 */
705static int 712static int
706cifs_lock_test(struct cifsInodeInfo *cinode, __u64 offset, __u64 length, 713cifs_lock_test(struct cifsInodeInfo *cinode, __u64 offset, __u64 length,
707 __u8 type, __u16 netfid, struct file_lock *flock) 714 __u8 type, __u16 netfid, struct file_lock *flock)
@@ -739,6 +746,12 @@ cifs_lock_add(struct cifsInodeInfo *cinode, struct cifsLockInfo *lock)
739 mutex_unlock(&cinode->lock_mutex); 746 mutex_unlock(&cinode->lock_mutex);
740} 747}
741 748
749/*
750 * Set the byte-range lock (mandatory style). Returns:
751 * 1) 0, if we set the lock and don't need to request to the server;
752 * 2) 1, if no locks prevent us but we need to request to the server;
753 * 3) -EACCESS, if there is a lock that prevents us and wait is false.
754 */
742static int 755static int
743cifs_lock_add_if(struct cifsInodeInfo *cinode, struct cifsLockInfo *lock, 756cifs_lock_add_if(struct cifsInodeInfo *cinode, struct cifsLockInfo *lock,
744 bool wait) 757 bool wait)
@@ -778,6 +791,13 @@ try_again:
778 return rc; 791 return rc;
779} 792}
780 793
794/*
795 * Check if there is another lock that prevents us to set the lock (posix
796 * style). If such a lock exists, update the flock structure with its
797 * properties. Otherwise, set the flock type to F_UNLCK if we can cache brlocks
798 * or leave it the same if we can't. Returns 0 if we don't need to request to
799 * the server or 1 otherwise.
800 */
781static int 801static int
782cifs_posix_lock_test(struct file *file, struct file_lock *flock) 802cifs_posix_lock_test(struct file *file, struct file_lock *flock)
783{ 803{
@@ -800,6 +820,12 @@ cifs_posix_lock_test(struct file *file, struct file_lock *flock)
800 return rc; 820 return rc;
801} 821}
802 822
823/*
824 * Set the byte-range lock (posix style). Returns:
825 * 1) 0, if we set the lock and don't need to request to the server;
826 * 2) 1, if we need to request to the server;
827 * 3) <0, if the error occurs while setting the lock.
828 */
803static int 829static int
804cifs_posix_lock_set(struct file *file, struct file_lock *flock) 830cifs_posix_lock_set(struct file *file, struct file_lock *flock)
805{ 831{
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index 5de03ec20144..a090bbe6ee29 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -554,7 +554,10 @@ static int find_cifs_entry(const int xid, struct cifs_tcon *pTcon,
554 rc); 554 rc);
555 return rc; 555 return rc;
556 } 556 }
557 cifs_save_resume_key(cifsFile->srch_inf.last_entry, cifsFile); 557 /* FindFirst/Next set last_entry to NULL on malformed reply */
558 if (cifsFile->srch_inf.last_entry)
559 cifs_save_resume_key(cifsFile->srch_inf.last_entry,
560 cifsFile);
558 } 561 }
559 562
560 while ((index_to_find >= cifsFile->srch_inf.index_of_last_entry) && 563 while ((index_to_find >= cifsFile->srch_inf.index_of_last_entry) &&
@@ -562,7 +565,10 @@ static int find_cifs_entry(const int xid, struct cifs_tcon *pTcon,
562 cFYI(1, "calling findnext2"); 565 cFYI(1, "calling findnext2");
563 rc = CIFSFindNext(xid, pTcon, cifsFile->netfid, 566 rc = CIFSFindNext(xid, pTcon, cifsFile->netfid,
564 &cifsFile->srch_inf); 567 &cifsFile->srch_inf);
565 cifs_save_resume_key(cifsFile->srch_inf.last_entry, cifsFile); 568 /* FindFirst/Next set last_entry to NULL on malformed reply */
569 if (cifsFile->srch_inf.last_entry)
570 cifs_save_resume_key(cifsFile->srch_inf.last_entry,
571 cifsFile);
566 if (rc) 572 if (rc)
567 return -ENOENT; 573 return -ENOENT;
568 } 574 }
diff --git a/fs/cifs/smbencrypt.c b/fs/cifs/smbencrypt.c
index 7cacba12b8f1..80d850881938 100644
--- a/fs/cifs/smbencrypt.c
+++ b/fs/cifs/smbencrypt.c
@@ -209,7 +209,7 @@ E_md4hash(const unsigned char *passwd, unsigned char *p16,
209{ 209{
210 int rc; 210 int rc;
211 int len; 211 int len;
212 __u16 wpwd[129]; 212 __le16 wpwd[129];
213 213
214 /* Password cannot be longer than 128 characters */ 214 /* Password cannot be longer than 128 characters */
215 if (passwd) /* Password must be converted to NT unicode */ 215 if (passwd) /* Password must be converted to NT unicode */
@@ -219,8 +219,8 @@ E_md4hash(const unsigned char *passwd, unsigned char *p16,
219 *wpwd = 0; /* Ensure string is null terminated */ 219 *wpwd = 0; /* Ensure string is null terminated */
220 } 220 }
221 221
222 rc = mdfour(p16, (unsigned char *) wpwd, len * sizeof(__u16)); 222 rc = mdfour(p16, (unsigned char *) wpwd, len * sizeof(__le16));
223 memset(wpwd, 0, 129 * sizeof(__u16)); 223 memset(wpwd, 0, 129 * sizeof(__le16));
224 224
225 return rc; 225 return rc;
226} 226}
diff --git a/fs/configfs/inode.c b/fs/configfs/inode.c
index ca418aaf6352..9d8715c45f25 100644
--- a/fs/configfs/inode.c
+++ b/fs/configfs/inode.c
@@ -292,7 +292,7 @@ int __init configfs_inode_init(void)
292 return bdi_init(&configfs_backing_dev_info); 292 return bdi_init(&configfs_backing_dev_info);
293} 293}
294 294
295void __exit configfs_inode_exit(void) 295void configfs_inode_exit(void)
296{ 296{
297 bdi_destroy(&configfs_backing_dev_info); 297 bdi_destroy(&configfs_backing_dev_info);
298} 298}
diff --git a/fs/configfs/mount.c b/fs/configfs/mount.c
index ecc62178beda..276e15cafd58 100644
--- a/fs/configfs/mount.c
+++ b/fs/configfs/mount.c
@@ -143,28 +143,26 @@ static int __init configfs_init(void)
143 goto out; 143 goto out;
144 144
145 config_kobj = kobject_create_and_add("config", kernel_kobj); 145 config_kobj = kobject_create_and_add("config", kernel_kobj);
146 if (!config_kobj) { 146 if (!config_kobj)
147 kmem_cache_destroy(configfs_dir_cachep); 147 goto out2;
148 configfs_dir_cachep = NULL; 148
149 goto out; 149 err = configfs_inode_init();
150 } 150 if (err)
151 goto out3;
151 152
152 err = register_filesystem(&configfs_fs_type); 153 err = register_filesystem(&configfs_fs_type);
153 if (err) { 154 if (err)
154 printk(KERN_ERR "configfs: Unable to register filesystem!\n"); 155 goto out4;
155 kobject_put(config_kobj);
156 kmem_cache_destroy(configfs_dir_cachep);
157 configfs_dir_cachep = NULL;
158 goto out;
159 }
160 156
161 err = configfs_inode_init(); 157 return 0;
162 if (err) { 158out4:
163 unregister_filesystem(&configfs_fs_type); 159 printk(KERN_ERR "configfs: Unable to register filesystem!\n");
164 kobject_put(config_kobj); 160 configfs_inode_exit();
165 kmem_cache_destroy(configfs_dir_cachep); 161out3:
166 configfs_dir_cachep = NULL; 162 kobject_put(config_kobj);
167 } 163out2:
164 kmem_cache_destroy(configfs_dir_cachep);
165 configfs_dir_cachep = NULL;
168out: 166out:
169 return err; 167 return err;
170} 168}
diff --git a/fs/dcache.c b/fs/dcache.c
index 10ba92def3f6..89509b5a090e 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -2439,16 +2439,14 @@ static int prepend_name(char **buffer, int *buflen, struct qstr *name)
2439/** 2439/**
2440 * prepend_path - Prepend path string to a buffer 2440 * prepend_path - Prepend path string to a buffer
2441 * @path: the dentry/vfsmount to report 2441 * @path: the dentry/vfsmount to report
2442 * @root: root vfsmnt/dentry (may be modified by this function) 2442 * @root: root vfsmnt/dentry
2443 * @buffer: pointer to the end of the buffer 2443 * @buffer: pointer to the end of the buffer
2444 * @buflen: pointer to buffer length 2444 * @buflen: pointer to buffer length
2445 * 2445 *
2446 * Caller holds the rename_lock. 2446 * Caller holds the rename_lock.
2447 *
2448 * If path is not reachable from the supplied root, then the value of
2449 * root is changed (without modifying refcounts).
2450 */ 2447 */
2451static int prepend_path(const struct path *path, struct path *root, 2448static int prepend_path(const struct path *path,
2449 const struct path *root,
2452 char **buffer, int *buflen) 2450 char **buffer, int *buflen)
2453{ 2451{
2454 struct dentry *dentry = path->dentry; 2452 struct dentry *dentry = path->dentry;
@@ -2483,10 +2481,10 @@ static int prepend_path(const struct path *path, struct path *root,
2483 dentry = parent; 2481 dentry = parent;
2484 } 2482 }
2485 2483
2486out:
2487 if (!error && !slash) 2484 if (!error && !slash)
2488 error = prepend(buffer, buflen, "/", 1); 2485 error = prepend(buffer, buflen, "/", 1);
2489 2486
2487out:
2490 br_read_unlock(vfsmount_lock); 2488 br_read_unlock(vfsmount_lock);
2491 return error; 2489 return error;
2492 2490
@@ -2500,15 +2498,17 @@ global_root:
2500 WARN(1, "Root dentry has weird name <%.*s>\n", 2498 WARN(1, "Root dentry has weird name <%.*s>\n",
2501 (int) dentry->d_name.len, dentry->d_name.name); 2499 (int) dentry->d_name.len, dentry->d_name.name);
2502 } 2500 }
2503 root->mnt = vfsmnt; 2501 if (!slash)
2504 root->dentry = dentry; 2502 error = prepend(buffer, buflen, "/", 1);
2503 if (!error)
2504 error = vfsmnt->mnt_ns ? 1 : 2;
2505 goto out; 2505 goto out;
2506} 2506}
2507 2507
2508/** 2508/**
2509 * __d_path - return the path of a dentry 2509 * __d_path - return the path of a dentry
2510 * @path: the dentry/vfsmount to report 2510 * @path: the dentry/vfsmount to report
2511 * @root: root vfsmnt/dentry (may be modified by this function) 2511 * @root: root vfsmnt/dentry
2512 * @buf: buffer to return value in 2512 * @buf: buffer to return value in
2513 * @buflen: buffer length 2513 * @buflen: buffer length
2514 * 2514 *
@@ -2519,10 +2519,10 @@ global_root:
2519 * 2519 *
2520 * "buflen" should be positive. 2520 * "buflen" should be positive.
2521 * 2521 *
2522 * If path is not reachable from the supplied root, then the value of 2522 * If the path is not reachable from the supplied root, return %NULL.
2523 * root is changed (without modifying refcounts).
2524 */ 2523 */
2525char *__d_path(const struct path *path, struct path *root, 2524char *__d_path(const struct path *path,
2525 const struct path *root,
2526 char *buf, int buflen) 2526 char *buf, int buflen)
2527{ 2527{
2528 char *res = buf + buflen; 2528 char *res = buf + buflen;
@@ -2533,7 +2533,28 @@ char *__d_path(const struct path *path, struct path *root,
2533 error = prepend_path(path, root, &res, &buflen); 2533 error = prepend_path(path, root, &res, &buflen);
2534 write_sequnlock(&rename_lock); 2534 write_sequnlock(&rename_lock);
2535 2535
2536 if (error) 2536 if (error < 0)
2537 return ERR_PTR(error);
2538 if (error > 0)
2539 return NULL;
2540 return res;
2541}
2542
2543char *d_absolute_path(const struct path *path,
2544 char *buf, int buflen)
2545{
2546 struct path root = {};
2547 char *res = buf + buflen;
2548 int error;
2549
2550 prepend(&res, &buflen, "\0", 1);
2551 write_seqlock(&rename_lock);
2552 error = prepend_path(path, &root, &res, &buflen);
2553 write_sequnlock(&rename_lock);
2554
2555 if (error > 1)
2556 error = -EINVAL;
2557 if (error < 0)
2537 return ERR_PTR(error); 2558 return ERR_PTR(error);
2538 return res; 2559 return res;
2539} 2560}
@@ -2541,8 +2562,9 @@ char *__d_path(const struct path *path, struct path *root,
2541/* 2562/*
2542 * same as __d_path but appends "(deleted)" for unlinked files. 2563 * same as __d_path but appends "(deleted)" for unlinked files.
2543 */ 2564 */
2544static int path_with_deleted(const struct path *path, struct path *root, 2565static int path_with_deleted(const struct path *path,
2545 char **buf, int *buflen) 2566 const struct path *root,
2567 char **buf, int *buflen)
2546{ 2568{
2547 prepend(buf, buflen, "\0", 1); 2569 prepend(buf, buflen, "\0", 1);
2548 if (d_unlinked(path->dentry)) { 2570 if (d_unlinked(path->dentry)) {
@@ -2579,7 +2601,6 @@ char *d_path(const struct path *path, char *buf, int buflen)
2579{ 2601{
2580 char *res = buf + buflen; 2602 char *res = buf + buflen;
2581 struct path root; 2603 struct path root;
2582 struct path tmp;
2583 int error; 2604 int error;
2584 2605
2585 /* 2606 /*
@@ -2594,9 +2615,8 @@ char *d_path(const struct path *path, char *buf, int buflen)
2594 2615
2595 get_fs_root(current->fs, &root); 2616 get_fs_root(current->fs, &root);
2596 write_seqlock(&rename_lock); 2617 write_seqlock(&rename_lock);
2597 tmp = root; 2618 error = path_with_deleted(path, &root, &res, &buflen);
2598 error = path_with_deleted(path, &tmp, &res, &buflen); 2619 if (error < 0)
2599 if (error)
2600 res = ERR_PTR(error); 2620 res = ERR_PTR(error);
2601 write_sequnlock(&rename_lock); 2621 write_sequnlock(&rename_lock);
2602 path_put(&root); 2622 path_put(&root);
@@ -2617,7 +2637,6 @@ char *d_path_with_unreachable(const struct path *path, char *buf, int buflen)
2617{ 2637{
2618 char *res = buf + buflen; 2638 char *res = buf + buflen;
2619 struct path root; 2639 struct path root;
2620 struct path tmp;
2621 int error; 2640 int error;
2622 2641
2623 if (path->dentry->d_op && path->dentry->d_op->d_dname) 2642 if (path->dentry->d_op && path->dentry->d_op->d_dname)
@@ -2625,9 +2644,8 @@ char *d_path_with_unreachable(const struct path *path, char *buf, int buflen)
2625 2644
2626 get_fs_root(current->fs, &root); 2645 get_fs_root(current->fs, &root);
2627 write_seqlock(&rename_lock); 2646 write_seqlock(&rename_lock);
2628 tmp = root; 2647 error = path_with_deleted(path, &root, &res, &buflen);
2629 error = path_with_deleted(path, &tmp, &res, &buflen); 2648 if (error > 0)
2630 if (!error && !path_equal(&tmp, &root))
2631 error = prepend_unreachable(&res, &buflen); 2649 error = prepend_unreachable(&res, &buflen);
2632 write_sequnlock(&rename_lock); 2650 write_sequnlock(&rename_lock);
2633 path_put(&root); 2651 path_put(&root);
@@ -2758,19 +2776,18 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size)
2758 write_seqlock(&rename_lock); 2776 write_seqlock(&rename_lock);
2759 if (!d_unlinked(pwd.dentry)) { 2777 if (!d_unlinked(pwd.dentry)) {
2760 unsigned long len; 2778 unsigned long len;
2761 struct path tmp = root;
2762 char *cwd = page + PAGE_SIZE; 2779 char *cwd = page + PAGE_SIZE;
2763 int buflen = PAGE_SIZE; 2780 int buflen = PAGE_SIZE;
2764 2781
2765 prepend(&cwd, &buflen, "\0", 1); 2782 prepend(&cwd, &buflen, "\0", 1);
2766 error = prepend_path(&pwd, &tmp, &cwd, &buflen); 2783 error = prepend_path(&pwd, &root, &cwd, &buflen);
2767 write_sequnlock(&rename_lock); 2784 write_sequnlock(&rename_lock);
2768 2785
2769 if (error) 2786 if (error < 0)
2770 goto out; 2787 goto out;
2771 2788
2772 /* Unreachable from current root */ 2789 /* Unreachable from current root */
2773 if (!path_equal(&tmp, &root)) { 2790 if (error > 0) {
2774 error = prepend_unreachable(&cwd, &buflen); 2791 error = prepend_unreachable(&cwd, &buflen);
2775 if (error) 2792 if (error)
2776 goto out; 2793 goto out;
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 61fa9e1614af..607b1557d292 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -1095,7 +1095,7 @@ static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode,
1095 le32_to_cpu(EXT_FIRST_INDEX(neh)->ei_block), 1095 le32_to_cpu(EXT_FIRST_INDEX(neh)->ei_block),
1096 ext4_idx_pblock(EXT_FIRST_INDEX(neh))); 1096 ext4_idx_pblock(EXT_FIRST_INDEX(neh)));
1097 1097
1098 neh->eh_depth = cpu_to_le16(neh->eh_depth + 1); 1098 neh->eh_depth = cpu_to_le16(le16_to_cpu(neh->eh_depth) + 1);
1099 ext4_mark_inode_dirty(handle, inode); 1099 ext4_mark_inode_dirty(handle, inode);
1100out: 1100out:
1101 brelse(bh); 1101 brelse(bh);
@@ -2955,7 +2955,6 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
2955 /* Pre-conditions */ 2955 /* Pre-conditions */
2956 BUG_ON(!ext4_ext_is_uninitialized(ex)); 2956 BUG_ON(!ext4_ext_is_uninitialized(ex));
2957 BUG_ON(!in_range(map->m_lblk, ee_block, ee_len)); 2957 BUG_ON(!in_range(map->m_lblk, ee_block, ee_len));
2958 BUG_ON(map->m_lblk + map->m_len > ee_block + ee_len);
2959 2958
2960 /* 2959 /*
2961 * Attempt to transfer newly initialized blocks from the currently 2960 * Attempt to transfer newly initialized blocks from the currently
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 848f436df29f..92655fd89657 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1339,8 +1339,11 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd,
1339 clear_buffer_unwritten(bh); 1339 clear_buffer_unwritten(bh);
1340 } 1340 }
1341 1341
1342 /* skip page if block allocation undone */ 1342 /*
1343 if (buffer_delay(bh) || buffer_unwritten(bh)) 1343 * skip page if block allocation undone and
1344 * block is dirty
1345 */
1346 if (ext4_bh_delay_or_unwritten(NULL, bh))
1344 skip_page = 1; 1347 skip_page = 1;
1345 bh = bh->b_this_page; 1348 bh = bh->b_this_page;
1346 block_start += bh->b_size; 1349 block_start += bh->b_size;
@@ -2387,7 +2390,6 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping,
2387 pgoff_t index; 2390 pgoff_t index;
2388 struct inode *inode = mapping->host; 2391 struct inode *inode = mapping->host;
2389 handle_t *handle; 2392 handle_t *handle;
2390 loff_t page_len;
2391 2393
2392 index = pos >> PAGE_CACHE_SHIFT; 2394 index = pos >> PAGE_CACHE_SHIFT;
2393 2395
@@ -2434,13 +2436,6 @@ retry:
2434 */ 2436 */
2435 if (pos + len > inode->i_size) 2437 if (pos + len > inode->i_size)
2436 ext4_truncate_failed_write(inode); 2438 ext4_truncate_failed_write(inode);
2437 } else {
2438 page_len = pos & (PAGE_CACHE_SIZE - 1);
2439 if (page_len > 0) {
2440 ret = ext4_discard_partial_page_buffers_no_lock(handle,
2441 inode, page, pos - page_len, page_len,
2442 EXT4_DISCARD_PARTIAL_PG_ZERO_UNMAPPED);
2443 }
2444 } 2439 }
2445 2440
2446 if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) 2441 if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
@@ -2483,7 +2478,6 @@ static int ext4_da_write_end(struct file *file,
2483 loff_t new_i_size; 2478 loff_t new_i_size;
2484 unsigned long start, end; 2479 unsigned long start, end;
2485 int write_mode = (int)(unsigned long)fsdata; 2480 int write_mode = (int)(unsigned long)fsdata;
2486 loff_t page_len;
2487 2481
2488 if (write_mode == FALL_BACK_TO_NONDELALLOC) { 2482 if (write_mode == FALL_BACK_TO_NONDELALLOC) {
2489 if (ext4_should_order_data(inode)) { 2483 if (ext4_should_order_data(inode)) {
@@ -2508,7 +2502,7 @@ static int ext4_da_write_end(struct file *file,
2508 */ 2502 */
2509 2503
2510 new_i_size = pos + copied; 2504 new_i_size = pos + copied;
2511 if (new_i_size > EXT4_I(inode)->i_disksize) { 2505 if (copied && new_i_size > EXT4_I(inode)->i_disksize) {
2512 if (ext4_da_should_update_i_disksize(page, end)) { 2506 if (ext4_da_should_update_i_disksize(page, end)) {
2513 down_write(&EXT4_I(inode)->i_data_sem); 2507 down_write(&EXT4_I(inode)->i_data_sem);
2514 if (new_i_size > EXT4_I(inode)->i_disksize) { 2508 if (new_i_size > EXT4_I(inode)->i_disksize) {
@@ -2532,16 +2526,6 @@ static int ext4_da_write_end(struct file *file,
2532 } 2526 }
2533 ret2 = generic_write_end(file, mapping, pos, len, copied, 2527 ret2 = generic_write_end(file, mapping, pos, len, copied,
2534 page, fsdata); 2528 page, fsdata);
2535
2536 page_len = PAGE_CACHE_SIZE -
2537 ((pos + copied - 1) & (PAGE_CACHE_SIZE - 1));
2538
2539 if (page_len > 0) {
2540 ret = ext4_discard_partial_page_buffers_no_lock(handle,
2541 inode, page, pos + copied - 1, page_len,
2542 EXT4_DISCARD_PARTIAL_PG_ZERO_UNMAPPED);
2543 }
2544
2545 copied = ret2; 2529 copied = ret2;
2546 if (ret2 < 0) 2530 if (ret2 < 0)
2547 ret = ret2; 2531 ret = ret2;
@@ -2781,10 +2765,11 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
2781 iocb->private, io_end->inode->i_ino, iocb, offset, 2765 iocb->private, io_end->inode->i_ino, iocb, offset,
2782 size); 2766 size);
2783 2767
2768 iocb->private = NULL;
2769
2784 /* if not aio dio with unwritten extents, just free io and return */ 2770 /* if not aio dio with unwritten extents, just free io and return */
2785 if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) { 2771 if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) {
2786 ext4_free_io_end(io_end); 2772 ext4_free_io_end(io_end);
2787 iocb->private = NULL;
2788out: 2773out:
2789 if (is_async) 2774 if (is_async)
2790 aio_complete(iocb, ret, 0); 2775 aio_complete(iocb, ret, 0);
@@ -2807,7 +2792,6 @@ out:
2807 spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); 2792 spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
2808 2793
2809 /* queue the work to convert unwritten extents to written */ 2794 /* queue the work to convert unwritten extents to written */
2810 iocb->private = NULL;
2811 queue_work(wq, &io_end->work); 2795 queue_work(wq, &io_end->work);
2812 2796
2813 /* XXX: probably should move into the real I/O completion handler */ 2797 /* XXX: probably should move into the real I/O completion handler */
@@ -3203,26 +3187,8 @@ int ext4_discard_partial_page_buffers_no_lock(handle_t *handle,
3203 3187
3204 iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits); 3188 iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits);
3205 3189
3206 if (!page_has_buffers(page)) { 3190 if (!page_has_buffers(page))
3207 /* 3191 create_empty_buffers(page, blocksize, 0);
3208 * If the range to be discarded covers a partial block
3209 * we need to get the page buffers. This is because
3210 * partial blocks cannot be released and the page needs
3211 * to be updated with the contents of the block before
3212 * we write the zeros on top of it.
3213 */
3214 if ((from & (blocksize - 1)) ||
3215 ((from + length) & (blocksize - 1))) {
3216 create_empty_buffers(page, blocksize, 0);
3217 } else {
3218 /*
3219 * If there are no partial blocks,
3220 * there is nothing to update,
3221 * so we can return now
3222 */
3223 return 0;
3224 }
3225 }
3226 3192
3227 /* Find the buffer that contains "offset" */ 3193 /* Find the buffer that contains "offset" */
3228 bh = page_buffers(page); 3194 bh = page_buffers(page);
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index 7ce1d0b19c94..7e106c810c62 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -385,6 +385,18 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
385 385
386 block_end = block_start + blocksize; 386 block_end = block_start + blocksize;
387 if (block_start >= len) { 387 if (block_start >= len) {
388 /*
389 * Comments copied from block_write_full_page_endio:
390 *
391 * The page straddles i_size. It must be zeroed out on
392 * each and every writepage invocation because it may
393 * be mmapped. "A file is mapped in multiples of the
394 * page size. For a file that is not a multiple of
395 * the page size, the remaining memory is zeroed when
396 * mapped, and writes to that region are not written
397 * out to the file."
398 */
399 zero_user_segment(page, block_start, block_end);
388 clear_buffer_dirty(bh); 400 clear_buffer_dirty(bh);
389 set_buffer_uptodate(bh); 401 set_buffer_uptodate(bh);
390 continue; 402 continue;
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 3858767ec672..3e1329e2f826 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1155,9 +1155,9 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs)
1155 seq_puts(seq, ",block_validity"); 1155 seq_puts(seq, ",block_validity");
1156 1156
1157 if (!test_opt(sb, INIT_INODE_TABLE)) 1157 if (!test_opt(sb, INIT_INODE_TABLE))
1158 seq_puts(seq, ",noinit_inode_table"); 1158 seq_puts(seq, ",noinit_itable");
1159 else if (sbi->s_li_wait_mult != EXT4_DEF_LI_WAIT_MULT) 1159 else if (sbi->s_li_wait_mult != EXT4_DEF_LI_WAIT_MULT)
1160 seq_printf(seq, ",init_inode_table=%u", 1160 seq_printf(seq, ",init_itable=%u",
1161 (unsigned) sbi->s_li_wait_mult); 1161 (unsigned) sbi->s_li_wait_mult);
1162 1162
1163 ext4_show_quota_options(seq, sb); 1163 ext4_show_quota_options(seq, sb);
@@ -1333,8 +1333,7 @@ enum {
1333 Opt_nomblk_io_submit, Opt_block_validity, Opt_noblock_validity, 1333 Opt_nomblk_io_submit, Opt_block_validity, Opt_noblock_validity,
1334 Opt_inode_readahead_blks, Opt_journal_ioprio, 1334 Opt_inode_readahead_blks, Opt_journal_ioprio,
1335 Opt_dioread_nolock, Opt_dioread_lock, 1335 Opt_dioread_nolock, Opt_dioread_lock,
1336 Opt_discard, Opt_nodiscard, 1336 Opt_discard, Opt_nodiscard, Opt_init_itable, Opt_noinit_itable,
1337 Opt_init_inode_table, Opt_noinit_inode_table,
1338}; 1337};
1339 1338
1340static const match_table_t tokens = { 1339static const match_table_t tokens = {
@@ -1407,9 +1406,9 @@ static const match_table_t tokens = {
1407 {Opt_dioread_lock, "dioread_lock"}, 1406 {Opt_dioread_lock, "dioread_lock"},
1408 {Opt_discard, "discard"}, 1407 {Opt_discard, "discard"},
1409 {Opt_nodiscard, "nodiscard"}, 1408 {Opt_nodiscard, "nodiscard"},
1410 {Opt_init_inode_table, "init_itable=%u"}, 1409 {Opt_init_itable, "init_itable=%u"},
1411 {Opt_init_inode_table, "init_itable"}, 1410 {Opt_init_itable, "init_itable"},
1412 {Opt_noinit_inode_table, "noinit_itable"}, 1411 {Opt_noinit_itable, "noinit_itable"},
1413 {Opt_err, NULL}, 1412 {Opt_err, NULL},
1414}; 1413};
1415 1414
@@ -1892,7 +1891,7 @@ set_qf_format:
1892 case Opt_dioread_lock: 1891 case Opt_dioread_lock:
1893 clear_opt(sb, DIOREAD_NOLOCK); 1892 clear_opt(sb, DIOREAD_NOLOCK);
1894 break; 1893 break;
1895 case Opt_init_inode_table: 1894 case Opt_init_itable:
1896 set_opt(sb, INIT_INODE_TABLE); 1895 set_opt(sb, INIT_INODE_TABLE);
1897 if (args[0].from) { 1896 if (args[0].from) {
1898 if (match_int(&args[0], &option)) 1897 if (match_int(&args[0], &option))
@@ -1903,7 +1902,7 @@ set_qf_format:
1903 return 0; 1902 return 0;
1904 sbi->s_li_wait_mult = option; 1903 sbi->s_li_wait_mult = option;
1905 break; 1904 break;
1906 case Opt_noinit_inode_table: 1905 case Opt_noinit_itable:
1907 clear_opt(sb, INIT_INODE_TABLE); 1906 clear_opt(sb, INIT_INODE_TABLE);
1908 break; 1907 break;
1909 default: 1908 default:
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 73c3992b2bb4..517f211a3bd4 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -47,17 +47,6 @@ struct wb_writeback_work {
47 struct completion *done; /* set if the caller waits */ 47 struct completion *done; /* set if the caller waits */
48}; 48};
49 49
50const char *wb_reason_name[] = {
51 [WB_REASON_BACKGROUND] = "background",
52 [WB_REASON_TRY_TO_FREE_PAGES] = "try_to_free_pages",
53 [WB_REASON_SYNC] = "sync",
54 [WB_REASON_PERIODIC] = "periodic",
55 [WB_REASON_LAPTOP_TIMER] = "laptop_timer",
56 [WB_REASON_FREE_MORE_MEM] = "free_more_memory",
57 [WB_REASON_FS_FREE_SPACE] = "fs_free_space",
58 [WB_REASON_FORKER_THREAD] = "forker_thread"
59};
60
61/* 50/*
62 * Include the creation of the trace points after defining the 51 * Include the creation of the trace points after defining the
63 * wb_writeback_work structure so that the definition remains local to this 52 * wb_writeback_work structure so that the definition remains local to this
@@ -156,6 +145,7 @@ __bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages,
156 * bdi_start_writeback - start writeback 145 * bdi_start_writeback - start writeback
157 * @bdi: the backing device to write from 146 * @bdi: the backing device to write from
158 * @nr_pages: the number of pages to write 147 * @nr_pages: the number of pages to write
148 * @reason: reason why some writeback work was initiated
159 * 149 *
160 * Description: 150 * Description:
161 * This does WB_SYNC_NONE opportunistic writeback. The IO is only 151 * This does WB_SYNC_NONE opportunistic writeback. The IO is only
@@ -1223,6 +1213,7 @@ static void wait_sb_inodes(struct super_block *sb)
1223 * writeback_inodes_sb_nr - writeback dirty inodes from given super_block 1213 * writeback_inodes_sb_nr - writeback dirty inodes from given super_block
1224 * @sb: the superblock 1214 * @sb: the superblock
1225 * @nr: the number of pages to write 1215 * @nr: the number of pages to write
1216 * @reason: reason why some writeback work was initiated
1226 * 1217 *
1227 * Start writeback on some inodes on this super_block. No guarantees are made 1218 * Start writeback on some inodes on this super_block. No guarantees are made
1228 * on how many (if any) will be written, and this function does not wait 1219 * on how many (if any) will be written, and this function does not wait
@@ -1251,6 +1242,7 @@ EXPORT_SYMBOL(writeback_inodes_sb_nr);
1251/** 1242/**
1252 * writeback_inodes_sb - writeback dirty inodes from given super_block 1243 * writeback_inodes_sb - writeback dirty inodes from given super_block
1253 * @sb: the superblock 1244 * @sb: the superblock
1245 * @reason: reason why some writeback work was initiated
1254 * 1246 *
1255 * Start writeback on some inodes on this super_block. No guarantees are made 1247 * Start writeback on some inodes on this super_block. No guarantees are made
1256 * on how many (if any) will be written, and this function does not wait 1248 * on how many (if any) will be written, and this function does not wait
@@ -1265,6 +1257,7 @@ EXPORT_SYMBOL(writeback_inodes_sb);
1265/** 1257/**
1266 * writeback_inodes_sb_if_idle - start writeback if none underway 1258 * writeback_inodes_sb_if_idle - start writeback if none underway
1267 * @sb: the superblock 1259 * @sb: the superblock
1260 * @reason: reason why some writeback work was initiated
1268 * 1261 *
1269 * Invoke writeback_inodes_sb if no writeback is currently underway. 1262 * Invoke writeback_inodes_sb if no writeback is currently underway.
1270 * Returns 1 if writeback was started, 0 if not. 1263 * Returns 1 if writeback was started, 0 if not.
@@ -1285,6 +1278,7 @@ EXPORT_SYMBOL(writeback_inodes_sb_if_idle);
1285 * writeback_inodes_sb_if_idle - start writeback if none underway 1278 * writeback_inodes_sb_if_idle - start writeback if none underway
1286 * @sb: the superblock 1279 * @sb: the superblock
1287 * @nr: the number of pages to write 1280 * @nr: the number of pages to write
1281 * @reason: reason why some writeback work was initiated
1288 * 1282 *
1289 * Invoke writeback_inodes_sb if no writeback is currently underway. 1283 * Invoke writeback_inodes_sb if no writeback is currently underway.
1290 * Returns 1 if writeback was started, 0 if not. 1284 * Returns 1 if writeback was started, 0 if not.
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 5cb8614508c3..2aaf3eaaf13d 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -1512,7 +1512,7 @@ static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode,
1512 else if (outarg->offset + num > file_size) 1512 else if (outarg->offset + num > file_size)
1513 num = file_size - outarg->offset; 1513 num = file_size - outarg->offset;
1514 1514
1515 while (num) { 1515 while (num && req->num_pages < FUSE_MAX_PAGES_PER_REQ) {
1516 struct page *page; 1516 struct page *page;
1517 unsigned int this_num; 1517 unsigned int this_num;
1518 1518
@@ -1526,6 +1526,7 @@ static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode,
1526 1526
1527 num -= this_num; 1527 num -= this_num;
1528 total_len += this_num; 1528 total_len += this_num;
1529 index++;
1529 } 1530 }
1530 req->misc.retrieve_in.offset = outarg->offset; 1531 req->misc.retrieve_in.offset = outarg->offset;
1531 req->misc.retrieve_in.size = total_len; 1532 req->misc.retrieve_in.size = total_len;
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 594f07a81c28..0c84100acd44 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -1556,7 +1556,7 @@ static loff_t fuse_file_llseek(struct file *file, loff_t offset, int origin)
1556 struct inode *inode = file->f_path.dentry->d_inode; 1556 struct inode *inode = file->f_path.dentry->d_inode;
1557 1557
1558 mutex_lock(&inode->i_mutex); 1558 mutex_lock(&inode->i_mutex);
1559 if (origin != SEEK_CUR || origin != SEEK_SET) { 1559 if (origin != SEEK_CUR && origin != SEEK_SET) {
1560 retval = fuse_update_attributes(inode, NULL, file, NULL); 1560 retval = fuse_update_attributes(inode, NULL, file, NULL);
1561 if (retval) 1561 if (retval)
1562 goto exit; 1562 goto exit;
@@ -1567,6 +1567,10 @@ static loff_t fuse_file_llseek(struct file *file, loff_t offset, int origin)
1567 offset += i_size_read(inode); 1567 offset += i_size_read(inode);
1568 break; 1568 break;
1569 case SEEK_CUR: 1569 case SEEK_CUR:
1570 if (offset == 0) {
1571 retval = file->f_pos;
1572 goto exit;
1573 }
1570 offset += file->f_pos; 1574 offset += file->f_pos;
1571 break; 1575 break;
1572 case SEEK_DATA: 1576 case SEEK_DATA:
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 3e6d72756479..aa83109b9431 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -1138,28 +1138,28 @@ static int __init fuse_fs_init(void)
1138{ 1138{
1139 int err; 1139 int err;
1140 1140
1141 err = register_filesystem(&fuse_fs_type);
1142 if (err)
1143 goto out;
1144
1145 err = register_fuseblk();
1146 if (err)
1147 goto out_unreg;
1148
1149 fuse_inode_cachep = kmem_cache_create("fuse_inode", 1141 fuse_inode_cachep = kmem_cache_create("fuse_inode",
1150 sizeof(struct fuse_inode), 1142 sizeof(struct fuse_inode),
1151 0, SLAB_HWCACHE_ALIGN, 1143 0, SLAB_HWCACHE_ALIGN,
1152 fuse_inode_init_once); 1144 fuse_inode_init_once);
1153 err = -ENOMEM; 1145 err = -ENOMEM;
1154 if (!fuse_inode_cachep) 1146 if (!fuse_inode_cachep)
1155 goto out_unreg2; 1147 goto out;
1148
1149 err = register_fuseblk();
1150 if (err)
1151 goto out2;
1152
1153 err = register_filesystem(&fuse_fs_type);
1154 if (err)
1155 goto out3;
1156 1156
1157 return 0; 1157 return 0;
1158 1158
1159 out_unreg2: 1159 out3:
1160 unregister_fuseblk(); 1160 unregister_fuseblk();
1161 out_unreg: 1161 out2:
1162 unregister_filesystem(&fuse_fs_type); 1162 kmem_cache_destroy(fuse_inode_cachep);
1163 out: 1163 out:
1164 return err; 1164 return err;
1165} 1165}
diff --git a/fs/namespace.c b/fs/namespace.c
index 6d3a1963879b..cfc6d4448aa5 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1048,15 +1048,12 @@ static int show_mountinfo(struct seq_file *m, void *v)
1048 if (err) 1048 if (err)
1049 goto out; 1049 goto out;
1050 seq_putc(m, ' '); 1050 seq_putc(m, ' ');
1051 seq_path_root(m, &mnt_path, &root, " \t\n\\"); 1051
1052 if (root.mnt != p->root.mnt || root.dentry != p->root.dentry) { 1052 /* mountpoints outside of chroot jail will give SEQ_SKIP on this */
1053 /* 1053 err = seq_path_root(m, &mnt_path, &root, " \t\n\\");
1054 * Mountpoint is outside root, discard that one. Ugly, 1054 if (err)
1055 * but less so than trying to do that in iterator in a 1055 goto out;
1056 * race-free way (due to renames). 1056
1057 */
1058 return SEQ_SKIP;
1059 }
1060 seq_puts(m, mnt->mnt_flags & MNT_READONLY ? " ro" : " rw"); 1057 seq_puts(m, mnt->mnt_flags & MNT_READONLY ? " ro" : " rw");
1061 show_mnt_opts(m, mnt); 1058 show_mnt_opts(m, mnt);
1062 1059
@@ -2776,3 +2773,8 @@ void kern_unmount(struct vfsmount *mnt)
2776 } 2773 }
2777} 2774}
2778EXPORT_SYMBOL(kern_unmount); 2775EXPORT_SYMBOL(kern_unmount);
2776
2777bool our_mnt(struct vfsmount *mnt)
2778{
2779 return check_mnt(mnt);
2780}
diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c
index 5b5fa33b6b9d..cbd1a61c110a 100644
--- a/fs/ncpfs/inode.c
+++ b/fs/ncpfs/inode.c
@@ -548,7 +548,7 @@ static int ncp_fill_super(struct super_block *sb, void *raw_data, int silent)
548 548
549 error = bdi_setup_and_register(&server->bdi, "ncpfs", BDI_CAP_MAP_COPY); 549 error = bdi_setup_and_register(&server->bdi, "ncpfs", BDI_CAP_MAP_COPY);
550 if (error) 550 if (error)
551 goto out_bdi; 551 goto out_fput;
552 552
553 server->ncp_filp = ncp_filp; 553 server->ncp_filp = ncp_filp;
554 server->ncp_sock = sock; 554 server->ncp_sock = sock;
@@ -559,7 +559,7 @@ static int ncp_fill_super(struct super_block *sb, void *raw_data, int silent)
559 error = -EBADF; 559 error = -EBADF;
560 server->info_filp = fget(data.info_fd); 560 server->info_filp = fget(data.info_fd);
561 if (!server->info_filp) 561 if (!server->info_filp)
562 goto out_fput; 562 goto out_bdi;
563 error = -ENOTSOCK; 563 error = -ENOTSOCK;
564 sock_inode = server->info_filp->f_path.dentry->d_inode; 564 sock_inode = server->info_filp->f_path.dentry->d_inode;
565 if (!S_ISSOCK(sock_inode->i_mode)) 565 if (!S_ISSOCK(sock_inode->i_mode))
@@ -746,9 +746,9 @@ out_nls:
746out_fput2: 746out_fput2:
747 if (server->info_filp) 747 if (server->info_filp)
748 fput(server->info_filp); 748 fput(server->info_filp);
749out_fput:
750 bdi_destroy(&server->bdi);
751out_bdi: 749out_bdi:
750 bdi_destroy(&server->bdi);
751out_fput:
752 /* 23/12/1998 Marcin Dalecki <dalecki@cs.net.pl>: 752 /* 23/12/1998 Marcin Dalecki <dalecki@cs.net.pl>:
753 * 753 *
754 * The previously used put_filp(ncp_filp); was bogus, since 754 * The previously used put_filp(ncp_filp); was bogus, since
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index eca56d4b39c0..606ef0f20aed 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -147,7 +147,7 @@ static loff_t nfs_file_llseek(struct file *filp, loff_t offset, int origin)
147 * origin == SEEK_END || SEEK_DATA || SEEK_HOLE => we must revalidate 147 * origin == SEEK_END || SEEK_DATA || SEEK_HOLE => we must revalidate
148 * the cached file length 148 * the cached file length
149 */ 149 */
150 if (origin != SEEK_SET || origin != SEEK_CUR) { 150 if (origin != SEEK_SET && origin != SEEK_CUR) {
151 struct inode *inode = filp->f_mapping->host; 151 struct inode *inode = filp->f_mapping->host;
152 152
153 int retval = nfs_revalidate_file_size(inode, filp); 153 int retval = nfs_revalidate_file_size(inode, filp);
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index be2bbac13817..d9f4d78c3413 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -39,6 +39,8 @@
39#include <linux/delay.h> 39#include <linux/delay.h>
40#include <linux/errno.h> 40#include <linux/errno.h>
41#include <linux/string.h> 41#include <linux/string.h>
42#include <linux/ratelimit.h>
43#include <linux/printk.h>
42#include <linux/slab.h> 44#include <linux/slab.h>
43#include <linux/sunrpc/clnt.h> 45#include <linux/sunrpc/clnt.h>
44#include <linux/sunrpc/gss_api.h> 46#include <linux/sunrpc/gss_api.h>
@@ -894,6 +896,8 @@ out:
894 896
895static int can_open_delegated(struct nfs_delegation *delegation, fmode_t fmode) 897static int can_open_delegated(struct nfs_delegation *delegation, fmode_t fmode)
896{ 898{
899 if (delegation == NULL)
900 return 0;
897 if ((delegation->type & fmode) != fmode) 901 if ((delegation->type & fmode) != fmode)
898 return 0; 902 return 0;
899 if (test_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags)) 903 if (test_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags))
@@ -1036,8 +1040,7 @@ static struct nfs4_state *nfs4_try_open_cached(struct nfs4_opendata *opendata)
1036 } 1040 }
1037 rcu_read_lock(); 1041 rcu_read_lock();
1038 delegation = rcu_dereference(nfsi->delegation); 1042 delegation = rcu_dereference(nfsi->delegation);
1039 if (delegation == NULL || 1043 if (!can_open_delegated(delegation, fmode)) {
1040 !can_open_delegated(delegation, fmode)) {
1041 rcu_read_unlock(); 1044 rcu_read_unlock();
1042 break; 1045 break;
1043 } 1046 }
@@ -1091,7 +1094,12 @@ static struct nfs4_state *nfs4_opendata_to_nfs4_state(struct nfs4_opendata *data
1091 if (delegation) 1094 if (delegation)
1092 delegation_flags = delegation->flags; 1095 delegation_flags = delegation->flags;
1093 rcu_read_unlock(); 1096 rcu_read_unlock();
1094 if ((delegation_flags & 1UL<<NFS_DELEGATION_NEED_RECLAIM) == 0) 1097 if (data->o_arg.claim == NFS4_OPEN_CLAIM_DELEGATE_CUR) {
1098 pr_err_ratelimited("NFS: Broken NFSv4 server %s is "
1099 "returning a delegation for "
1100 "OPEN(CLAIM_DELEGATE_CUR)\n",
1101 NFS_CLIENT(inode)->cl_server);
1102 } else if ((delegation_flags & 1UL<<NFS_DELEGATION_NEED_RECLAIM) == 0)
1095 nfs_inode_set_delegation(state->inode, 1103 nfs_inode_set_delegation(state->inode,
1096 data->owner->so_cred, 1104 data->owner->so_cred,
1097 &data->o_res); 1105 &data->o_res);
@@ -1423,11 +1431,9 @@ static void nfs4_open_prepare(struct rpc_task *task, void *calldata)
1423 goto out_no_action; 1431 goto out_no_action;
1424 rcu_read_lock(); 1432 rcu_read_lock();
1425 delegation = rcu_dereference(NFS_I(data->state->inode)->delegation); 1433 delegation = rcu_dereference(NFS_I(data->state->inode)->delegation);
1426 if (delegation != NULL && 1434 if (data->o_arg.claim != NFS4_OPEN_CLAIM_DELEGATE_CUR &&
1427 test_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags) == 0) { 1435 can_open_delegated(delegation, data->o_arg.fmode))
1428 rcu_read_unlock(); 1436 goto unlock_no_action;
1429 goto out_no_action;
1430 }
1431 rcu_read_unlock(); 1437 rcu_read_unlock();
1432 } 1438 }
1433 /* Update sequence id. */ 1439 /* Update sequence id. */
@@ -1444,6 +1450,8 @@ static void nfs4_open_prepare(struct rpc_task *task, void *calldata)
1444 return; 1450 return;
1445 rpc_call_start(task); 1451 rpc_call_start(task);
1446 return; 1452 return;
1453unlock_no_action:
1454 rcu_read_unlock();
1447out_no_action: 1455out_no_action:
1448 task->tk_action = NULL; 1456 task->tk_action = NULL;
1449 1457
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 39914be40b03..6a7107ae6b72 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -1156,11 +1156,13 @@ restart:
1156 if (status >= 0) { 1156 if (status >= 0) {
1157 status = nfs4_reclaim_locks(state, ops); 1157 status = nfs4_reclaim_locks(state, ops);
1158 if (status >= 0) { 1158 if (status >= 0) {
1159 spin_lock(&state->state_lock);
1159 list_for_each_entry(lock, &state->lock_states, ls_locks) { 1160 list_for_each_entry(lock, &state->lock_states, ls_locks) {
1160 if (!(lock->ls_flags & NFS_LOCK_INITIALIZED)) 1161 if (!(lock->ls_flags & NFS_LOCK_INITIALIZED))
1161 printk("%s: Lock reclaim failed!\n", 1162 printk("%s: Lock reclaim failed!\n",
1162 __func__); 1163 __func__);
1163 } 1164 }
1165 spin_unlock(&state->state_lock);
1164 nfs4_put_open_state(state); 1166 nfs4_put_open_state(state);
1165 goto restart; 1167 goto restart;
1166 } 1168 }
@@ -1224,10 +1226,12 @@ static void nfs4_clear_open_state(struct nfs4_state *state)
1224 clear_bit(NFS_O_RDONLY_STATE, &state->flags); 1226 clear_bit(NFS_O_RDONLY_STATE, &state->flags);
1225 clear_bit(NFS_O_WRONLY_STATE, &state->flags); 1227 clear_bit(NFS_O_WRONLY_STATE, &state->flags);
1226 clear_bit(NFS_O_RDWR_STATE, &state->flags); 1228 clear_bit(NFS_O_RDWR_STATE, &state->flags);
1229 spin_lock(&state->state_lock);
1227 list_for_each_entry(lock, &state->lock_states, ls_locks) { 1230 list_for_each_entry(lock, &state->lock_states, ls_locks) {
1228 lock->ls_seqid.flags = 0; 1231 lock->ls_seqid.flags = 0;
1229 lock->ls_flags &= ~NFS_LOCK_INITIALIZED; 1232 lock->ls_flags &= ~NFS_LOCK_INITIALIZED;
1230 } 1233 }
1234 spin_unlock(&state->state_lock);
1231} 1235}
1232 1236
1233static void nfs4_reset_seqids(struct nfs_server *server, 1237static void nfs4_reset_seqids(struct nfs_server *server,
@@ -1350,12 +1354,14 @@ static void nfs4_warn_keyexpired(const char *s)
1350static int nfs4_recovery_handle_error(struct nfs_client *clp, int error) 1354static int nfs4_recovery_handle_error(struct nfs_client *clp, int error)
1351{ 1355{
1352 switch (error) { 1356 switch (error) {
1357 case 0:
1358 break;
1353 case -NFS4ERR_CB_PATH_DOWN: 1359 case -NFS4ERR_CB_PATH_DOWN:
1354 nfs_handle_cb_pathdown(clp); 1360 nfs_handle_cb_pathdown(clp);
1355 return 0; 1361 break;
1356 case -NFS4ERR_NO_GRACE: 1362 case -NFS4ERR_NO_GRACE:
1357 nfs4_state_end_reclaim_reboot(clp); 1363 nfs4_state_end_reclaim_reboot(clp);
1358 return 0; 1364 break;
1359 case -NFS4ERR_STALE_CLIENTID: 1365 case -NFS4ERR_STALE_CLIENTID:
1360 case -NFS4ERR_LEASE_MOVED: 1366 case -NFS4ERR_LEASE_MOVED:
1361 set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state); 1367 set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);
@@ -1375,13 +1381,15 @@ static int nfs4_recovery_handle_error(struct nfs_client *clp, int error)
1375 case -NFS4ERR_SEQ_MISORDERED: 1381 case -NFS4ERR_SEQ_MISORDERED:
1376 set_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state); 1382 set_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state);
1377 /* Zero session reset errors */ 1383 /* Zero session reset errors */
1378 return 0; 1384 break;
1379 case -EKEYEXPIRED: 1385 case -EKEYEXPIRED:
1380 /* Nothing we can do */ 1386 /* Nothing we can do */
1381 nfs4_warn_keyexpired(clp->cl_hostname); 1387 nfs4_warn_keyexpired(clp->cl_hostname);
1382 return 0; 1388 break;
1389 default:
1390 return error;
1383 } 1391 }
1384 return error; 1392 return 0;
1385} 1393}
1386 1394
1387static int nfs4_do_reclaim(struct nfs_client *clp, const struct nfs4_state_recovery_ops *ops) 1395static int nfs4_do_reclaim(struct nfs_client *clp, const struct nfs4_state_recovery_ops *ops)
@@ -1428,7 +1436,7 @@ static int nfs4_check_lease(struct nfs_client *clp)
1428 struct rpc_cred *cred; 1436 struct rpc_cred *cred;
1429 const struct nfs4_state_maintenance_ops *ops = 1437 const struct nfs4_state_maintenance_ops *ops =
1430 clp->cl_mvops->state_renewal_ops; 1438 clp->cl_mvops->state_renewal_ops;
1431 int status = -NFS4ERR_EXPIRED; 1439 int status;
1432 1440
1433 /* Is the client already known to have an expired lease? */ 1441 /* Is the client already known to have an expired lease? */
1434 if (test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state)) 1442 if (test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state))
@@ -1438,6 +1446,7 @@ static int nfs4_check_lease(struct nfs_client *clp)
1438 spin_unlock(&clp->cl_lock); 1446 spin_unlock(&clp->cl_lock);
1439 if (cred == NULL) { 1447 if (cred == NULL) {
1440 cred = nfs4_get_setclientid_cred(clp); 1448 cred = nfs4_get_setclientid_cred(clp);
1449 status = -ENOKEY;
1441 if (cred == NULL) 1450 if (cred == NULL)
1442 goto out; 1451 goto out;
1443 } 1452 }
@@ -1525,16 +1534,16 @@ void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags)
1525{ 1534{
1526 if (!flags) 1535 if (!flags)
1527 return; 1536 return;
1528 else if (flags & SEQ4_STATUS_RESTART_RECLAIM_NEEDED) 1537 if (flags & SEQ4_STATUS_RESTART_RECLAIM_NEEDED)
1529 nfs41_handle_server_reboot(clp); 1538 nfs41_handle_server_reboot(clp);
1530 else if (flags & (SEQ4_STATUS_EXPIRED_ALL_STATE_REVOKED | 1539 if (flags & (SEQ4_STATUS_EXPIRED_ALL_STATE_REVOKED |
1531 SEQ4_STATUS_EXPIRED_SOME_STATE_REVOKED | 1540 SEQ4_STATUS_EXPIRED_SOME_STATE_REVOKED |
1532 SEQ4_STATUS_ADMIN_STATE_REVOKED | 1541 SEQ4_STATUS_ADMIN_STATE_REVOKED |
1533 SEQ4_STATUS_LEASE_MOVED)) 1542 SEQ4_STATUS_LEASE_MOVED))
1534 nfs41_handle_state_revoked(clp); 1543 nfs41_handle_state_revoked(clp);
1535 else if (flags & SEQ4_STATUS_RECALLABLE_STATE_REVOKED) 1544 if (flags & SEQ4_STATUS_RECALLABLE_STATE_REVOKED)
1536 nfs41_handle_recallable_state_revoked(clp); 1545 nfs41_handle_recallable_state_revoked(clp);
1537 else if (flags & (SEQ4_STATUS_CB_PATH_DOWN | 1546 if (flags & (SEQ4_STATUS_CB_PATH_DOWN |
1538 SEQ4_STATUS_BACKCHANNEL_FAULT | 1547 SEQ4_STATUS_BACKCHANNEL_FAULT |
1539 SEQ4_STATUS_CB_PATH_DOWN_SESSION)) 1548 SEQ4_STATUS_CB_PATH_DOWN_SESSION))
1540 nfs41_handle_cb_path_down(clp); 1549 nfs41_handle_cb_path_down(clp);
@@ -1662,10 +1671,10 @@ static void nfs4_state_manager(struct nfs_client *clp)
1662 1671
1663 if (test_and_clear_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state)) { 1672 if (test_and_clear_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state)) {
1664 status = nfs4_check_lease(clp); 1673 status = nfs4_check_lease(clp);
1674 if (status < 0)
1675 goto out_error;
1665 if (test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state)) 1676 if (test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state))
1666 continue; 1677 continue;
1667 if (status < 0 && status != -NFS4ERR_CB_PATH_DOWN)
1668 goto out_error;
1669 } 1678 }
1670 1679
1671 /* Initialize or reset the session */ 1680 /* Initialize or reset the session */
diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c
index 41d6743d303c..ac258beeda3c 100644
--- a/fs/nilfs2/ioctl.c
+++ b/fs/nilfs2/ioctl.c
@@ -625,6 +625,9 @@ static int nilfs_ioctl_clean_segments(struct inode *inode, struct file *filp,
625 if (argv[n].v_nmembs > nsegs * nilfs->ns_blocks_per_segment) 625 if (argv[n].v_nmembs > nsegs * nilfs->ns_blocks_per_segment)
626 goto out_free; 626 goto out_free;
627 627
628 if (argv[n].v_nmembs >= UINT_MAX / argv[n].v_size)
629 goto out_free;
630
628 len = argv[n].v_size * argv[n].v_nmembs; 631 len = argv[n].v_size * argv[n].v_nmembs;
629 base = (void __user *)(unsigned long)argv[n].v_base; 632 base = (void __user *)(unsigned long)argv[n].v_base;
630 if (len == 0) { 633 if (len == 0) {
@@ -842,6 +845,19 @@ long nilfs_compat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
842 case FS_IOC32_GETVERSION: 845 case FS_IOC32_GETVERSION:
843 cmd = FS_IOC_GETVERSION; 846 cmd = FS_IOC_GETVERSION;
844 break; 847 break;
848 case NILFS_IOCTL_CHANGE_CPMODE:
849 case NILFS_IOCTL_DELETE_CHECKPOINT:
850 case NILFS_IOCTL_GET_CPINFO:
851 case NILFS_IOCTL_GET_CPSTAT:
852 case NILFS_IOCTL_GET_SUINFO:
853 case NILFS_IOCTL_GET_SUSTAT:
854 case NILFS_IOCTL_GET_VINFO:
855 case NILFS_IOCTL_GET_BDESCS:
856 case NILFS_IOCTL_CLEAN_SEGMENTS:
857 case NILFS_IOCTL_SYNC:
858 case NILFS_IOCTL_RESIZE:
859 case NILFS_IOCTL_SET_ALLOC_RANGE:
860 break;
845 default: 861 default:
846 return -ENOIOCTLCMD; 862 return -ENOIOCTLCMD;
847 } 863 }
diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c
index 586174168e2a..80e4645f7990 100644
--- a/fs/proc/meminfo.c
+++ b/fs/proc/meminfo.c
@@ -131,12 +131,13 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
131 K(i.freeswap), 131 K(i.freeswap),
132 K(global_page_state(NR_FILE_DIRTY)), 132 K(global_page_state(NR_FILE_DIRTY)),
133 K(global_page_state(NR_WRITEBACK)), 133 K(global_page_state(NR_WRITEBACK)),
134 K(global_page_state(NR_ANON_PAGES)
135#ifdef CONFIG_TRANSPARENT_HUGEPAGE 134#ifdef CONFIG_TRANSPARENT_HUGEPAGE
135 K(global_page_state(NR_ANON_PAGES)
136 + global_page_state(NR_ANON_TRANSPARENT_HUGEPAGES) * 136 + global_page_state(NR_ANON_TRANSPARENT_HUGEPAGES) *
137 HPAGE_PMD_NR 137 HPAGE_PMD_NR),
138#else
139 K(global_page_state(NR_ANON_PAGES)),
138#endif 140#endif
139 ),
140 K(global_page_state(NR_FILE_MAPPED)), 141 K(global_page_state(NR_FILE_MAPPED)),
141 K(global_page_state(NR_SHMEM)), 142 K(global_page_state(NR_SHMEM)),
142 K(global_page_state(NR_SLAB_RECLAIMABLE) + 143 K(global_page_state(NR_SLAB_RECLAIMABLE) +
diff --git a/fs/proc/root.c b/fs/proc/root.c
index 9a8a2b77b874..03102d978180 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -91,20 +91,18 @@ static struct file_system_type proc_fs_type = {
91 91
92void __init proc_root_init(void) 92void __init proc_root_init(void)
93{ 93{
94 struct vfsmount *mnt;
95 int err; 94 int err;
96 95
97 proc_init_inodecache(); 96 proc_init_inodecache();
98 err = register_filesystem(&proc_fs_type); 97 err = register_filesystem(&proc_fs_type);
99 if (err) 98 if (err)
100 return; 99 return;
101 mnt = kern_mount_data(&proc_fs_type, &init_pid_ns); 100 err = pid_ns_prepare_proc(&init_pid_ns);
102 if (IS_ERR(mnt)) { 101 if (err) {
103 unregister_filesystem(&proc_fs_type); 102 unregister_filesystem(&proc_fs_type);
104 return; 103 return;
105 } 104 }
106 105
107 init_pid_ns.proc_mnt = mnt;
108 proc_symlink("mounts", NULL, "self/mounts"); 106 proc_symlink("mounts", NULL, "self/mounts");
109 107
110 proc_net_init(); 108 proc_net_init();
@@ -209,5 +207,5 @@ int pid_ns_prepare_proc(struct pid_namespace *ns)
209 207
210void pid_ns_release_proc(struct pid_namespace *ns) 208void pid_ns_release_proc(struct pid_namespace *ns)
211{ 209{
212 mntput(ns->proc_mnt); 210 kern_unmount(ns->proc_mnt);
213} 211}
diff --git a/fs/proc/stat.c b/fs/proc/stat.c
index 42b274da92c3..2a30d67dd6b8 100644
--- a/fs/proc/stat.c
+++ b/fs/proc/stat.c
@@ -32,7 +32,7 @@ static cputime64_t get_idle_time(int cpu)
32 idle = kstat_cpu(cpu).cpustat.idle; 32 idle = kstat_cpu(cpu).cpustat.idle;
33 idle = cputime64_add(idle, arch_idle_time(cpu)); 33 idle = cputime64_add(idle, arch_idle_time(cpu));
34 } else 34 } else
35 idle = usecs_to_cputime(idle_time); 35 idle = nsecs_to_jiffies64(1000 * idle_time);
36 36
37 return idle; 37 return idle;
38} 38}
@@ -46,7 +46,7 @@ static cputime64_t get_iowait_time(int cpu)
46 /* !NO_HZ so we can rely on cpustat.iowait */ 46 /* !NO_HZ so we can rely on cpustat.iowait */
47 iowait = kstat_cpu(cpu).cpustat.iowait; 47 iowait = kstat_cpu(cpu).cpustat.iowait;
48 else 48 else
49 iowait = usecs_to_cputime(iowait_time); 49 iowait = nsecs_to_jiffies64(1000 * iowait_time);
50 50
51 return iowait; 51 return iowait;
52} 52}
diff --git a/fs/seq_file.c b/fs/seq_file.c
index 05d6b0e78c95..dba43c3ea3af 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -449,8 +449,6 @@ EXPORT_SYMBOL(seq_path);
449 449
450/* 450/*
451 * Same as seq_path, but relative to supplied root. 451 * Same as seq_path, but relative to supplied root.
452 *
453 * root may be changed, see __d_path().
454 */ 452 */
455int seq_path_root(struct seq_file *m, struct path *path, struct path *root, 453int seq_path_root(struct seq_file *m, struct path *path, struct path *root,
456 char *esc) 454 char *esc)
@@ -463,6 +461,8 @@ int seq_path_root(struct seq_file *m, struct path *path, struct path *root,
463 char *p; 461 char *p;
464 462
465 p = __d_path(path, root, buf, size); 463 p = __d_path(path, root, buf, size);
464 if (!p)
465 return SEQ_SKIP;
466 res = PTR_ERR(p); 466 res = PTR_ERR(p);
467 if (!IS_ERR(p)) { 467 if (!IS_ERR(p)) {
468 char *end = mangle_path(buf, p, esc); 468 char *end = mangle_path(buf, p, esc);
@@ -474,7 +474,7 @@ int seq_path_root(struct seq_file *m, struct path *path, struct path *root,
474 } 474 }
475 seq_commit(m, res); 475 seq_commit(m, res);
476 476
477 return res < 0 ? res : 0; 477 return res < 0 && res != -ENAMETOOLONG ? res : 0;
478} 478}
479 479
480/* 480/*
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index 20403dc5d437..ae0e76bb6ebf 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -2264,19 +2264,12 @@ static int __init ubifs_init(void)
2264 return -EINVAL; 2264 return -EINVAL;
2265 } 2265 }
2266 2266
2267 err = register_filesystem(&ubifs_fs_type);
2268 if (err) {
2269 ubifs_err("cannot register file system, error %d", err);
2270 return err;
2271 }
2272
2273 err = -ENOMEM;
2274 ubifs_inode_slab = kmem_cache_create("ubifs_inode_slab", 2267 ubifs_inode_slab = kmem_cache_create("ubifs_inode_slab",
2275 sizeof(struct ubifs_inode), 0, 2268 sizeof(struct ubifs_inode), 0,
2276 SLAB_MEM_SPREAD | SLAB_RECLAIM_ACCOUNT, 2269 SLAB_MEM_SPREAD | SLAB_RECLAIM_ACCOUNT,
2277 &inode_slab_ctor); 2270 &inode_slab_ctor);
2278 if (!ubifs_inode_slab) 2271 if (!ubifs_inode_slab)
2279 goto out_reg; 2272 return -ENOMEM;
2280 2273
2281 register_shrinker(&ubifs_shrinker_info); 2274 register_shrinker(&ubifs_shrinker_info);
2282 2275
@@ -2288,15 +2281,20 @@ static int __init ubifs_init(void)
2288 if (err) 2281 if (err)
2289 goto out_compr; 2282 goto out_compr;
2290 2283
2284 err = register_filesystem(&ubifs_fs_type);
2285 if (err) {
2286 ubifs_err("cannot register file system, error %d", err);
2287 goto out_dbg;
2288 }
2291 return 0; 2289 return 0;
2292 2290
2291out_dbg:
2292 dbg_debugfs_exit();
2293out_compr: 2293out_compr:
2294 ubifs_compressors_exit(); 2294 ubifs_compressors_exit();
2295out_shrinker: 2295out_shrinker:
2296 unregister_shrinker(&ubifs_shrinker_info); 2296 unregister_shrinker(&ubifs_shrinker_info);
2297 kmem_cache_destroy(ubifs_inode_slab); 2297 kmem_cache_destroy(ubifs_inode_slab);
2298out_reg:
2299 unregister_filesystem(&ubifs_fs_type);
2300 return err; 2298 return err;
2301} 2299}
2302/* late_initcall to let compressors initialize first */ 2300/* late_initcall to let compressors initialize first */
diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c
index b6c4b3795c4a..76e4266d2e7e 100644
--- a/fs/xfs/xfs_acl.c
+++ b/fs/xfs/xfs_acl.c
@@ -42,6 +42,8 @@ xfs_acl_from_disk(struct xfs_acl *aclp)
42 int count, i; 42 int count, i;
43 43
44 count = be32_to_cpu(aclp->acl_cnt); 44 count = be32_to_cpu(aclp->acl_cnt);
45 if (count > XFS_ACL_MAX_ENTRIES)
46 return ERR_PTR(-EFSCORRUPTED);
45 47
46 acl = posix_acl_alloc(count, GFP_KERNEL); 48 acl = posix_acl_alloc(count, GFP_KERNEL);
47 if (!acl) 49 if (!acl)
diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c
index d4906e7c9787..c1b55e596551 100644
--- a/fs/xfs/xfs_attr_leaf.c
+++ b/fs/xfs/xfs_attr_leaf.c
@@ -110,6 +110,7 @@ xfs_attr_namesp_match(int arg_flags, int ondisk_flags)
110/* 110/*
111 * Query whether the requested number of additional bytes of extended 111 * Query whether the requested number of additional bytes of extended
112 * attribute space will be able to fit inline. 112 * attribute space will be able to fit inline.
113 *
113 * Returns zero if not, else the di_forkoff fork offset to be used in the 114 * Returns zero if not, else the di_forkoff fork offset to be used in the
114 * literal area for attribute data once the new bytes have been added. 115 * literal area for attribute data once the new bytes have been added.
115 * 116 *
@@ -122,7 +123,7 @@ xfs_attr_shortform_bytesfit(xfs_inode_t *dp, int bytes)
122 int offset; 123 int offset;
123 int minforkoff; /* lower limit on valid forkoff locations */ 124 int minforkoff; /* lower limit on valid forkoff locations */
124 int maxforkoff; /* upper limit on valid forkoff locations */ 125 int maxforkoff; /* upper limit on valid forkoff locations */
125 int dsize; 126 int dsize;
126 xfs_mount_t *mp = dp->i_mount; 127 xfs_mount_t *mp = dp->i_mount;
127 128
128 offset = (XFS_LITINO(mp) - bytes) >> 3; /* rounded down */ 129 offset = (XFS_LITINO(mp) - bytes) >> 3; /* rounded down */
@@ -136,47 +137,60 @@ xfs_attr_shortform_bytesfit(xfs_inode_t *dp, int bytes)
136 return (offset >= minforkoff) ? minforkoff : 0; 137 return (offset >= minforkoff) ? minforkoff : 0;
137 } 138 }
138 139
139 if (!(mp->m_flags & XFS_MOUNT_ATTR2)) { 140 /*
140 if (bytes <= XFS_IFORK_ASIZE(dp)) 141 * If the requested numbers of bytes is smaller or equal to the
141 return dp->i_d.di_forkoff; 142 * current attribute fork size we can always proceed.
143 *
144 * Note that if_bytes in the data fork might actually be larger than
145 * the current data fork size is due to delalloc extents. In that
146 * case either the extent count will go down when they are converted
147 * to real extents, or the delalloc conversion will take care of the
148 * literal area rebalancing.
149 */
150 if (bytes <= XFS_IFORK_ASIZE(dp))
151 return dp->i_d.di_forkoff;
152
153 /*
154 * For attr2 we can try to move the forkoff if there is space in the
155 * literal area, but for the old format we are done if there is no
156 * space in the fixed attribute fork.
157 */
158 if (!(mp->m_flags & XFS_MOUNT_ATTR2))
142 return 0; 159 return 0;
143 }
144 160
145 dsize = dp->i_df.if_bytes; 161 dsize = dp->i_df.if_bytes;
146 162
147 switch (dp->i_d.di_format) { 163 switch (dp->i_d.di_format) {
148 case XFS_DINODE_FMT_EXTENTS: 164 case XFS_DINODE_FMT_EXTENTS:
149 /* 165 /*
150 * If there is no attr fork and the data fork is extents, 166 * If there is no attr fork and the data fork is extents,
151 * determine if creating the default attr fork will result 167 * determine if creating the default attr fork will result
152 * in the extents form migrating to btree. If so, the 168 * in the extents form migrating to btree. If so, the
153 * minimum offset only needs to be the space required for 169 * minimum offset only needs to be the space required for
154 * the btree root. 170 * the btree root.
155 */ 171 */
156 if (!dp->i_d.di_forkoff && dp->i_df.if_bytes > 172 if (!dp->i_d.di_forkoff && dp->i_df.if_bytes >
157 xfs_default_attroffset(dp)) 173 xfs_default_attroffset(dp))
158 dsize = XFS_BMDR_SPACE_CALC(MINDBTPTRS); 174 dsize = XFS_BMDR_SPACE_CALC(MINDBTPTRS);
159 break; 175 break;
160
161 case XFS_DINODE_FMT_BTREE: 176 case XFS_DINODE_FMT_BTREE:
162 /* 177 /*
163 * If have data btree then keep forkoff if we have one, 178 * If we have a data btree then keep forkoff if we have one,
164 * otherwise we are adding a new attr, so then we set 179 * otherwise we are adding a new attr, so then we set
165 * minforkoff to where the btree root can finish so we have 180 * minforkoff to where the btree root can finish so we have
166 * plenty of room for attrs 181 * plenty of room for attrs
167 */ 182 */
168 if (dp->i_d.di_forkoff) { 183 if (dp->i_d.di_forkoff) {
169 if (offset < dp->i_d.di_forkoff) 184 if (offset < dp->i_d.di_forkoff)
170 return 0; 185 return 0;
171 else 186 return dp->i_d.di_forkoff;
172 return dp->i_d.di_forkoff; 187 }
173 } else 188 dsize = XFS_BMAP_BROOT_SPACE(dp->i_df.if_broot);
174 dsize = XFS_BMAP_BROOT_SPACE(dp->i_df.if_broot);
175 break; 189 break;
176 } 190 }
177 191
178 /* 192 /*
179 * A data fork btree root must have space for at least 193 * A data fork btree root must have space for at least
180 * MINDBTPTRS key/ptr pairs if the data fork is small or empty. 194 * MINDBTPTRS key/ptr pairs if the data fork is small or empty.
181 */ 195 */
182 minforkoff = MAX(dsize, XFS_BMDR_SPACE_CALC(MINDBTPTRS)); 196 minforkoff = MAX(dsize, XFS_BMDR_SPACE_CALC(MINDBTPTRS));
@@ -186,10 +200,10 @@ xfs_attr_shortform_bytesfit(xfs_inode_t *dp, int bytes)
186 maxforkoff = XFS_LITINO(mp) - XFS_BMDR_SPACE_CALC(MINABTPTRS); 200 maxforkoff = XFS_LITINO(mp) - XFS_BMDR_SPACE_CALC(MINABTPTRS);
187 maxforkoff = maxforkoff >> 3; /* rounded down */ 201 maxforkoff = maxforkoff >> 3; /* rounded down */
188 202
189 if (offset >= minforkoff && offset < maxforkoff)
190 return offset;
191 if (offset >= maxforkoff) 203 if (offset >= maxforkoff)
192 return maxforkoff; 204 return maxforkoff;
205 if (offset >= minforkoff)
206 return offset;
193 return 0; 207 return 0;
194} 208}
195 209
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index c68baeb0974a..d0ab78837057 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -2383,6 +2383,8 @@ xfs_bmap_btalloc(
2383 int tryagain; 2383 int tryagain;
2384 int error; 2384 int error;
2385 2385
2386 ASSERT(ap->length);
2387
2386 mp = ap->ip->i_mount; 2388 mp = ap->ip->i_mount;
2387 align = ap->userdata ? xfs_get_extsz_hint(ap->ip) : 0; 2389 align = ap->userdata ? xfs_get_extsz_hint(ap->ip) : 0;
2388 if (unlikely(align)) { 2390 if (unlikely(align)) {
@@ -4629,6 +4631,8 @@ xfs_bmapi_allocate(
4629 int error; 4631 int error;
4630 int rt; 4632 int rt;
4631 4633
4634 ASSERT(bma->length > 0);
4635
4632 rt = (whichfork == XFS_DATA_FORK) && XFS_IS_REALTIME_INODE(bma->ip); 4636 rt = (whichfork == XFS_DATA_FORK) && XFS_IS_REALTIME_INODE(bma->ip);
4633 4637
4634 /* 4638 /*
@@ -4849,6 +4853,7 @@ xfs_bmapi_write(
4849 ASSERT(*nmap <= XFS_BMAP_MAX_NMAP); 4853 ASSERT(*nmap <= XFS_BMAP_MAX_NMAP);
4850 ASSERT(!(flags & XFS_BMAPI_IGSTATE)); 4854 ASSERT(!(flags & XFS_BMAPI_IGSTATE));
4851 ASSERT(tp != NULL); 4855 ASSERT(tp != NULL);
4856 ASSERT(len > 0);
4852 4857
4853 whichfork = (flags & XFS_BMAPI_ATTRFORK) ? 4858 whichfork = (flags & XFS_BMAPI_ATTRFORK) ?
4854 XFS_ATTR_FORK : XFS_DATA_FORK; 4859 XFS_ATTR_FORK : XFS_DATA_FORK;
@@ -4918,9 +4923,22 @@ xfs_bmapi_write(
4918 bma.eof = eof; 4923 bma.eof = eof;
4919 bma.conv = !!(flags & XFS_BMAPI_CONVERT); 4924 bma.conv = !!(flags & XFS_BMAPI_CONVERT);
4920 bma.wasdel = wasdelay; 4925 bma.wasdel = wasdelay;
4921 bma.length = len;
4922 bma.offset = bno; 4926 bma.offset = bno;
4923 4927
4928 /*
4929 * There's a 32/64 bit type mismatch between the
4930 * allocation length request (which can be 64 bits in
4931 * length) and the bma length request, which is
4932 * xfs_extlen_t and therefore 32 bits. Hence we have to
4933 * check for 32-bit overflows and handle them here.
4934 */
4935 if (len > (xfs_filblks_t)MAXEXTLEN)
4936 bma.length = MAXEXTLEN;
4937 else
4938 bma.length = len;
4939
4940 ASSERT(len > 0);
4941 ASSERT(bma.length > 0);
4924 error = xfs_bmapi_allocate(&bma, flags); 4942 error = xfs_bmapi_allocate(&bma, flags);
4925 if (error) 4943 if (error)
4926 goto error0; 4944 goto error0;
diff --git a/fs/xfs/xfs_export.c b/fs/xfs/xfs_export.c
index da108977b21f..558910f5e3c0 100644
--- a/fs/xfs/xfs_export.c
+++ b/fs/xfs/xfs_export.c
@@ -98,22 +98,22 @@ xfs_fs_encode_fh(
98 switch (fileid_type) { 98 switch (fileid_type) {
99 case FILEID_INO32_GEN_PARENT: 99 case FILEID_INO32_GEN_PARENT:
100 spin_lock(&dentry->d_lock); 100 spin_lock(&dentry->d_lock);
101 fid->i32.parent_ino = dentry->d_parent->d_inode->i_ino; 101 fid->i32.parent_ino = XFS_I(dentry->d_parent->d_inode)->i_ino;
102 fid->i32.parent_gen = dentry->d_parent->d_inode->i_generation; 102 fid->i32.parent_gen = dentry->d_parent->d_inode->i_generation;
103 spin_unlock(&dentry->d_lock); 103 spin_unlock(&dentry->d_lock);
104 /*FALLTHRU*/ 104 /*FALLTHRU*/
105 case FILEID_INO32_GEN: 105 case FILEID_INO32_GEN:
106 fid->i32.ino = inode->i_ino; 106 fid->i32.ino = XFS_I(inode)->i_ino;
107 fid->i32.gen = inode->i_generation; 107 fid->i32.gen = inode->i_generation;
108 break; 108 break;
109 case FILEID_INO32_GEN_PARENT | XFS_FILEID_TYPE_64FLAG: 109 case FILEID_INO32_GEN_PARENT | XFS_FILEID_TYPE_64FLAG:
110 spin_lock(&dentry->d_lock); 110 spin_lock(&dentry->d_lock);
111 fid64->parent_ino = dentry->d_parent->d_inode->i_ino; 111 fid64->parent_ino = XFS_I(dentry->d_parent->d_inode)->i_ino;
112 fid64->parent_gen = dentry->d_parent->d_inode->i_generation; 112 fid64->parent_gen = dentry->d_parent->d_inode->i_generation;
113 spin_unlock(&dentry->d_lock); 113 spin_unlock(&dentry->d_lock);
114 /*FALLTHRU*/ 114 /*FALLTHRU*/
115 case FILEID_INO32_GEN | XFS_FILEID_TYPE_64FLAG: 115 case FILEID_INO32_GEN | XFS_FILEID_TYPE_64FLAG:
116 fid64->ino = inode->i_ino; 116 fid64->ino = XFS_I(inode)->i_ino;
117 fid64->gen = inode->i_generation; 117 fid64->gen = inode->i_generation;
118 break; 118 break;
119 } 119 }
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index c0237c602f11..755ee8164880 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -2835,6 +2835,27 @@ corrupt_out:
2835 return XFS_ERROR(EFSCORRUPTED); 2835 return XFS_ERROR(EFSCORRUPTED);
2836} 2836}
2837 2837
2838void
2839xfs_promote_inode(
2840 struct xfs_inode *ip)
2841{
2842 struct xfs_buf *bp;
2843
2844 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
2845
2846 bp = xfs_incore(ip->i_mount->m_ddev_targp, ip->i_imap.im_blkno,
2847 ip->i_imap.im_len, XBF_TRYLOCK);
2848 if (!bp)
2849 return;
2850
2851 if (XFS_BUF_ISDELAYWRITE(bp)) {
2852 xfs_buf_delwri_promote(bp);
2853 wake_up_process(ip->i_mount->m_ddev_targp->bt_task);
2854 }
2855
2856 xfs_buf_relse(bp);
2857}
2858
2838/* 2859/*
2839 * Return a pointer to the extent record at file index idx. 2860 * Return a pointer to the extent record at file index idx.
2840 */ 2861 */
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 760140d1dd66..b4cd4739f98e 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -498,6 +498,7 @@ int xfs_iunlink(struct xfs_trans *, xfs_inode_t *);
498void xfs_iext_realloc(xfs_inode_t *, int, int); 498void xfs_iext_realloc(xfs_inode_t *, int, int);
499void xfs_iunpin_wait(xfs_inode_t *); 499void xfs_iunpin_wait(xfs_inode_t *);
500int xfs_iflush(xfs_inode_t *, uint); 500int xfs_iflush(xfs_inode_t *, uint);
501void xfs_promote_inode(struct xfs_inode *);
501void xfs_lock_inodes(xfs_inode_t **, int, uint); 502void xfs_lock_inodes(xfs_inode_t **, int, uint);
502void xfs_lock_two_inodes(xfs_inode_t *, xfs_inode_t *, uint); 503void xfs_lock_two_inodes(xfs_inode_t *, xfs_inode_t *, uint);
503 504
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index a14cd89fe465..34817adf4b9e 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -150,6 +150,117 @@ xlog_grant_add_space(
150 } while (head_val != old); 150 } while (head_val != old);
151} 151}
152 152
153STATIC bool
154xlog_reserveq_wake(
155 struct log *log,
156 int *free_bytes)
157{
158 struct xlog_ticket *tic;
159 int need_bytes;
160
161 list_for_each_entry(tic, &log->l_reserveq, t_queue) {
162 if (tic->t_flags & XLOG_TIC_PERM_RESERV)
163 need_bytes = tic->t_unit_res * tic->t_cnt;
164 else
165 need_bytes = tic->t_unit_res;
166
167 if (*free_bytes < need_bytes)
168 return false;
169 *free_bytes -= need_bytes;
170
171 trace_xfs_log_grant_wake_up(log, tic);
172 wake_up(&tic->t_wait);
173 }
174
175 return true;
176}
177
178STATIC bool
179xlog_writeq_wake(
180 struct log *log,
181 int *free_bytes)
182{
183 struct xlog_ticket *tic;
184 int need_bytes;
185
186 list_for_each_entry(tic, &log->l_writeq, t_queue) {
187 ASSERT(tic->t_flags & XLOG_TIC_PERM_RESERV);
188
189 need_bytes = tic->t_unit_res;
190
191 if (*free_bytes < need_bytes)
192 return false;
193 *free_bytes -= need_bytes;
194
195 trace_xfs_log_regrant_write_wake_up(log, tic);
196 wake_up(&tic->t_wait);
197 }
198
199 return true;
200}
201
202STATIC int
203xlog_reserveq_wait(
204 struct log *log,
205 struct xlog_ticket *tic,
206 int need_bytes)
207{
208 list_add_tail(&tic->t_queue, &log->l_reserveq);
209
210 do {
211 if (XLOG_FORCED_SHUTDOWN(log))
212 goto shutdown;
213 xlog_grant_push_ail(log, need_bytes);
214
215 XFS_STATS_INC(xs_sleep_logspace);
216 trace_xfs_log_grant_sleep(log, tic);
217
218 xlog_wait(&tic->t_wait, &log->l_grant_reserve_lock);
219 trace_xfs_log_grant_wake(log, tic);
220
221 spin_lock(&log->l_grant_reserve_lock);
222 if (XLOG_FORCED_SHUTDOWN(log))
223 goto shutdown;
224 } while (xlog_space_left(log, &log->l_grant_reserve_head) < need_bytes);
225
226 list_del_init(&tic->t_queue);
227 return 0;
228shutdown:
229 list_del_init(&tic->t_queue);
230 return XFS_ERROR(EIO);
231}
232
233STATIC int
234xlog_writeq_wait(
235 struct log *log,
236 struct xlog_ticket *tic,
237 int need_bytes)
238{
239 list_add_tail(&tic->t_queue, &log->l_writeq);
240
241 do {
242 if (XLOG_FORCED_SHUTDOWN(log))
243 goto shutdown;
244 xlog_grant_push_ail(log, need_bytes);
245
246 XFS_STATS_INC(xs_sleep_logspace);
247 trace_xfs_log_regrant_write_sleep(log, tic);
248
249 xlog_wait(&tic->t_wait, &log->l_grant_write_lock);
250 trace_xfs_log_regrant_write_wake(log, tic);
251
252 spin_lock(&log->l_grant_write_lock);
253 if (XLOG_FORCED_SHUTDOWN(log))
254 goto shutdown;
255 } while (xlog_space_left(log, &log->l_grant_write_head) < need_bytes);
256
257 list_del_init(&tic->t_queue);
258 return 0;
259shutdown:
260 list_del_init(&tic->t_queue);
261 return XFS_ERROR(EIO);
262}
263
153static void 264static void
154xlog_tic_reset_res(xlog_ticket_t *tic) 265xlog_tic_reset_res(xlog_ticket_t *tic)
155{ 266{
@@ -350,8 +461,19 @@ xfs_log_reserve(
350 retval = xlog_grant_log_space(log, internal_ticket); 461 retval = xlog_grant_log_space(log, internal_ticket);
351 } 462 }
352 463
464 if (unlikely(retval)) {
465 /*
466 * If we are failing, make sure the ticket doesn't have any
467 * current reservations. We don't want to add this back
468 * when the ticket/ transaction gets cancelled.
469 */
470 internal_ticket->t_curr_res = 0;
471 /* ungrant will give back unit_res * t_cnt. */
472 internal_ticket->t_cnt = 0;
473 }
474
353 return retval; 475 return retval;
354} /* xfs_log_reserve */ 476}
355 477
356 478
357/* 479/*
@@ -2481,8 +2603,8 @@ restart:
2481/* 2603/*
2482 * Atomically get the log space required for a log ticket. 2604 * Atomically get the log space required for a log ticket.
2483 * 2605 *
2484 * Once a ticket gets put onto the reserveq, it will only return after 2606 * Once a ticket gets put onto the reserveq, it will only return after the
2485 * the needed reservation is satisfied. 2607 * needed reservation is satisfied.
2486 * 2608 *
2487 * This function is structured so that it has a lock free fast path. This is 2609 * This function is structured so that it has a lock free fast path. This is
2488 * necessary because every new transaction reservation will come through this 2610 * necessary because every new transaction reservation will come through this
@@ -2490,113 +2612,53 @@ restart:
2490 * every pass. 2612 * every pass.
2491 * 2613 *
2492 * As tickets are only ever moved on and off the reserveq under the 2614 * As tickets are only ever moved on and off the reserveq under the
2493 * l_grant_reserve_lock, we only need to take that lock if we are going 2615 * l_grant_reserve_lock, we only need to take that lock if we are going to add
2494 * to add the ticket to the queue and sleep. We can avoid taking the lock if the 2616 * the ticket to the queue and sleep. We can avoid taking the lock if the ticket
2495 * ticket was never added to the reserveq because the t_queue list head will be 2617 * was never added to the reserveq because the t_queue list head will be empty
2496 * empty and we hold the only reference to it so it can safely be checked 2618 * and we hold the only reference to it so it can safely be checked unlocked.
2497 * unlocked.
2498 */ 2619 */
2499STATIC int 2620STATIC int
2500xlog_grant_log_space(xlog_t *log, 2621xlog_grant_log_space(
2501 xlog_ticket_t *tic) 2622 struct log *log,
2623 struct xlog_ticket *tic)
2502{ 2624{
2503 int free_bytes; 2625 int free_bytes, need_bytes;
2504 int need_bytes; 2626 int error = 0;
2505 2627
2506#ifdef DEBUG 2628 ASSERT(!(log->l_flags & XLOG_ACTIVE_RECOVERY));
2507 if (log->l_flags & XLOG_ACTIVE_RECOVERY)
2508 panic("grant Recovery problem");
2509#endif
2510 2629
2511 trace_xfs_log_grant_enter(log, tic); 2630 trace_xfs_log_grant_enter(log, tic);
2512 2631
2632 /*
2633 * If there are other waiters on the queue then give them a chance at
2634 * logspace before us. Wake up the first waiters, if we do not wake
2635 * up all the waiters then go to sleep waiting for more free space,
2636 * otherwise try to get some space for this transaction.
2637 */
2513 need_bytes = tic->t_unit_res; 2638 need_bytes = tic->t_unit_res;
2514 if (tic->t_flags & XFS_LOG_PERM_RESERV) 2639 if (tic->t_flags & XFS_LOG_PERM_RESERV)
2515 need_bytes *= tic->t_ocnt; 2640 need_bytes *= tic->t_ocnt;
2516
2517 /* something is already sleeping; insert new transaction at end */
2518 if (!list_empty_careful(&log->l_reserveq)) {
2519 spin_lock(&log->l_grant_reserve_lock);
2520 /* recheck the queue now we are locked */
2521 if (list_empty(&log->l_reserveq)) {
2522 spin_unlock(&log->l_grant_reserve_lock);
2523 goto redo;
2524 }
2525 list_add_tail(&tic->t_queue, &log->l_reserveq);
2526
2527 trace_xfs_log_grant_sleep1(log, tic);
2528
2529 /*
2530 * Gotta check this before going to sleep, while we're
2531 * holding the grant lock.
2532 */
2533 if (XLOG_FORCED_SHUTDOWN(log))
2534 goto error_return;
2535
2536 XFS_STATS_INC(xs_sleep_logspace);
2537 xlog_wait(&tic->t_wait, &log->l_grant_reserve_lock);
2538
2539 /*
2540 * If we got an error, and the filesystem is shutting down,
2541 * we'll catch it down below. So just continue...
2542 */
2543 trace_xfs_log_grant_wake1(log, tic);
2544 }
2545
2546redo:
2547 if (XLOG_FORCED_SHUTDOWN(log))
2548 goto error_return_unlocked;
2549
2550 free_bytes = xlog_space_left(log, &log->l_grant_reserve_head); 2641 free_bytes = xlog_space_left(log, &log->l_grant_reserve_head);
2551 if (free_bytes < need_bytes) { 2642 if (!list_empty_careful(&log->l_reserveq)) {
2552 spin_lock(&log->l_grant_reserve_lock); 2643 spin_lock(&log->l_grant_reserve_lock);
2553 if (list_empty(&tic->t_queue)) 2644 if (!xlog_reserveq_wake(log, &free_bytes) ||
2554 list_add_tail(&tic->t_queue, &log->l_reserveq); 2645 free_bytes < need_bytes)
2555 2646 error = xlog_reserveq_wait(log, tic, need_bytes);
2556 trace_xfs_log_grant_sleep2(log, tic); 2647 spin_unlock(&log->l_grant_reserve_lock);
2557 2648 } else if (free_bytes < need_bytes) {
2558 if (XLOG_FORCED_SHUTDOWN(log))
2559 goto error_return;
2560
2561 xlog_grant_push_ail(log, need_bytes);
2562
2563 XFS_STATS_INC(xs_sleep_logspace);
2564 xlog_wait(&tic->t_wait, &log->l_grant_reserve_lock);
2565
2566 trace_xfs_log_grant_wake2(log, tic);
2567 goto redo;
2568 }
2569
2570 if (!list_empty(&tic->t_queue)) {
2571 spin_lock(&log->l_grant_reserve_lock); 2649 spin_lock(&log->l_grant_reserve_lock);
2572 list_del_init(&tic->t_queue); 2650 error = xlog_reserveq_wait(log, tic, need_bytes);
2573 spin_unlock(&log->l_grant_reserve_lock); 2651 spin_unlock(&log->l_grant_reserve_lock);
2574 } 2652 }
2653 if (error)
2654 return error;
2575 2655
2576 /* we've got enough space */
2577 xlog_grant_add_space(log, &log->l_grant_reserve_head, need_bytes); 2656 xlog_grant_add_space(log, &log->l_grant_reserve_head, need_bytes);
2578 xlog_grant_add_space(log, &log->l_grant_write_head, need_bytes); 2657 xlog_grant_add_space(log, &log->l_grant_write_head, need_bytes);
2579 trace_xfs_log_grant_exit(log, tic); 2658 trace_xfs_log_grant_exit(log, tic);
2580 xlog_verify_grant_tail(log); 2659 xlog_verify_grant_tail(log);
2581 return 0; 2660 return 0;
2582 2661}
2583error_return_unlocked:
2584 spin_lock(&log->l_grant_reserve_lock);
2585error_return:
2586 list_del_init(&tic->t_queue);
2587 spin_unlock(&log->l_grant_reserve_lock);
2588 trace_xfs_log_grant_error(log, tic);
2589
2590 /*
2591 * If we are failing, make sure the ticket doesn't have any
2592 * current reservations. We don't want to add this back when
2593 * the ticket/transaction gets cancelled.
2594 */
2595 tic->t_curr_res = 0;
2596 tic->t_cnt = 0; /* ungrant will give back unit_res * t_cnt. */
2597 return XFS_ERROR(EIO);
2598} /* xlog_grant_log_space */
2599
2600 2662
2601/* 2663/*
2602 * Replenish the byte reservation required by moving the grant write head. 2664 * Replenish the byte reservation required by moving the grant write head.
@@ -2605,10 +2667,12 @@ error_return:
2605 * free fast path. 2667 * free fast path.
2606 */ 2668 */
2607STATIC int 2669STATIC int
2608xlog_regrant_write_log_space(xlog_t *log, 2670xlog_regrant_write_log_space(
2609 xlog_ticket_t *tic) 2671 struct log *log,
2672 struct xlog_ticket *tic)
2610{ 2673{
2611 int free_bytes, need_bytes; 2674 int free_bytes, need_bytes;
2675 int error = 0;
2612 2676
2613 tic->t_curr_res = tic->t_unit_res; 2677 tic->t_curr_res = tic->t_unit_res;
2614 xlog_tic_reset_res(tic); 2678 xlog_tic_reset_res(tic);
@@ -2616,104 +2680,38 @@ xlog_regrant_write_log_space(xlog_t *log,
2616 if (tic->t_cnt > 0) 2680 if (tic->t_cnt > 0)
2617 return 0; 2681 return 0;
2618 2682
2619#ifdef DEBUG 2683 ASSERT(!(log->l_flags & XLOG_ACTIVE_RECOVERY));
2620 if (log->l_flags & XLOG_ACTIVE_RECOVERY)
2621 panic("regrant Recovery problem");
2622#endif
2623 2684
2624 trace_xfs_log_regrant_write_enter(log, tic); 2685 trace_xfs_log_regrant_write_enter(log, tic);
2625 if (XLOG_FORCED_SHUTDOWN(log))
2626 goto error_return_unlocked;
2627 2686
2628 /* If there are other waiters on the queue then give them a 2687 /*
2629 * chance at logspace before us. Wake up the first waiters, 2688 * If there are other waiters on the queue then give them a chance at
2630 * if we do not wake up all the waiters then go to sleep waiting 2689 * logspace before us. Wake up the first waiters, if we do not wake
2631 * for more free space, otherwise try to get some space for 2690 * up all the waiters then go to sleep waiting for more free space,
2632 * this transaction. 2691 * otherwise try to get some space for this transaction.
2633 */ 2692 */
2634 need_bytes = tic->t_unit_res; 2693 need_bytes = tic->t_unit_res;
2635 if (!list_empty_careful(&log->l_writeq)) {
2636 struct xlog_ticket *ntic;
2637
2638 spin_lock(&log->l_grant_write_lock);
2639 free_bytes = xlog_space_left(log, &log->l_grant_write_head);
2640 list_for_each_entry(ntic, &log->l_writeq, t_queue) {
2641 ASSERT(ntic->t_flags & XLOG_TIC_PERM_RESERV);
2642
2643 if (free_bytes < ntic->t_unit_res)
2644 break;
2645 free_bytes -= ntic->t_unit_res;
2646 wake_up(&ntic->t_wait);
2647 }
2648
2649 if (ntic != list_first_entry(&log->l_writeq,
2650 struct xlog_ticket, t_queue)) {
2651 if (list_empty(&tic->t_queue))
2652 list_add_tail(&tic->t_queue, &log->l_writeq);
2653 trace_xfs_log_regrant_write_sleep1(log, tic);
2654
2655 xlog_grant_push_ail(log, need_bytes);
2656
2657 XFS_STATS_INC(xs_sleep_logspace);
2658 xlog_wait(&tic->t_wait, &log->l_grant_write_lock);
2659 trace_xfs_log_regrant_write_wake1(log, tic);
2660 } else
2661 spin_unlock(&log->l_grant_write_lock);
2662 }
2663
2664redo:
2665 if (XLOG_FORCED_SHUTDOWN(log))
2666 goto error_return_unlocked;
2667
2668 free_bytes = xlog_space_left(log, &log->l_grant_write_head); 2694 free_bytes = xlog_space_left(log, &log->l_grant_write_head);
2669 if (free_bytes < need_bytes) { 2695 if (!list_empty_careful(&log->l_writeq)) {
2670 spin_lock(&log->l_grant_write_lock); 2696 spin_lock(&log->l_grant_write_lock);
2671 if (list_empty(&tic->t_queue)) 2697 if (!xlog_writeq_wake(log, &free_bytes) ||
2672 list_add_tail(&tic->t_queue, &log->l_writeq); 2698 free_bytes < need_bytes)
2673 2699 error = xlog_writeq_wait(log, tic, need_bytes);
2674 if (XLOG_FORCED_SHUTDOWN(log)) 2700 spin_unlock(&log->l_grant_write_lock);
2675 goto error_return; 2701 } else if (free_bytes < need_bytes) {
2676
2677 xlog_grant_push_ail(log, need_bytes);
2678
2679 XFS_STATS_INC(xs_sleep_logspace);
2680 trace_xfs_log_regrant_write_sleep2(log, tic);
2681 xlog_wait(&tic->t_wait, &log->l_grant_write_lock);
2682
2683 trace_xfs_log_regrant_write_wake2(log, tic);
2684 goto redo;
2685 }
2686
2687 if (!list_empty(&tic->t_queue)) {
2688 spin_lock(&log->l_grant_write_lock); 2702 spin_lock(&log->l_grant_write_lock);
2689 list_del_init(&tic->t_queue); 2703 error = xlog_writeq_wait(log, tic, need_bytes);
2690 spin_unlock(&log->l_grant_write_lock); 2704 spin_unlock(&log->l_grant_write_lock);
2691 } 2705 }
2692 2706
2693 /* we've got enough space */ 2707 if (error)
2708 return error;
2709
2694 xlog_grant_add_space(log, &log->l_grant_write_head, need_bytes); 2710 xlog_grant_add_space(log, &log->l_grant_write_head, need_bytes);
2695 trace_xfs_log_regrant_write_exit(log, tic); 2711 trace_xfs_log_regrant_write_exit(log, tic);
2696 xlog_verify_grant_tail(log); 2712 xlog_verify_grant_tail(log);
2697 return 0; 2713 return 0;
2698 2714}
2699
2700 error_return_unlocked:
2701 spin_lock(&log->l_grant_write_lock);
2702 error_return:
2703 list_del_init(&tic->t_queue);
2704 spin_unlock(&log->l_grant_write_lock);
2705 trace_xfs_log_regrant_write_error(log, tic);
2706
2707 /*
2708 * If we are failing, make sure the ticket doesn't have any
2709 * current reservations. We don't want to add this back when
2710 * the ticket/transaction gets cancelled.
2711 */
2712 tic->t_curr_res = 0;
2713 tic->t_cnt = 0; /* ungrant will give back unit_res * t_cnt. */
2714 return XFS_ERROR(EIO);
2715} /* xlog_regrant_write_log_space */
2716
2717 2715
2718/* The first cnt-1 times through here we don't need to 2716/* The first cnt-1 times through here we don't need to
2719 * move the grant write head because the permanent 2717 * move the grant write head because the permanent
diff --git a/fs/xfs/xfs_sync.c b/fs/xfs/xfs_sync.c
index aa3dc1a4d53d..be5c51d8f757 100644
--- a/fs/xfs/xfs_sync.c
+++ b/fs/xfs/xfs_sync.c
@@ -770,6 +770,17 @@ restart:
770 if (!xfs_iflock_nowait(ip)) { 770 if (!xfs_iflock_nowait(ip)) {
771 if (!(sync_mode & SYNC_WAIT)) 771 if (!(sync_mode & SYNC_WAIT))
772 goto out; 772 goto out;
773
774 /*
775 * If we only have a single dirty inode in a cluster there is
776 * a fair chance that the AIL push may have pushed it into
777 * the buffer, but xfsbufd won't touch it until 30 seconds
778 * from now, and thus we will lock up here.
779 *
780 * Promote the inode buffer to the front of the delwri list
781 * and wake up xfsbufd now.
782 */
783 xfs_promote_inode(ip);
773 xfs_iflock(ip); 784 xfs_iflock(ip);
774 } 785 }
775 786
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index f1d2802b2f07..494035798873 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -834,18 +834,14 @@ DEFINE_LOGGRANT_EVENT(xfs_log_umount_write);
834DEFINE_LOGGRANT_EVENT(xfs_log_grant_enter); 834DEFINE_LOGGRANT_EVENT(xfs_log_grant_enter);
835DEFINE_LOGGRANT_EVENT(xfs_log_grant_exit); 835DEFINE_LOGGRANT_EVENT(xfs_log_grant_exit);
836DEFINE_LOGGRANT_EVENT(xfs_log_grant_error); 836DEFINE_LOGGRANT_EVENT(xfs_log_grant_error);
837DEFINE_LOGGRANT_EVENT(xfs_log_grant_sleep1); 837DEFINE_LOGGRANT_EVENT(xfs_log_grant_sleep);
838DEFINE_LOGGRANT_EVENT(xfs_log_grant_wake1); 838DEFINE_LOGGRANT_EVENT(xfs_log_grant_wake);
839DEFINE_LOGGRANT_EVENT(xfs_log_grant_sleep2);
840DEFINE_LOGGRANT_EVENT(xfs_log_grant_wake2);
841DEFINE_LOGGRANT_EVENT(xfs_log_grant_wake_up); 839DEFINE_LOGGRANT_EVENT(xfs_log_grant_wake_up);
842DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_enter); 840DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_enter);
843DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_exit); 841DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_exit);
844DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_error); 842DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_error);
845DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_sleep1); 843DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_sleep);
846DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_wake1); 844DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_wake);
847DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_sleep2);
848DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_wake2);
849DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_wake_up); 845DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_wake_up);
850DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_enter); 846DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_enter);
851DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_exit); 847DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_exit);