Diffstat (limited to 'fs')
-rw-r--r--  fs/9p/vfs_inode.c          3
-rw-r--r--  fs/9p/vfs_inode_dotl.c     3
-rw-r--r--  fs/btrfs/btrfs_inode.h     2
-rw-r--r--  fs/btrfs/ctree.h           1
-rw-r--r--  fs/btrfs/disk-io.c        41
-rw-r--r--  fs/btrfs/extent-tree.c    16
-rw-r--r--  fs/btrfs/inode-map.c      17
-rw-r--r--  fs/btrfs/inode.c          94
-rw-r--r--  fs/btrfs/ioctl.c         259
-rw-r--r--  fs/btrfs/ordered-data.c    5
-rw-r--r--  fs/btrfs/qgroup.c         49
-rw-r--r--  fs/btrfs/relocation.c      2
-rw-r--r--  fs/btrfs/scrub.c          39
-rw-r--r--  fs/btrfs/transaction.c     4
-rw-r--r--  fs/btrfs/tree-log.c      226
-rw-r--r--  fs/btrfs/volumes.c        50
-rw-r--r--  fs/compat_ioctl.c          1
-rw-r--r--  fs/configfs/item.c         4
-rw-r--r--  fs/dcache.c                7
-rw-r--r--  fs/ecryptfs/file.c         1
-rw-r--r--  fs/ext4/extents.c          6
-rw-r--r--  fs/ext4/inode.c           22
-rw-r--r--  fs/ext4/ioctl.c            1
-rw-r--r--  fs/ext4/mballoc.c         16
-rw-r--r--  fs/ext4/migrate.c         17
-rw-r--r--  fs/hpfs/alloc.c           95
-rw-r--r--  fs/hpfs/dir.c              1
-rw-r--r--  fs/hpfs/file.c             1
-rw-r--r--  fs/hpfs/hpfs_fn.h          4
-rw-r--r--  fs/hpfs/super.c           47
-rw-r--r--  fs/jfs/file.c              2
-rw-r--r--  fs/jfs/inode.c             4
-rw-r--r--  fs/jfs/ioctl.c             3
-rw-r--r--  fs/jfs/namei.c            27
-rw-r--r--  fs/locks.c                38
-rw-r--r--  fs/nfs/nfs4proc.c         18
-rw-r--r--  fs/nilfs2/ioctl.c          1
-rw-r--r--  fs/notify/mark.c          34
-rw-r--r--  fs/ocfs2/ioctl.c           1
-rw-r--r--  fs/overlayfs/inode.c       3
-rw-r--r--  fs/proc/Kconfig            6
-rw-r--r--  fs/proc/base.c             5
-rw-r--r--  fs/proc/kcore.c            4
43 files changed, 938 insertions, 242 deletions
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 510040b04c96..b1dc51888048 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -540,8 +540,7 @@ static struct inode *v9fs_qid_iget(struct super_block *sb,
 	unlock_new_inode(inode);
 	return inode;
 error:
-	unlock_new_inode(inode);
-	iput(inode);
+	iget_failed(inode);
 	return ERR_PTR(retval);
 
 }
diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c
index 09e4433717b8..e8aa57dc8d6d 100644
--- a/fs/9p/vfs_inode_dotl.c
+++ b/fs/9p/vfs_inode_dotl.c
@@ -149,8 +149,7 @@ static struct inode *v9fs_qid_iget_dotl(struct super_block *sb,
 	unlock_new_inode(inode);
 	return inode;
 error:
-	unlock_new_inode(inode);
-	iput(inode);
+	iget_failed(inode);
 	return ERR_PTR(retval);
 
 }
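
Both 9p hunks above converge on the same VFS idiom for failed inode initialisation. A minimal sketch of that idiom follows; the function name and the fill-in step are placeholders, not part of the patch:

static struct inode *example_iget(struct super_block *sb, unsigned long ino)
{
	struct inode *inode;
	int err;

	inode = iget_locked(sb, ino);
	if (!inode)
		return ERR_PTR(-ENOMEM);
	if (!(inode->i_state & I_NEW))
		return inode;	/* already cached and initialised */

	err = 0;		/* ... initialise the new inode here ... */
	if (err) {
		/*
		 * iget_failed() marks the inode bad, unlocks it and drops
		 * the reference in one step; the old open-coded
		 * unlock_new_inode() + iput() pair let concurrent lookups
		 * see the half-initialised inode in between.
		 */
		iget_failed(inode);
		return ERR_PTR(err);
	}
	unlock_new_inode(inode);
	return inode;
}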
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index 0ef5cc13fae2..81220b2203c6 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -44,6 +44,8 @@
 #define BTRFS_INODE_IN_DELALLOC_LIST		9
 #define BTRFS_INODE_READDIO_NEED_LOCK		10
 #define BTRFS_INODE_HAS_PROPS			11
+/* DIO is ready to submit */
+#define BTRFS_INODE_DIO_READY			12
 /*
  * The following 3 bits are meant only for the btree inode.
  * When any of them is set, it means an error happened while writing an
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 80a9aefb0c46..aac314e14188 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1778,6 +1778,7 @@ struct btrfs_fs_info {
 	spinlock_t unused_bgs_lock;
 	struct list_head unused_bgs;
 	struct mutex unused_bg_unpin_mutex;
+	struct mutex delete_unused_bgs_mutex;
 
 	/* For btrfs to record security options */
 	struct security_mnt_opts security_opts;
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 3f43bfea3684..a9aadb2ad525 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1751,6 +1751,7 @@ static int cleaner_kthread(void *arg)
 {
 	struct btrfs_root *root = arg;
 	int again;
+	struct btrfs_trans_handle *trans;
 
 	do {
 		again = 0;
@@ -1772,7 +1773,6 @@ static int cleaner_kthread(void *arg)
 		}
 
 		btrfs_run_delayed_iputs(root);
-		btrfs_delete_unused_bgs(root->fs_info);
 		again = btrfs_clean_one_deleted_snapshot(root);
 		mutex_unlock(&root->fs_info->cleaner_mutex);
 
@@ -1781,6 +1781,16 @@ static int cleaner_kthread(void *arg)
 		 * needn't do anything special here.
 		 */
 		btrfs_run_defrag_inodes(root->fs_info);
+
+		/*
+		 * Acquires fs_info->delete_unused_bgs_mutex to avoid racing
+		 * with relocation (btrfs_relocate_chunk) and relocation
+		 * acquires fs_info->cleaner_mutex (btrfs_relocate_block_group)
+		 * after acquiring fs_info->delete_unused_bgs_mutex. So we
+		 * can't hold, nor need to, fs_info->cleaner_mutex when deleting
+		 * unused block groups.
+		 */
+		btrfs_delete_unused_bgs(root->fs_info);
 sleep:
 		if (!try_to_freeze() && !again) {
 			set_current_state(TASK_INTERRUPTIBLE);
@@ -1789,6 +1799,34 @@ sleep:
 			__set_current_state(TASK_RUNNING);
 		}
 	} while (!kthread_should_stop());
+
+	/*
+	 * Transaction kthread is stopped before us and wakes us up.
+	 * However we might have started a new transaction and COWed some
+	 * tree blocks when deleting unused block groups for example. So
+	 * make sure we commit the transaction we started to have a clean
+	 * shutdown when evicting the btree inode - if it has dirty pages
+	 * when we do the final iput() on it, eviction will trigger a
+	 * writeback for it which will fail with null pointer dereferences
+	 * since work queues and other resources were already released and
+	 * destroyed by the time the iput/eviction/writeback is made.
+	 */
+	trans = btrfs_attach_transaction(root);
+	if (IS_ERR(trans)) {
+		if (PTR_ERR(trans) != -ENOENT)
+			btrfs_err(root->fs_info,
+				  "cleaner transaction attach returned %ld",
+				  PTR_ERR(trans));
+	} else {
+		int ret;
+
+		ret = btrfs_commit_transaction(trans, root);
+		if (ret)
+			btrfs_err(root->fs_info,
+				  "cleaner open transaction commit returned %d",
+				  ret);
+	}
+
 	return 0;
 }
 
@@ -2492,6 +2530,7 @@ int open_ctree(struct super_block *sb,
 	spin_lock_init(&fs_info->unused_bgs_lock);
 	rwlock_init(&fs_info->tree_mod_log_lock);
 	mutex_init(&fs_info->unused_bg_unpin_mutex);
+	mutex_init(&fs_info->delete_unused_bgs_mutex);
 	mutex_init(&fs_info->reloc_mutex);
 	mutex_init(&fs_info->delalloc_root_mutex);
 	seqlock_init(&fs_info->profiles_lock);
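
The comment added to cleaner_kthread() describes a classic AB-BA inversion. A hypothetical illustration of the rule this patch enforces (the two mutex names stand in for cleaner_mutex and delete_unused_bgs_mutex; nothing below is taken from the patch itself):

static DEFINE_MUTEX(lock_a);	/* stands in for cleaner_mutex */
static DEFINE_MUTEX(lock_b);	/* stands in for delete_unused_bgs_mutex */

static void cleaner_side(void)
{
	mutex_lock(&lock_a);
	/* ... snapshot cleanup under A ... */
	mutex_unlock(&lock_a);

	/* B is only ever taken with A already released */
	mutex_lock(&lock_b);
	/* ... delete unused block groups ... */
	mutex_unlock(&lock_b);
}

static void relocation_side(void)
{
	mutex_lock(&lock_b);
	mutex_lock(&lock_a);	/* safe: no task holds A while waiting on B */
	/* ... relocate a block group ... */
	mutex_unlock(&lock_a);
	mutex_unlock(&lock_b);
}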
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 38b76cc02f48..171312d51799 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -2296,9 +2296,22 @@ static int run_one_delayed_ref(struct btrfs_trans_handle *trans,
 static inline struct btrfs_delayed_ref_node *
 select_delayed_ref(struct btrfs_delayed_ref_head *head)
 {
+	struct btrfs_delayed_ref_node *ref;
+
 	if (list_empty(&head->ref_list))
 		return NULL;
 
+	/*
+	 * Select a delayed ref of type BTRFS_ADD_DELAYED_REF first.
+	 * This is to prevent a ref count from going down to zero, which deletes
+	 * the extent item from the extent tree, when there still are references
+	 * to add, which would fail because they would not find the extent item.
+	 */
+	list_for_each_entry(ref, &head->ref_list, list) {
+		if (ref->action == BTRFS_ADD_DELAYED_REF)
+			return ref;
+	}
+
 	return list_entry(head->ref_list.next, struct btrfs_delayed_ref_node,
 			  list);
 }
@@ -9889,6 +9902,8 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
 	}
 	spin_unlock(&fs_info->unused_bgs_lock);
 
+	mutex_lock(&root->fs_info->delete_unused_bgs_mutex);
+
 	/* Don't want to race with allocators so take the groups_sem */
 	down_write(&space_info->groups_sem);
 	spin_lock(&block_group->lock);
@@ -9983,6 +9998,7 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
 end_trans:
 	btrfs_end_transaction(trans, root);
 next:
+	mutex_unlock(&root->fs_info->delete_unused_bgs_mutex);
 	btrfs_put_block_group(block_group);
 	spin_lock(&fs_info->unused_bgs_lock);
 	}
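
The new select_delayed_ref() is a simple two-tier priority scan: run all pending additions before any drop so the reference count can never transiently reach zero. A self-contained sketch of the same policy on a generic list (types and names are placeholders):

enum ref_action { REF_ADD, REF_DROP };

struct ref {
	struct list_head list;
	enum ref_action action;
};

static struct ref *select_ref(struct list_head *head)
{
	struct ref *r;

	if (list_empty(head))
		return NULL;

	/* prefer additions so the count never dips to zero early */
	list_for_each_entry(r, head, list)
		if (r->action == REF_ADD)
			return r;

	/* no pending additions left: run the oldest entry (a drop) */
	return list_first_entry(head, struct ref, list);
}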
diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c
index f6a596d5a637..d4a582ac3f73 100644
--- a/fs/btrfs/inode-map.c
+++ b/fs/btrfs/inode-map.c
@@ -246,6 +246,7 @@ void btrfs_unpin_free_ino(struct btrfs_root *root)
 {
 	struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
 	struct rb_root *rbroot = &root->free_ino_pinned->free_space_offset;
+	spinlock_t *rbroot_lock = &root->free_ino_pinned->tree_lock;
 	struct btrfs_free_space *info;
 	struct rb_node *n;
 	u64 count;
@@ -254,24 +255,30 @@ void btrfs_unpin_free_ino(struct btrfs_root *root)
 		return;
 
 	while (1) {
+		bool add_to_ctl = true;
+
+		spin_lock(rbroot_lock);
 		n = rb_first(rbroot);
-		if (!n)
+		if (!n) {
+			spin_unlock(rbroot_lock);
 			break;
+		}
 
 		info = rb_entry(n, struct btrfs_free_space, offset_index);
 		BUG_ON(info->bitmap); /* Logic error */
 
 		if (info->offset > root->ino_cache_progress)
-			goto free;
+			add_to_ctl = false;
 		else if (info->offset + info->bytes > root->ino_cache_progress)
 			count = root->ino_cache_progress - info->offset + 1;
 		else
 			count = info->bytes;
 
-		__btrfs_add_free_space(ctl, info->offset, count);
-free:
 		rb_erase(&info->offset_index, rbroot);
-		kfree(info);
+		spin_unlock(rbroot_lock);
+		if (add_to_ctl)
+			__btrfs_add_free_space(ctl, info->offset, count);
+		kmem_cache_free(btrfs_free_space_cachep, info);
 	}
 }
 
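
The shape of the rework above is detach-then-process: hold the spinlock only long enough to unlink one node, then drop it before calling anything that may block or allocate (such as __btrfs_add_free_space() in the real code). A minimal sketch of that shape, with a hypothetical process_node() helper:

static void process_node(struct rb_node *n);	/* hypothetical, may sleep */

static void drain_tree(struct rb_root *root, spinlock_t *lock)
{
	while (1) {
		struct rb_node *n;

		spin_lock(lock);
		n = rb_first(root);
		if (!n) {
			spin_unlock(lock);
			break;
		}
		rb_erase(n, root);
		spin_unlock(lock);

		process_node(n);	/* lock is not held here */
	}
}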
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 855935f6671a..e33dff356460 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -4209,7 +4209,7 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
 	u64 extent_num_bytes = 0;
 	u64 extent_offset = 0;
 	u64 item_end = 0;
-	u64 last_size = (u64)-1;
+	u64 last_size = new_size;
 	u32 found_type = (u8)-1;
 	int found_extent;
 	int del_item;
@@ -4493,8 +4493,7 @@ out:
 		btrfs_abort_transaction(trans, root, ret);
 	}
 error:
-	if (last_size != (u64)-1 &&
-	    root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID)
+	if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID)
 		btrfs_ordered_update_i_size(inode, last_size, NULL);
 
 	btrfs_free_path(path);
@@ -4989,8 +4988,9 @@ static void evict_inode_truncate_pages(struct inode *inode)
 	/*
 	 * Keep looping until we have no more ranges in the io tree.
 	 * We can have ongoing bios started by readpages (called from readahead)
-	 * that didn't get their end io callbacks called yet or they are still
-	 * in progress (extent_io.c:end_bio_extent_readpage()). This means some
+	 * that have their endio callback (extent_io.c:end_bio_extent_readpage)
+	 * still in progress (unlocked the pages in the bio but did not yet
+	 * unlock the ranges in the io tree). Therefore this means some
 	 * ranges can still be locked and eviction started because before
 	 * submitting those bios, which are executed by a separate task (work
 	 * queue kthread), inode references (inode->i_count) were not taken
@@ -7546,6 +7546,7 @@ unlock:
 
 		current->journal_info = outstanding_extents;
 		btrfs_free_reserved_data_space(inode, len);
+		set_bit(BTRFS_INODE_DIO_READY, &BTRFS_I(inode)->runtime_flags);
 	}
 
 	/*
@@ -7871,8 +7872,6 @@ static void btrfs_endio_direct_write(struct bio *bio, int err)
 	struct bio *dio_bio;
 	int ret;
 
-	if (err)
-		goto out_done;
 again:
 	ret = btrfs_dec_test_first_ordered_pending(inode, &ordered,
 						   &ordered_offset,
@@ -7895,7 +7894,6 @@ out_test:
 		ordered = NULL;
 		goto again;
 	}
-out_done:
 	dio_bio = dip->dio_bio;
 
 	kfree(dip);
@@ -8163,9 +8161,8 @@ out_err:
 static void btrfs_submit_direct(int rw, struct bio *dio_bio,
 				struct inode *inode, loff_t file_offset)
 {
-	struct btrfs_root *root = BTRFS_I(inode)->root;
-	struct btrfs_dio_private *dip;
-	struct bio *io_bio;
+	struct btrfs_dio_private *dip = NULL;
+	struct bio *io_bio = NULL;
 	struct btrfs_io_bio *btrfs_bio;
 	int skip_sum;
 	int write = rw & REQ_WRITE;
@@ -8182,7 +8179,7 @@ static void btrfs_submit_direct(int rw, struct bio *dio_bio,
 	dip = kzalloc(sizeof(*dip), GFP_NOFS);
 	if (!dip) {
 		ret = -ENOMEM;
-		goto free_io_bio;
+		goto free_ordered;
 	}
 
 	dip->private = dio_bio->bi_private;
@@ -8210,25 +8207,55 @@ static void btrfs_submit_direct(int rw, struct bio *dio_bio,
 
 	if (btrfs_bio->end_io)
 		btrfs_bio->end_io(btrfs_bio, ret);
-free_io_bio:
-	bio_put(io_bio);
 
 free_ordered:
 	/*
-	 * If this is a write, we need to clean up the reserved space and kill
-	 * the ordered extent.
+	 * If we arrived here it means we failed to submit the dip, or we
+	 * failed to clone the dio_bio or to allocate the dip. If we cloned
+	 * the dio_bio and allocated the dip, we can just call bio_endio
+	 * against our io_bio so that we get proper resource cleanup if we
+	 * fail to submit the dip, otherwise, we must do the same as
+	 * btrfs_endio_direct_[write|read] because we can't call these
+	 * callbacks - they require an allocated dip and a clone of dio_bio.
 	 */
-	if (write) {
-		struct btrfs_ordered_extent *ordered;
-		ordered = btrfs_lookup_ordered_extent(inode, file_offset);
-		if (!test_bit(BTRFS_ORDERED_PREALLOC, &ordered->flags) &&
-		    !test_bit(BTRFS_ORDERED_NOCOW, &ordered->flags))
-			btrfs_free_reserved_extent(root, ordered->start,
-						   ordered->disk_len, 1);
-		btrfs_put_ordered_extent(ordered);
-		btrfs_put_ordered_extent(ordered);
+	if (io_bio && dip) {
+		bio_endio(io_bio, ret);
+		/*
+		 * The end io callbacks free our dip, do the final put on io_bio
+		 * and all the cleanup and final put for dio_bio (through
+		 * dio_end_io()).
+		 */
+		dip = NULL;
+		io_bio = NULL;
+	} else {
+		if (write) {
+			struct btrfs_ordered_extent *ordered;
+
+			ordered = btrfs_lookup_ordered_extent(inode,
+							      file_offset);
+			set_bit(BTRFS_ORDERED_IOERR, &ordered->flags);
+			/*
+			 * Decrements our ref on the ordered extent and removes
+			 * the ordered extent from the inode's ordered tree,
+			 * doing all the proper resource cleanup such as for the
+			 * reserved space and waking up any waiters for this
+			 * ordered extent (through btrfs_remove_ordered_extent).
+			 */
+			btrfs_finish_ordered_io(ordered);
+		} else {
+			unlock_extent(&BTRFS_I(inode)->io_tree, file_offset,
+				      file_offset + dio_bio->bi_iter.bi_size - 1);
+		}
+		clear_bit(BIO_UPTODATE, &dio_bio->bi_flags);
+		/*
+		 * Releases and cleans up our dio_bio, no need to bio_put()
+		 * nor bio_endio()/bio_io_error() against dio_bio.
+		 */
+		dio_end_io(dio_bio, ret);
 	}
-	bio_endio(dio_bio, ret);
+	if (io_bio)
+		bio_put(io_bio);
+	kfree(dip);
 }
 
 static ssize_t check_direct_IO(struct btrfs_root *root, struct kiocb *iocb,
@@ -8330,9 +8357,18 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
 				   btrfs_submit_direct, flags);
 	if (iov_iter_rw(iter) == WRITE) {
 		current->journal_info = NULL;
-		if (ret < 0 && ret != -EIOCBQUEUED)
-			btrfs_delalloc_release_space(inode, count);
-		else if (ret >= 0 && (size_t)ret < count)
+		if (ret < 0 && ret != -EIOCBQUEUED) {
+			/*
+			 * If the error comes from the submitting stage,
+			 * btrfs_get_blocks_direct() has freed the data space,
+			 * and the metadata space will be handled by
+			 * finish_ordered_fn, so don't do that again here to
+			 * make sure bytes_may_use is correct.
+			 */
+			if (!test_and_clear_bit(BTRFS_INODE_DIO_READY,
+						&BTRFS_I(inode)->runtime_flags))
+				btrfs_delalloc_release_space(inode, count);
+		} else if (ret >= 0 && (size_t)ret < count)
 			btrfs_delalloc_release_space(inode,
 						     count - (size_t)ret);
 	}
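
The BTRFS_INODE_DIO_READY flag implements a "release exactly once" handshake between the DIO submit path and the error path. A sketch of the two sides, restating the patch's own calls (the two wrapper functions are hypothetical):

static void submit_side(struct inode *inode)
{
	/* reservation handed off to the ordered extent machinery */
	set_bit(BTRFS_INODE_DIO_READY, &BTRFS_I(inode)->runtime_flags);
}

static void error_side(struct inode *inode, u64 count)
{
	/* only release if the submit side never took ownership */
	if (!test_and_clear_bit(BTRFS_INODE_DIO_READY,
				&BTRFS_I(inode)->runtime_flags))
		btrfs_delalloc_release_space(inode, count);
}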
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index c86b835da7a8..0770c91586ca 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -87,7 +87,8 @@ struct btrfs_ioctl_received_subvol_args_32 {
 
 
 static int btrfs_clone(struct inode *src, struct inode *inode,
-		       u64 off, u64 olen, u64 olen_aligned, u64 destoff);
+		       u64 off, u64 olen, u64 olen_aligned, u64 destoff,
+		       int no_time_update);
 
 /* Mask out flags that are inappropriate for the given type of inode. */
 static inline __u32 btrfs_mask_flags(umode_t mode, __u32 flags)
@@ -2765,14 +2766,11 @@ out:
 	return ret;
 }
 
-static struct page *extent_same_get_page(struct inode *inode, u64 off)
+static struct page *extent_same_get_page(struct inode *inode, pgoff_t index)
 {
 	struct page *page;
-	pgoff_t index;
 	struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
 
-	index = off >> PAGE_CACHE_SHIFT;
-
 	page = grab_cache_page(inode->i_mapping, index);
 	if (!page)
 		return NULL;
@@ -2793,6 +2791,20 @@ static struct page *extent_same_get_page(struct inode *inode, u64 off)
 	return page;
 }
 
+static int gather_extent_pages(struct inode *inode, struct page **pages,
+			       int num_pages, u64 off)
+{
+	int i;
+	pgoff_t index = off >> PAGE_CACHE_SHIFT;
+
+	for (i = 0; i < num_pages; i++) {
+		pages[i] = extent_same_get_page(inode, index + i);
+		if (!pages[i])
+			return -ENOMEM;
+	}
+	return 0;
+}
+
 static inline void lock_extent_range(struct inode *inode, u64 off, u64 len)
 {
 	/* do any pending delalloc/csum calc on src, one way or
@@ -2818,52 +2830,120 @@ static inline void lock_extent_range(struct inode *inode, u64 off, u64 len)
 	}
 }
 
-static void btrfs_double_unlock(struct inode *inode1, u64 loff1,
-				struct inode *inode2, u64 loff2, u64 len)
+static void btrfs_double_inode_unlock(struct inode *inode1, struct inode *inode2)
 {
-	unlock_extent(&BTRFS_I(inode1)->io_tree, loff1, loff1 + len - 1);
-	unlock_extent(&BTRFS_I(inode2)->io_tree, loff2, loff2 + len - 1);
-
 	mutex_unlock(&inode1->i_mutex);
 	mutex_unlock(&inode2->i_mutex);
 }
 
-static void btrfs_double_lock(struct inode *inode1, u64 loff1,
-			      struct inode *inode2, u64 loff2, u64 len)
+static void btrfs_double_inode_lock(struct inode *inode1, struct inode *inode2)
+{
+	if (inode1 < inode2)
+		swap(inode1, inode2);
+
+	mutex_lock_nested(&inode1->i_mutex, I_MUTEX_PARENT);
+	if (inode1 != inode2)
+		mutex_lock_nested(&inode2->i_mutex, I_MUTEX_CHILD);
+}
+
+static void btrfs_double_extent_unlock(struct inode *inode1, u64 loff1,
+				       struct inode *inode2, u64 loff2, u64 len)
+{
+	unlock_extent(&BTRFS_I(inode1)->io_tree, loff1, loff1 + len - 1);
+	unlock_extent(&BTRFS_I(inode2)->io_tree, loff2, loff2 + len - 1);
+}
+
+static void btrfs_double_extent_lock(struct inode *inode1, u64 loff1,
+				     struct inode *inode2, u64 loff2, u64 len)
 {
 	if (inode1 < inode2) {
 		swap(inode1, inode2);
 		swap(loff1, loff2);
 	}
-
-	mutex_lock_nested(&inode1->i_mutex, I_MUTEX_PARENT);
 	lock_extent_range(inode1, loff1, len);
-	if (inode1 != inode2) {
-		mutex_lock_nested(&inode2->i_mutex, I_MUTEX_CHILD);
+	if (inode1 != inode2)
 		lock_extent_range(inode2, loff2, len);
-	}
+}
+
+struct cmp_pages {
+	int num_pages;
+	struct page **src_pages;
+	struct page **dst_pages;
+};
+
+static void btrfs_cmp_data_free(struct cmp_pages *cmp)
+{
+	int i;
+	struct page *pg;
+
+	for (i = 0; i < cmp->num_pages; i++) {
+		pg = cmp->src_pages[i];
+		if (pg)
+			page_cache_release(pg);
+		pg = cmp->dst_pages[i];
+		if (pg)
+			page_cache_release(pg);
+	}
+	kfree(cmp->src_pages);
+	kfree(cmp->dst_pages);
+}
+
+static int btrfs_cmp_data_prepare(struct inode *src, u64 loff,
+				  struct inode *dst, u64 dst_loff,
+				  u64 len, struct cmp_pages *cmp)
+{
+	int ret;
+	int num_pages = PAGE_CACHE_ALIGN(len) >> PAGE_CACHE_SHIFT;
+	struct page **src_pgarr, **dst_pgarr;
+
+	/*
+	 * We must gather up all the pages before we initiate our
+	 * extent locking. We use an array for the page pointers. Size
+	 * of the array is bounded by len, which is in turn bounded by
+	 * BTRFS_MAX_DEDUPE_LEN.
+	 */
+	src_pgarr = kzalloc(num_pages * sizeof(struct page *), GFP_NOFS);
+	dst_pgarr = kzalloc(num_pages * sizeof(struct page *), GFP_NOFS);
+	if (!src_pgarr || !dst_pgarr) {
+		kfree(src_pgarr);
+		kfree(dst_pgarr);
+		return -ENOMEM;
+	}
+	cmp->num_pages = num_pages;
+	cmp->src_pages = src_pgarr;
+	cmp->dst_pages = dst_pgarr;
+
+	ret = gather_extent_pages(src, cmp->src_pages, cmp->num_pages, loff);
+	if (ret)
+		goto out;
+
+	ret = gather_extent_pages(dst, cmp->dst_pages, cmp->num_pages, dst_loff);
+
+out:
+	if (ret)
+		btrfs_cmp_data_free(cmp);
+	return 0;
 }
 
 static int btrfs_cmp_data(struct inode *src, u64 loff, struct inode *dst,
-			  u64 dst_loff, u64 len)
+			  u64 dst_loff, u64 len, struct cmp_pages *cmp)
 {
 	int ret = 0;
+	int i;
 	struct page *src_page, *dst_page;
 	unsigned int cmp_len = PAGE_CACHE_SIZE;
 	void *addr, *dst_addr;
 
+	i = 0;
 	while (len) {
 		if (len < PAGE_CACHE_SIZE)
 			cmp_len = len;
 
-		src_page = extent_same_get_page(src, loff);
-		if (!src_page)
-			return -EINVAL;
-		dst_page = extent_same_get_page(dst, dst_loff);
-		if (!dst_page) {
-			page_cache_release(src_page);
-			return -EINVAL;
-		}
+		BUG_ON(i >= cmp->num_pages);
+
+		src_page = cmp->src_pages[i];
+		dst_page = cmp->dst_pages[i];
+
 		addr = kmap_atomic(src_page);
 		dst_addr = kmap_atomic(dst_page);
 
@@ -2875,15 +2955,12 @@ static int btrfs_cmp_data(struct inode *src, u64 loff, struct inode *dst,
 
 		kunmap_atomic(addr);
 		kunmap_atomic(dst_addr);
-		page_cache_release(src_page);
-		page_cache_release(dst_page);
 
 		if (ret)
 			break;
 
-		loff += cmp_len;
-		dst_loff += cmp_len;
 		len -= cmp_len;
+		i++;
 	}
 
 	return ret;
@@ -2914,27 +2991,62 @@ static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen,
 {
 	int ret;
 	u64 len = olen;
+	struct cmp_pages cmp;
+	int same_inode = 0;
+	u64 same_lock_start = 0;
+	u64 same_lock_len = 0;
 
-	/*
-	 * btrfs_clone() can't handle extents in the same file
-	 * yet. Once that works, we can drop this check and replace it
-	 * with a check for the same inode, but overlapping extents.
-	 */
 	if (src == dst)
-		return -EINVAL;
+		same_inode = 1;
 
 	if (len == 0)
 		return 0;
 
-	btrfs_double_lock(src, loff, dst, dst_loff, len);
+	if (same_inode) {
+		mutex_lock(&src->i_mutex);
 
-	ret = extent_same_check_offsets(src, loff, &len, olen);
-	if (ret)
-		goto out_unlock;
+		ret = extent_same_check_offsets(src, loff, &len, olen);
+		if (ret)
+			goto out_unlock;
 
-	ret = extent_same_check_offsets(dst, dst_loff, &len, olen);
-	if (ret)
-		goto out_unlock;
+		/*
+		 * Single inode case wants the same checks, except we
+		 * don't want our length pushed out past i_size as
+		 * comparing that data range makes no sense.
+		 *
+		 * extent_same_check_offsets() will do this for an
+		 * unaligned length at i_size, so catch it here and
+		 * reject the request.
+		 *
+		 * This effectively means we require aligned extents
+		 * for the single-inode case, whereas the other cases
+		 * allow an unaligned length so long as it ends at
+		 * i_size.
+		 */
+		if (len != olen) {
+			ret = -EINVAL;
+			goto out_unlock;
+		}
+
+		/* Check for overlapping ranges */
+		if (dst_loff + len > loff && dst_loff < loff + len) {
+			ret = -EINVAL;
+			goto out_unlock;
+		}
+
+		same_lock_start = min_t(u64, loff, dst_loff);
+		same_lock_len = max_t(u64, loff, dst_loff) + len - same_lock_start;
+	} else {
+		btrfs_double_inode_lock(src, dst);
+
+		ret = extent_same_check_offsets(src, loff, &len, olen);
+		if (ret)
+			goto out_unlock;
+
+		ret = extent_same_check_offsets(dst, dst_loff, &len, olen);
+		if (ret)
+			goto out_unlock;
+	}
 
 	/* don't make the dst file partly checksummed */
 	if ((BTRFS_I(src)->flags & BTRFS_INODE_NODATASUM) !=
@@ -2943,12 +3055,32 @@ static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen,
 		goto out_unlock;
 	}
 
-	ret = btrfs_cmp_data(src, loff, dst, dst_loff, len);
+	ret = btrfs_cmp_data_prepare(src, loff, dst, dst_loff, olen, &cmp);
+	if (ret)
+		goto out_unlock;
+
+	if (same_inode)
+		lock_extent_range(src, same_lock_start, same_lock_len);
+	else
+		btrfs_double_extent_lock(src, loff, dst, dst_loff, len);
+
+	/* pass original length for comparison so we stay within i_size */
+	ret = btrfs_cmp_data(src, loff, dst, dst_loff, olen, &cmp);
 	if (ret == 0)
-		ret = btrfs_clone(src, dst, loff, olen, len, dst_loff);
+		ret = btrfs_clone(src, dst, loff, olen, len, dst_loff, 1);
+
+	if (same_inode)
+		unlock_extent(&BTRFS_I(src)->io_tree, same_lock_start,
+			      same_lock_start + same_lock_len - 1);
+	else
+		btrfs_double_extent_unlock(src, loff, dst, dst_loff, len);
 
+	btrfs_cmp_data_free(&cmp);
 out_unlock:
-	btrfs_double_unlock(src, loff, dst, dst_loff, len);
+	if (same_inode)
+		mutex_unlock(&src->i_mutex);
+	else
+		btrfs_double_inode_unlock(src, dst);
 
 	return ret;
 }
@@ -2958,7 +3090,7 @@ out_unlock:
 static long btrfs_ioctl_file_extent_same(struct file *file,
 					 struct btrfs_ioctl_same_args __user *argp)
 {
-	struct btrfs_ioctl_same_args *same;
+	struct btrfs_ioctl_same_args *same = NULL;
 	struct btrfs_ioctl_same_extent_info *info;
 	struct inode *src = file_inode(file);
 	u64 off;
@@ -2988,6 +3120,7 @@ static long btrfs_ioctl_file_extent_same(struct file *file,
 
 	if (IS_ERR(same)) {
 		ret = PTR_ERR(same);
+		same = NULL;
 		goto out;
 	}
 
@@ -3058,6 +3191,7 @@ static long btrfs_ioctl_file_extent_same(struct file *file,
 
 out:
 	mnt_drop_write_file(file);
+	kfree(same);
 	return ret;
 }
 
@@ -3100,13 +3234,15 @@ static int clone_finish_inode_update(struct btrfs_trans_handle *trans,
 				     struct inode *inode,
 				     u64 endoff,
 				     const u64 destoff,
-				     const u64 olen)
+				     const u64 olen,
+				     int no_time_update)
 {
 	struct btrfs_root *root = BTRFS_I(inode)->root;
 	int ret;
 
 	inode_inc_iversion(inode);
-	inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+	if (!no_time_update)
+		inode->i_mtime = inode->i_ctime = CURRENT_TIME;
 	/*
 	 * We round up to the block size at eof when determining which
 	 * extents to clone above, but shouldn't round up the file size.
@@ -3191,13 +3327,13 @@ static void clone_update_extent_map(struct inode *inode,
  * @inode:	Inode to clone to
  * @off:	Offset within source to start clone from
  * @olen:	Original length, passed by user, of range to clone
- * @olen_aligned: Block-aligned value of olen, extent_same uses
- *               identical values here
+ * @olen_aligned: Block-aligned value of olen
  * @destoff:	Offset within @inode to start clone
+ * @no_time_update: Whether to update mtime/ctime on the target inode
  */
 static int btrfs_clone(struct inode *src, struct inode *inode,
 		       const u64 off, const u64 olen, const u64 olen_aligned,
-		       const u64 destoff)
+		       const u64 destoff, int no_time_update)
 {
 	struct btrfs_root *root = BTRFS_I(inode)->root;
 	struct btrfs_path *path = NULL;
@@ -3452,6 +3588,20 @@ process_slot:
 			u64 trim = 0;
 			u64 aligned_end = 0;
 
+			/*
+			 * Don't copy an inline extent into an offset
+			 * greater than zero. Having an inline extent
+			 * at such an offset results in chaos as btrfs
+			 * isn't prepared for such cases. Just skip
+			 * this case for the same reasons as commented
+			 * at btrfs_ioctl_clone().
+			 */
+			if (last_dest_end > 0) {
+				ret = -EOPNOTSUPP;
+				btrfs_end_transaction(trans, root);
+				goto out;
+			}
+
 			if (off > key.offset) {
 				skip = off - key.offset;
 				new_key.offset += skip;
@@ -3521,7 +3671,8 @@ process_slot:
 					      root->sectorsize);
 			ret = clone_finish_inode_update(trans, inode,
 							last_dest_end,
-							destoff, olen);
+							destoff, olen,
+							no_time_update);
 			if (ret)
 				goto out;
 			if (new_key.offset + datal >= destoff + len)
@@ -3559,7 +3710,7 @@ process_slot:
 		clone_update_extent_map(inode, trans, NULL, last_dest_end,
 					destoff + len - last_dest_end);
 		ret = clone_finish_inode_update(trans, inode, destoff + len,
-						destoff, olen);
+						destoff, olen, no_time_update);
 	}
 
 out:
@@ -3696,7 +3847,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
 		lock_extent_range(inode, destoff, len);
 	}
 
-	ret = btrfs_clone(src, inode, off, olen, len, destoff);
+	ret = btrfs_clone(src, inode, off, olen, len, destoff, 0);
 
 	if (same_inode) {
 		u64 lock_start = min_t(u64, off, destoff);
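
The new btrfs_double_inode_lock() above avoids AB-BA deadlocks by always acquiring the pair in a fixed, address-derived order. A sketch of that discipline with a hypothetical wrapper name (the same-inode case degenerates to a single lock, as in the dedupe path):

static void lock_two_inodes(struct inode *a, struct inode *b)
{
	if (a == b) {
		mutex_lock(&a->i_mutex);
		return;
	}
	if (a < b)
		swap(a, b);	/* fixed order: higher address first */

	mutex_lock_nested(&a->i_mutex, I_MUTEX_PARENT);
	mutex_lock_nested(&b->i_mutex, I_MUTEX_CHILD);
}

Because every task orders the same two inodes identically, no two tasks can each hold one lock while waiting for the other.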
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 89656d799ff6..52170cf1757e 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -552,6 +552,10 @@ void btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry)
 	trace_btrfs_ordered_extent_put(entry->inode, entry);
 
 	if (atomic_dec_and_test(&entry->refs)) {
+		ASSERT(list_empty(&entry->log_list));
+		ASSERT(list_empty(&entry->trans_list));
+		ASSERT(list_empty(&entry->root_extent_list));
+		ASSERT(RB_EMPTY_NODE(&entry->rb_node));
 		if (entry->inode)
 			btrfs_add_delayed_iput(entry->inode);
 		while (!list_empty(&entry->list)) {
@@ -579,6 +583,7 @@ void btrfs_remove_ordered_extent(struct inode *inode,
 	spin_lock_irq(&tree->lock);
 	node = &entry->rb_node;
 	rb_erase(node, &tree->tree);
+	RB_CLEAR_NODE(node);
 	if (tree->last == node)
 		tree->last = NULL;
 	set_bit(BTRFS_ORDERED_COMPLETE, &entry->flags);
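
The RB_CLEAR_NODE() call is what makes the new RB_EMPTY_NODE() assertion meaningful: rb_erase() unlinks a node from the tree but leaves stale parent/colour pointers inside it, so the erased node would still look linked. A minimal sketch of the pairing (the helper name is a placeholder):

static void detach(struct rb_node *node, struct rb_root *root)
{
	rb_erase(node, root);
	/* reinitialise so RB_EMPTY_NODE(node) is now true */
	RB_CLEAR_NODE(node);
}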
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index d5f1f033b7a0..e9ace099162c 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -1349,6 +1349,11 @@ int btrfs_limit_qgroup(struct btrfs_trans_handle *trans,
 	struct btrfs_root *quota_root;
 	struct btrfs_qgroup *qgroup;
 	int ret = 0;
+	/* Sometimes we would want to clear the limit on this qgroup.
+	 * To meet this requirement, we treat the -1 as a special value
+	 * which tells the kernel to clear the limit on this qgroup.
+	 */
+	const u64 CLEAR_VALUE = -1;
 
 	mutex_lock(&fs_info->qgroup_ioctl_lock);
 	quota_root = fs_info->quota_root;
@@ -1364,14 +1369,42 @@ int btrfs_limit_qgroup(struct btrfs_trans_handle *trans,
 	}
 
 	spin_lock(&fs_info->qgroup_lock);
-	if (limit->flags & BTRFS_QGROUP_LIMIT_MAX_RFER)
-		qgroup->max_rfer = limit->max_rfer;
-	if (limit->flags & BTRFS_QGROUP_LIMIT_MAX_EXCL)
-		qgroup->max_excl = limit->max_excl;
-	if (limit->flags & BTRFS_QGROUP_LIMIT_RSV_RFER)
-		qgroup->rsv_rfer = limit->rsv_rfer;
-	if (limit->flags & BTRFS_QGROUP_LIMIT_RSV_EXCL)
-		qgroup->rsv_excl = limit->rsv_excl;
+	if (limit->flags & BTRFS_QGROUP_LIMIT_MAX_RFER) {
+		if (limit->max_rfer == CLEAR_VALUE) {
+			qgroup->lim_flags &= ~BTRFS_QGROUP_LIMIT_MAX_RFER;
+			limit->flags &= ~BTRFS_QGROUP_LIMIT_MAX_RFER;
+			qgroup->max_rfer = 0;
+		} else {
+			qgroup->max_rfer = limit->max_rfer;
+		}
+	}
+	if (limit->flags & BTRFS_QGROUP_LIMIT_MAX_EXCL) {
+		if (limit->max_excl == CLEAR_VALUE) {
+			qgroup->lim_flags &= ~BTRFS_QGROUP_LIMIT_MAX_EXCL;
+			limit->flags &= ~BTRFS_QGROUP_LIMIT_MAX_EXCL;
+			qgroup->max_excl = 0;
+		} else {
+			qgroup->max_excl = limit->max_excl;
+		}
+	}
+	if (limit->flags & BTRFS_QGROUP_LIMIT_RSV_RFER) {
+		if (limit->rsv_rfer == CLEAR_VALUE) {
+			qgroup->lim_flags &= ~BTRFS_QGROUP_LIMIT_RSV_RFER;
+			limit->flags &= ~BTRFS_QGROUP_LIMIT_RSV_RFER;
+			qgroup->rsv_rfer = 0;
+		} else {
+			qgroup->rsv_rfer = limit->rsv_rfer;
+		}
+	}
+	if (limit->flags & BTRFS_QGROUP_LIMIT_RSV_EXCL) {
+		if (limit->rsv_excl == CLEAR_VALUE) {
+			qgroup->lim_flags &= ~BTRFS_QGROUP_LIMIT_RSV_EXCL;
+			limit->flags &= ~BTRFS_QGROUP_LIMIT_RSV_EXCL;
+			qgroup->rsv_excl = 0;
+		} else {
+			qgroup->rsv_excl = limit->rsv_excl;
+		}
+	}
 	qgroup->lim_flags |= limit->flags;
 
 	spin_unlock(&fs_info->qgroup_lock);
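
All four limit fields above follow the same sentinel convention: a value of (u64)-1 means "clear this limit" rather than "limit to U64_MAX", so the handler drops the flag bit and zeroes the stored value. A sketch of that convention factored into one helper (the helper and its parameter names are hypothetical):

static void apply_limit(u64 *stored, unsigned int *flags,
			unsigned int bit, u64 requested)
{
	if (requested == (u64)-1) {
		*flags &= ~bit;		/* limit no longer enforced */
		*stored = 0;
	} else {
		*stored = requested;
		*flags |= bit;
	}
}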
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 827951fbf7fc..88cbb5995667 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -4049,7 +4049,7 @@ restart:
 	if (trans && progress && err == -ENOSPC) {
 		ret = btrfs_force_chunk_alloc(trans, rc->extent_root,
 					      rc->block_group->flags);
-		if (ret == 0) {
+		if (ret == 1) {
 			err = 0;
 			progress = 0;
 			goto restart;
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 9f2feabe99f2..94db0fa5225a 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -3571,7 +3571,6 @@ static noinline_for_stack int scrub_supers(struct scrub_ctx *sctx,
 static noinline_for_stack int scrub_workers_get(struct btrfs_fs_info *fs_info,
 						int is_dev_replace)
 {
-	int ret = 0;
 	unsigned int flags = WQ_FREEZABLE | WQ_UNBOUND;
 	int max_active = fs_info->thread_pool_size;
 
@@ -3584,34 +3583,36 @@ static noinline_for_stack int scrub_workers_get(struct btrfs_fs_info *fs_info,
 		fs_info->scrub_workers =
 			btrfs_alloc_workqueue("btrfs-scrub", flags,
 					      max_active, 4);
-		if (!fs_info->scrub_workers) {
-			ret = -ENOMEM;
-			goto out;
-		}
+		if (!fs_info->scrub_workers)
+			goto fail_scrub_workers;
+
 		fs_info->scrub_wr_completion_workers =
 			btrfs_alloc_workqueue("btrfs-scrubwrc", flags,
 					      max_active, 2);
-		if (!fs_info->scrub_wr_completion_workers) {
-			ret = -ENOMEM;
-			goto out;
-		}
+		if (!fs_info->scrub_wr_completion_workers)
+			goto fail_scrub_wr_completion_workers;
+
 		fs_info->scrub_nocow_workers =
 			btrfs_alloc_workqueue("btrfs-scrubnc", flags, 1, 0);
-		if (!fs_info->scrub_nocow_workers) {
-			ret = -ENOMEM;
-			goto out;
-		}
+		if (!fs_info->scrub_nocow_workers)
+			goto fail_scrub_nocow_workers;
 		fs_info->scrub_parity_workers =
 			btrfs_alloc_workqueue("btrfs-scrubparity", flags,
 					      max_active, 2);
-		if (!fs_info->scrub_parity_workers) {
-			ret = -ENOMEM;
-			goto out;
-		}
+		if (!fs_info->scrub_parity_workers)
+			goto fail_scrub_parity_workers;
 	}
 	++fs_info->scrub_workers_refcnt;
-out:
-	return ret;
+	return 0;
+
+fail_scrub_parity_workers:
+	btrfs_destroy_workqueue(fs_info->scrub_nocow_workers);
+fail_scrub_nocow_workers:
+	btrfs_destroy_workqueue(fs_info->scrub_wr_completion_workers);
+fail_scrub_wr_completion_workers:
+	btrfs_destroy_workqueue(fs_info->scrub_workers);
+fail_scrub_workers:
+	return -ENOMEM;
 }
 
 static noinline_for_stack void scrub_workers_put(struct btrfs_fs_info *fs_info)
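
The rework above replaces a single exit label, which leaked the queues already created when a later allocation failed, with the standard kernel unwind ladder: each failure jumps past its own cleanup so exactly the resources created so far are destroyed, in reverse order. A generic sketch of the shape using the stock workqueue API (queue names are placeholders):

static int make_queues(struct workqueue_struct **a, struct workqueue_struct **b)
{
	*a = alloc_workqueue("example-a", WQ_UNBOUND, 1);
	if (!*a)
		goto fail_a;

	*b = alloc_workqueue("example-b", WQ_UNBOUND, 1);
	if (!*b)
		goto fail_b;

	return 0;

fail_b:
	destroy_workqueue(*a);	/* only what was created gets torn down */
fail_a:
	return -ENOMEM;
}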
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index c0f18e7266b6..51e0f0d0053e 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -761,7 +761,7 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
 
 	if (!list_empty(&trans->ordered)) {
 		spin_lock(&info->trans_lock);
-		list_splice(&trans->ordered, &cur_trans->pending_ordered);
+		list_splice_init(&trans->ordered, &cur_trans->pending_ordered);
 		spin_unlock(&info->trans_lock);
 	}
 
@@ -1866,7 +1866,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
 	}
 
 	spin_lock(&root->fs_info->trans_lock);
-	list_splice(&trans->ordered, &cur_trans->pending_ordered);
+	list_splice_init(&trans->ordered, &cur_trans->pending_ordered);
 	if (cur_trans->state >= TRANS_STATE_COMMIT_START) {
 		spin_unlock(&root->fs_info->trans_lock);
 		atomic_inc(&cur_trans->use_count);
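
The one-word difference matters: after list_splice(), the source list head still points into nodes that now live on the destination, so touching trans->ordered again (a second splice, or a list_empty() check) would see stale pointers. list_splice_init() reinitialises the source head to a valid empty list. A sketch (the wrapper is hypothetical):

static void move_all(struct list_head *src, struct list_head *dst)
{
	list_splice_init(src, dst);	/* src is now a valid empty list */
	WARN_ON(!list_empty(src));	/* always holds with the _init variant */
}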
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 1ce80c1c4eb6..9c45431e69ab 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -4117,6 +4117,187 @@ static int logged_inode_size(struct btrfs_root *log, struct inode *inode,
 	return 0;
 }
 
+/*
+ * At the moment we always log all xattrs. This is to figure out at log replay
+ * time which xattrs must have their deletion replayed. If a xattr is missing
+ * in the log tree and exists in the fs/subvol tree, we delete it. This is
+ * because if a xattr is deleted, the inode is fsynced and a power failure
+ * happens, causing the log to be replayed the next time the fs is mounted,
+ * we want the xattr to not exist anymore (same behaviour as other filesystems
+ * with a journal, ext3/4, xfs, f2fs, etc).
+ */
+static int btrfs_log_all_xattrs(struct btrfs_trans_handle *trans,
+				struct btrfs_root *root,
+				struct inode *inode,
+				struct btrfs_path *path,
+				struct btrfs_path *dst_path)
+{
+	int ret;
+	struct btrfs_key key;
+	const u64 ino = btrfs_ino(inode);
+	int ins_nr = 0;
+	int start_slot = 0;
+
+	key.objectid = ino;
+	key.type = BTRFS_XATTR_ITEM_KEY;
+	key.offset = 0;
+
+	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+	if (ret < 0)
+		return ret;
+
+	while (true) {
+		int slot = path->slots[0];
+		struct extent_buffer *leaf = path->nodes[0];
+		int nritems = btrfs_header_nritems(leaf);
+
+		if (slot >= nritems) {
+			if (ins_nr > 0) {
+				u64 last_extent = 0;
+
+				ret = copy_items(trans, inode, dst_path, path,
+						 &last_extent, start_slot,
+						 ins_nr, 1, 0);
+				/* can't be 1, extent items aren't processed */
+				ASSERT(ret <= 0);
+				if (ret < 0)
+					return ret;
+				ins_nr = 0;
+			}
+			ret = btrfs_next_leaf(root, path);
+			if (ret < 0)
+				return ret;
+			else if (ret > 0)
+				break;
+			continue;
+		}
+
+		btrfs_item_key_to_cpu(leaf, &key, slot);
+		if (key.objectid != ino || key.type != BTRFS_XATTR_ITEM_KEY)
+			break;
+
+		if (ins_nr == 0)
+			start_slot = slot;
+		ins_nr++;
+		path->slots[0]++;
+		cond_resched();
+	}
+	if (ins_nr > 0) {
+		u64 last_extent = 0;
+
+		ret = copy_items(trans, inode, dst_path, path,
+				 &last_extent, start_slot,
+				 ins_nr, 1, 0);
+		/* can't be 1, extent items aren't processed */
+		ASSERT(ret <= 0);
+		if (ret < 0)
+			return ret;
+	}
+
+	return 0;
+}
+
+/*
+ * If the no holes feature is enabled we need to make sure any hole between the
+ * last extent and the i_size of our inode is explicitly marked in the log. This
+ * is to make sure that doing something like:
+ *
+ * 1) create file with 128Kb of data
+ * 2) truncate file to 64Kb
+ * 3) truncate file to 256Kb
+ * 4) fsync file
+ * 5) <crash/power failure>
+ * 6) mount fs and trigger log replay
+ *
+ * Will give us a file with a size of 256Kb, the first 64Kb of data match what
+ * the file had in its first 64Kb of data at step 1 and the last 192Kb of the
+ * file correspond to a hole. The presence of explicit holes in a log tree is
+ * what guarantees that log replay will remove/adjust file extent items in the
+ * fs/subvol tree.
+ *
+ * Here we do not need to care about holes between extents, that is already done
+ * by copy_items(). We also only need to do this in the full sync path, where we
+ * lookup for extents from the fs/subvol tree only. In the fast path case, we
+ * lookup the list of modified extent maps and if any represents a hole, we
+ * insert a corresponding extent representing a hole in the log tree.
+ */
+static int btrfs_log_trailing_hole(struct btrfs_trans_handle *trans,
+				   struct btrfs_root *root,
+				   struct inode *inode,
+				   struct btrfs_path *path)
+{
+	int ret;
+	struct btrfs_key key;
+	u64 hole_start;
+	u64 hole_size;
+	struct extent_buffer *leaf;
+	struct btrfs_root *log = root->log_root;
+	const u64 ino = btrfs_ino(inode);
+	const u64 i_size = i_size_read(inode);
+
+	if (!btrfs_fs_incompat(root->fs_info, NO_HOLES))
+		return 0;
+
+	key.objectid = ino;
+	key.type = BTRFS_EXTENT_DATA_KEY;
+	key.offset = (u64)-1;
+
+	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+	ASSERT(ret != 0);
+	if (ret < 0)
+		return ret;
+
+	ASSERT(path->slots[0] > 0);
+	path->slots[0]--;
+	leaf = path->nodes[0];
+	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
+
+	if (key.objectid != ino || key.type != BTRFS_EXTENT_DATA_KEY) {
+		/* inode does not have any extents */
+		hole_start = 0;
+		hole_size = i_size;
+	} else {
+		struct btrfs_file_extent_item *extent;
+		u64 len;
+
+		/*
+		 * If there's an extent beyond i_size, an explicit hole was
+		 * already inserted by copy_items().
+		 */
+		if (key.offset >= i_size)
+			return 0;
+
+		extent = btrfs_item_ptr(leaf, path->slots[0],
+					struct btrfs_file_extent_item);
+
+		if (btrfs_file_extent_type(leaf, extent) ==
+		    BTRFS_FILE_EXTENT_INLINE) {
+			len = btrfs_file_extent_inline_len(leaf,
+							   path->slots[0],
+							   extent);
+			ASSERT(len == i_size);
+			return 0;
+		}
+
+		len = btrfs_file_extent_num_bytes(leaf, extent);
+		/* Last extent goes beyond i_size, no need to log a hole. */
+		if (key.offset + len > i_size)
+			return 0;
+		hole_start = key.offset + len;
+		hole_size = i_size - hole_start;
+	}
+	btrfs_release_path(path);
+
+	/* Last extent ends at i_size. */
+	if (hole_size == 0)
+		return 0;
+
+	hole_size = ALIGN(hole_size, root->sectorsize);
+	ret = btrfs_insert_file_extent(trans, log, ino, hole_start, 0, 0,
+				       hole_size, 0, hole_size, 0, 0, 0);
+	return ret;
+}
+
 /* log a single inode in the tree log.
  * At least one parent directory for this inode must exist in the tree
  * or be logged already.
@@ -4155,6 +4336,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
 	u64 ino = btrfs_ino(inode);
 	struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
 	u64 logged_isize = 0;
+	bool need_log_inode_item = true;
 
 	path = btrfs_alloc_path();
 	if (!path)
@@ -4263,11 +4445,6 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
 	} else {
 		if (inode_only == LOG_INODE_ALL)
 			fast_search = true;
-		ret = log_inode_item(trans, log, dst_path, inode);
-		if (ret) {
-			err = ret;
-			goto out_unlock;
-		}
 		goto log_extents;
 	}
 
@@ -4290,6 +4467,28 @@ again:
 		if (min_key.type > max_key.type)
 			break;
 
+		if (min_key.type == BTRFS_INODE_ITEM_KEY)
+			need_log_inode_item = false;
+
+		/* Skip xattrs, we log them later with btrfs_log_all_xattrs() */
+		if (min_key.type == BTRFS_XATTR_ITEM_KEY) {
+			if (ins_nr == 0)
+				goto next_slot;
+			ret = copy_items(trans, inode, dst_path, path,
+					 &last_extent, ins_start_slot,
+					 ins_nr, inode_only, logged_isize);
+			if (ret < 0) {
+				err = ret;
+				goto out_unlock;
+			}
+			ins_nr = 0;
+			if (ret) {
+				btrfs_release_path(path);
+				continue;
+			}
+			goto next_slot;
+		}
+
 		src = path->nodes[0];
 		if (ins_nr && ins_start_slot + ins_nr == path->slots[0]) {
 			ins_nr++;
@@ -4357,9 +4556,26 @@ next_slot:
 		ins_nr = 0;
 	}
 
+	btrfs_release_path(path);
+	btrfs_release_path(dst_path);
+	err = btrfs_log_all_xattrs(trans, root, inode, path, dst_path);
+	if (err)
+		goto out_unlock;
+	if (max_key.type >= BTRFS_EXTENT_DATA_KEY && !fast_search) {
+		btrfs_release_path(path);
+		btrfs_release_path(dst_path);
+		err = btrfs_log_trailing_hole(trans, root, inode, path);
+		if (err)
+			goto out_unlock;
+	}
 log_extents:
 	btrfs_release_path(path);
 	btrfs_release_path(dst_path);
+	if (need_log_inode_item) {
+		err = log_inode_item(trans, log, dst_path, inode);
+		if (err)
+			goto out_unlock;
+	}
 	if (fast_search) {
 		/*
 		 * Some ordered extents started by fsync might have completed
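
The 128Kb/64Kb/256Kb scenario in the btrfs_log_trailing_hole() comment works out as follows; the values below are a worked example of the function's arithmetic, not code from the patch:

static u64 trailing_hole_example(void)
{
	const u64 extent_end = 64 * 1024;	/* last extent ends at 64Kb  */
	const u64 i_size = 256 * 1024;		/* file size after step 3    */

	/* the log gets an explicit hole item for [64Kb, 256Kb): 192Kb long */
	return i_size - extent_end;
}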
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 4b438b4c8c91..fbe7c104531c 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -2766,6 +2766,20 @@ static int btrfs_relocate_chunk(struct btrfs_root *root,
 	root = root->fs_info->chunk_root;
 	extent_root = root->fs_info->extent_root;
 
+	/*
+	 * Prevent races with automatic removal of unused block groups.
+	 * After we relocate and before we remove the chunk with offset
+	 * chunk_offset, automatic removal of the block group can kick in,
+	 * resulting in a failure when calling btrfs_remove_chunk() below.
+	 *
+	 * Make sure to acquire this mutex before doing a tree search (dev
+	 * or chunk trees) to find chunks. Otherwise the cleaner kthread might
+	 * call btrfs_remove_chunk() (through btrfs_delete_unused_bgs()) after
+	 * we release the path used to search the chunk/dev tree and before
+	 * the current task acquires this mutex and calls us.
+	 */
+	ASSERT(mutex_is_locked(&root->fs_info->delete_unused_bgs_mutex));
+
 	ret = btrfs_can_relocate(extent_root, chunk_offset);
 	if (ret)
 		return -ENOSPC;
@@ -2814,13 +2828,18 @@ again:
 	key.type = BTRFS_CHUNK_ITEM_KEY;
 
 	while (1) {
+		mutex_lock(&root->fs_info->delete_unused_bgs_mutex);
 		ret = btrfs_search_slot(NULL, chunk_root, &key, path, 0, 0);
-		if (ret < 0)
+		if (ret < 0) {
+			mutex_unlock(&root->fs_info->delete_unused_bgs_mutex);
 			goto error;
+		}
 		BUG_ON(ret == 0); /* Corruption */
 
 		ret = btrfs_previous_item(chunk_root, path, key.objectid,
 					  key.type);
+		if (ret)
+			mutex_unlock(&root->fs_info->delete_unused_bgs_mutex);
 		if (ret < 0)
 			goto error;
 		if (ret > 0)
@@ -2843,6 +2862,7 @@ again:
2843 else 2862 else
2844 BUG_ON(ret); 2863 BUG_ON(ret);
2845 } 2864 }
2865 mutex_unlock(&root->fs_info->delete_unused_bgs_mutex);
2846 2866
2847 if (found_key.offset == 0) 2867 if (found_key.offset == 0)
2848 break; 2868 break;
@@ -3299,9 +3319,12 @@ again:
3299 goto error; 3319 goto error;
3300 } 3320 }
3301 3321
3322 mutex_lock(&fs_info->delete_unused_bgs_mutex);
3302 ret = btrfs_search_slot(NULL, chunk_root, &key, path, 0, 0); 3323 ret = btrfs_search_slot(NULL, chunk_root, &key, path, 0, 0);
3303 if (ret < 0) 3324 if (ret < 0) {
3325 mutex_unlock(&fs_info->delete_unused_bgs_mutex);
3304 goto error; 3326 goto error;
3327 }
3305 3328
3306 /* 3329 /*
3307 * this shouldn't happen, it means the last relocate 3330 * this shouldn't happen, it means the last relocate
@@ -3313,6 +3336,7 @@ again:
3313 ret = btrfs_previous_item(chunk_root, path, 0, 3336 ret = btrfs_previous_item(chunk_root, path, 0,
3314 BTRFS_CHUNK_ITEM_KEY); 3337 BTRFS_CHUNK_ITEM_KEY);
3315 if (ret) { 3338 if (ret) {
3339 mutex_unlock(&fs_info->delete_unused_bgs_mutex);
3316 ret = 0; 3340 ret = 0;
3317 break; 3341 break;
3318 } 3342 }
@@ -3321,8 +3345,10 @@ again:
3321 slot = path->slots[0]; 3345 slot = path->slots[0];
3322 btrfs_item_key_to_cpu(leaf, &found_key, slot); 3346 btrfs_item_key_to_cpu(leaf, &found_key, slot);
3323 3347
3324 if (found_key.objectid != key.objectid) 3348 if (found_key.objectid != key.objectid) {
3349 mutex_unlock(&fs_info->delete_unused_bgs_mutex);
3325 break; 3350 break;
3351 }
3326 3352
3327 chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk); 3353 chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
3328 3354
@@ -3335,10 +3361,13 @@ again:
3335 ret = should_balance_chunk(chunk_root, leaf, chunk, 3361 ret = should_balance_chunk(chunk_root, leaf, chunk,
3336 found_key.offset); 3362 found_key.offset);
3337 btrfs_release_path(path); 3363 btrfs_release_path(path);
3338 if (!ret) 3364 if (!ret) {
3365 mutex_unlock(&fs_info->delete_unused_bgs_mutex);
3339 goto loop; 3366 goto loop;
3367 }
3340 3368
3341 if (counting) { 3369 if (counting) {
3370 mutex_unlock(&fs_info->delete_unused_bgs_mutex);
3342 spin_lock(&fs_info->balance_lock); 3371 spin_lock(&fs_info->balance_lock);
3343 bctl->stat.expected++; 3372 bctl->stat.expected++;
3344 spin_unlock(&fs_info->balance_lock); 3373 spin_unlock(&fs_info->balance_lock);
@@ -3348,6 +3377,7 @@ again:
3348 ret = btrfs_relocate_chunk(chunk_root, 3377 ret = btrfs_relocate_chunk(chunk_root,
3349 found_key.objectid, 3378 found_key.objectid,
3350 found_key.offset); 3379 found_key.offset);
3380 mutex_unlock(&fs_info->delete_unused_bgs_mutex);
3351 if (ret && ret != -ENOSPC) 3381 if (ret && ret != -ENOSPC)
3352 goto error; 3382 goto error;
3353 if (ret == -ENOSPC) { 3383 if (ret == -ENOSPC) {
@@ -4087,11 +4117,16 @@ again:
4087 key.type = BTRFS_DEV_EXTENT_KEY; 4117 key.type = BTRFS_DEV_EXTENT_KEY;
4088 4118
4089 do { 4119 do {
4120 mutex_lock(&root->fs_info->delete_unused_bgs_mutex);
4090 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 4121 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
4091 if (ret < 0) 4122 if (ret < 0) {
4123 mutex_unlock(&root->fs_info->delete_unused_bgs_mutex);
4092 goto done; 4124 goto done;
4125 }
4093 4126
4094 ret = btrfs_previous_item(root, path, 0, key.type); 4127 ret = btrfs_previous_item(root, path, 0, key.type);
4128 if (ret)
4129 mutex_unlock(&root->fs_info->delete_unused_bgs_mutex);
4095 if (ret < 0) 4130 if (ret < 0)
4096 goto done; 4131 goto done;
4097 if (ret) { 4132 if (ret) {
@@ -4105,6 +4140,7 @@ again:
4105 btrfs_item_key_to_cpu(l, &key, path->slots[0]); 4140 btrfs_item_key_to_cpu(l, &key, path->slots[0]);
4106 4141
4107 if (key.objectid != device->devid) { 4142 if (key.objectid != device->devid) {
4143 mutex_unlock(&root->fs_info->delete_unused_bgs_mutex);
4108 btrfs_release_path(path); 4144 btrfs_release_path(path);
4109 break; 4145 break;
4110 } 4146 }
@@ -4113,6 +4149,7 @@ again:
4113 length = btrfs_dev_extent_length(l, dev_extent); 4149 length = btrfs_dev_extent_length(l, dev_extent);
4114 4150
4115 if (key.offset + length <= new_size) { 4151 if (key.offset + length <= new_size) {
4152 mutex_unlock(&root->fs_info->delete_unused_bgs_mutex);
4116 btrfs_release_path(path); 4153 btrfs_release_path(path);
4117 break; 4154 break;
4118 } 4155 }
@@ -4122,6 +4159,7 @@ again:
4122 btrfs_release_path(path); 4159 btrfs_release_path(path);
4123 4160
4124 ret = btrfs_relocate_chunk(root, chunk_objectid, chunk_offset); 4161 ret = btrfs_relocate_chunk(root, chunk_objectid, chunk_offset);
4162 mutex_unlock(&root->fs_info->delete_unused_bgs_mutex);
4125 if (ret && ret != -ENOSPC) 4163 if (ret && ret != -ENOSPC)
4126 goto done; 4164 goto done;
4127 if (ret == -ENOSPC) 4165 if (ret == -ENOSPC)
@@ -5715,7 +5753,6 @@ static inline void btrfs_end_bbio(struct btrfs_bio *bbio, struct bio *bio, int e
5715static void btrfs_end_bio(struct bio *bio, int err) 5753static void btrfs_end_bio(struct bio *bio, int err)
5716{ 5754{
5717 struct btrfs_bio *bbio = bio->bi_private; 5755 struct btrfs_bio *bbio = bio->bi_private;
5718 struct btrfs_device *dev = bbio->stripes[0].dev;
5719 int is_orig_bio = 0; 5756 int is_orig_bio = 0;
5720 5757
5721 if (err) { 5758 if (err) {
@@ -5723,6 +5760,7 @@ static void btrfs_end_bio(struct bio *bio, int err)
5723 if (err == -EIO || err == -EREMOTEIO) { 5760 if (err == -EIO || err == -EREMOTEIO) {
5724 unsigned int stripe_index = 5761 unsigned int stripe_index =
5725 btrfs_io_bio(bio)->stripe_index; 5762 btrfs_io_bio(bio)->stripe_index;
5763 struct btrfs_device *dev;
5726 5764
5727 BUG_ON(stripe_index >= bbio->num_stripes); 5765 BUG_ON(stripe_index >= bbio->num_stripes);
5728 dev = bbio->stripes[stripe_index].dev; 5766 dev = bbio->stripes[stripe_index].dev;
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index 6b8e2f091f5b..48851f6ea6ec 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -896,6 +896,7 @@ COMPATIBLE_IOCTL(FIGETBSZ)
896/* 'X' - originally XFS but some now in the VFS */ 896/* 'X' - originally XFS but some now in the VFS */
897COMPATIBLE_IOCTL(FIFREEZE) 897COMPATIBLE_IOCTL(FIFREEZE)
898COMPATIBLE_IOCTL(FITHAW) 898COMPATIBLE_IOCTL(FITHAW)
899COMPATIBLE_IOCTL(FITRIM)
899COMPATIBLE_IOCTL(KDGETKEYCODE) 900COMPATIBLE_IOCTL(KDGETKEYCODE)
900COMPATIBLE_IOCTL(KDSETKEYCODE) 901COMPATIBLE_IOCTL(KDSETKEYCODE)
901COMPATIBLE_IOCTL(KDGKBTYPE) 902COMPATIBLE_IOCTL(KDGKBTYPE)
diff --git a/fs/configfs/item.c b/fs/configfs/item.c
index 4d6a30e76168..b863a09cd2f1 100644
--- a/fs/configfs/item.c
+++ b/fs/configfs/item.c
@@ -115,7 +115,7 @@ void config_item_init_type_name(struct config_item *item,
115 const char *name, 115 const char *name,
116 struct config_item_type *type) 116 struct config_item_type *type)
117{ 117{
118 config_item_set_name(item, name); 118 config_item_set_name(item, "%s", name);
119 item->ci_type = type; 119 item->ci_type = type;
120 config_item_init(item); 120 config_item_init(item);
121} 121}
@@ -124,7 +124,7 @@ EXPORT_SYMBOL(config_item_init_type_name);
124void config_group_init_type_name(struct config_group *group, const char *name, 124void config_group_init_type_name(struct config_group *group, const char *name,
125 struct config_item_type *type) 125 struct config_item_type *type)
126{ 126{
127 config_item_set_name(&group->cg_item, name); 127 config_item_set_name(&group->cg_item, "%s", name);
128 group->cg_item.ci_type = type; 128 group->cg_item.ci_type = type;
129 config_group_init(group); 129 config_group_init(group);
130} 130}
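The motivation is the usual format-string hazard: config_item_set_name() takes a printf-style format, so handing it a caller-supplied name directly would let any '%' specifiers in that name be interpreted. With the literal "%s", the name is consumed as an ordinary argument no matter what characters it contains.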
diff --git a/fs/dcache.c b/fs/dcache.c
index 7a3f3e5f9cea..5c8ea15e73a5 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -642,7 +642,7 @@ static inline bool fast_dput(struct dentry *dentry)
642 642
643 /* 643 /*
644 * If we have a d_op->d_delete() operation, we should not 644 * If we have a d_op->d_delete() operation, we should not
645 * let the dentry count go to zero, so use "put__or_lock". 645 * let the dentry count go to zero, so use "put_or_lock".
646 */ 646 */
647 if (unlikely(dentry->d_flags & DCACHE_OP_DELETE)) 647 if (unlikely(dentry->d_flags & DCACHE_OP_DELETE))
648 return lockref_put_or_lock(&dentry->d_lockref); 648 return lockref_put_or_lock(&dentry->d_lockref);
@@ -697,7 +697,7 @@ static inline bool fast_dput(struct dentry *dentry)
697 */ 697 */
698 smp_rmb(); 698 smp_rmb();
699 d_flags = ACCESS_ONCE(dentry->d_flags); 699 d_flags = ACCESS_ONCE(dentry->d_flags);
700 d_flags &= DCACHE_REFERENCED | DCACHE_LRU_LIST; 700 d_flags &= DCACHE_REFERENCED | DCACHE_LRU_LIST | DCACHE_DISCONNECTED;
701 701
702 /* Nothing to do? Dropping the reference was all we needed? */ 702 /* Nothing to do? Dropping the reference was all we needed? */
703 if (d_flags == (DCACHE_REFERENCED | DCACHE_LRU_LIST) && !d_unhashed(dentry)) 703 if (d_flags == (DCACHE_REFERENCED | DCACHE_LRU_LIST) && !d_unhashed(dentry))
@@ -776,6 +776,9 @@ repeat:
776 if (unlikely(d_unhashed(dentry))) 776 if (unlikely(d_unhashed(dentry)))
777 goto kill_it; 777 goto kill_it;
778 778
779 if (unlikely(dentry->d_flags & DCACHE_DISCONNECTED))
780 goto kill_it;
781
779 if (unlikely(dentry->d_flags & DCACHE_OP_DELETE)) { 782 if (unlikely(dentry->d_flags & DCACHE_OP_DELETE)) {
780 if (dentry->d_op->d_delete(dentry)) 783 if (dentry->d_op->d_delete(dentry))
781 goto kill_it; 784 goto kill_it;
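The two dcache.c hunks work as a pair: once DCACHE_DISCONNECTED is part of the mask in fast_dput(), a disconnected dentry's masked d_flags can never equal DCACHE_REFERENCED | DCACHE_LRU_LIST, so the lockless "nothing to do" exit is refused and control falls through to the slow path in dput(), where the new check sends the dentry to kill_it instead of leaving it cached.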
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c
index 72afcc629d7b..feef8a9c4de7 100644
--- a/fs/ecryptfs/file.c
+++ b/fs/ecryptfs/file.c
@@ -325,7 +325,6 @@ ecryptfs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
325 return rc; 325 return rc;
326 326
327 switch (cmd) { 327 switch (cmd) {
328 case FITRIM:
329 case FS_IOC32_GETFLAGS: 328 case FS_IOC32_GETFLAGS:
330 case FS_IOC32_SETFLAGS: 329 case FS_IOC32_SETFLAGS:
331 case FS_IOC32_GETVERSION: 330 case FS_IOC32_GETVERSION:
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index aadb72828834..2553aa8b608d 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -504,7 +504,7 @@ __read_extent_tree_block(const char *function, unsigned int line,
504 struct buffer_head *bh; 504 struct buffer_head *bh;
505 int err; 505 int err;
506 506
507 bh = sb_getblk(inode->i_sb, pblk); 507 bh = sb_getblk_gfp(inode->i_sb, pblk, __GFP_MOVABLE | GFP_NOFS);
508 if (unlikely(!bh)) 508 if (unlikely(!bh))
509 return ERR_PTR(-ENOMEM); 509 return ERR_PTR(-ENOMEM);
510 510
@@ -1089,7 +1089,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
1089 err = -EIO; 1089 err = -EIO;
1090 goto cleanup; 1090 goto cleanup;
1091 } 1091 }
1092 bh = sb_getblk(inode->i_sb, newblock); 1092 bh = sb_getblk_gfp(inode->i_sb, newblock, __GFP_MOVABLE | GFP_NOFS);
1093 if (unlikely(!bh)) { 1093 if (unlikely(!bh)) {
1094 err = -ENOMEM; 1094 err = -ENOMEM;
1095 goto cleanup; 1095 goto cleanup;
@@ -1283,7 +1283,7 @@ static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode,
1283 if (newblock == 0) 1283 if (newblock == 0)
1284 return err; 1284 return err;
1285 1285
1286 bh = sb_getblk(inode->i_sb, newblock); 1286 bh = sb_getblk_gfp(inode->i_sb, newblock, __GFP_MOVABLE | GFP_NOFS);
1287 if (unlikely(!bh)) 1287 if (unlikely(!bh))
1288 return -ENOMEM; 1288 return -ENOMEM;
1289 lock_buffer(bh); 1289 lock_buffer(bh);
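All three call sites now spell the allocation mask out explicitly: GFP_NOFS keeps memory reclaim from re-entering the filesystem while an extent-tree operation is in flight, and __GFP_MOVABLE appears to preserve the placement hint that plain sb_getblk() passes by default.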
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 41f8e55afcd1..cecf9aa10811 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1323,7 +1323,7 @@ static void ext4_da_page_release_reservation(struct page *page,
1323 unsigned int offset, 1323 unsigned int offset,
1324 unsigned int length) 1324 unsigned int length)
1325{ 1325{
1326 int to_release = 0; 1326 int to_release = 0, contiguous_blks = 0;
1327 struct buffer_head *head, *bh; 1327 struct buffer_head *head, *bh;
1328 unsigned int curr_off = 0; 1328 unsigned int curr_off = 0;
1329 struct inode *inode = page->mapping->host; 1329 struct inode *inode = page->mapping->host;
@@ -1344,14 +1344,23 @@ static void ext4_da_page_release_reservation(struct page *page,
1344 1344
1345 if ((offset <= curr_off) && (buffer_delay(bh))) { 1345 if ((offset <= curr_off) && (buffer_delay(bh))) {
1346 to_release++; 1346 to_release++;
1347 contiguous_blks++;
1347 clear_buffer_delay(bh); 1348 clear_buffer_delay(bh);
1349 } else if (contiguous_blks) {
1350 lblk = page->index <<
1351 (PAGE_CACHE_SHIFT - inode->i_blkbits);
1352 lblk += (curr_off >> inode->i_blkbits) -
1353 contiguous_blks;
1354 ext4_es_remove_extent(inode, lblk, contiguous_blks);
1355 contiguous_blks = 0;
1348 } 1356 }
1349 curr_off = next_off; 1357 curr_off = next_off;
1350 } while ((bh = bh->b_this_page) != head); 1358 } while ((bh = bh->b_this_page) != head);
1351 1359
1352 if (to_release) { 1360 if (contiguous_blks) {
1353 lblk = page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits); 1361 lblk = page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
1354 ext4_es_remove_extent(inode, lblk, to_release); 1362 lblk += (curr_off >> inode->i_blkbits) - contiguous_blks;
1363 ext4_es_remove_extent(inode, lblk, contiguous_blks);
1355 } 1364 }
1356 1365
1357 /* If we have released all the blocks belonging to a cluster, then we 1366 /* If we have released all the blocks belonging to a cluster, then we
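A worked example, assuming a page with four one-block buffers and delayed buffers at offsets 0, 1 and 3: the old code counted to_release = 3 and cleared three blocks starting at the first block of the page, wrongly dropping block 2 from the extent status tree and never touching block 3. The new code removes each contiguous run on its own: blocks 0-1 as soon as the non-delayed buffer at offset 2 is reached, and block 3 in the final contiguous_blks check after the loop.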
@@ -4344,7 +4353,12 @@ static void ext4_update_other_inodes_time(struct super_block *sb,
4344 int inode_size = EXT4_INODE_SIZE(sb); 4353 int inode_size = EXT4_INODE_SIZE(sb);
4345 4354
4346 oi.orig_ino = orig_ino; 4355 oi.orig_ino = orig_ino;
4347 ino = (orig_ino & ~(inodes_per_block - 1)) + 1; 4356 /*
4357 * Calculate the first inode in the inode table block. Inode
4358 * numbers are one-based. That is, the first inode in a block
4359 * (assuming 4k blocks and 256 byte inodes) is (n*16 + 1).
4360 */
4361 ino = ((orig_ino - 1) & ~(inodes_per_block - 1)) + 1;
4348 for (i = 0; i < inodes_per_block; i++, ino++, buf += inode_size) { 4362 for (i = 0; i < inodes_per_block; i++, ino++, buf += inode_size) {
4349 if (ino == orig_ino) 4363 if (ino == orig_ino)
4350 continue; 4364 continue;
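To see the off-by-one the new expression fixes, take inodes_per_block = 16 and orig_ino = 16, the last inode of the first block (numbering is one-based, so that block holds inodes 1 through 16): the old (16 & ~15) + 1 evaluates to 17, the first inode of the next block, while the new ((16 - 1) & ~15) + 1 evaluates to 1, the true start of the block containing inode 16.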
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index cb8451246b30..1346cfa355d0 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -755,7 +755,6 @@ long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
755 return err; 755 return err;
756 } 756 }
757 case EXT4_IOC_MOVE_EXT: 757 case EXT4_IOC_MOVE_EXT:
758 case FITRIM:
759 case EXT4_IOC_RESIZE_FS: 758 case EXT4_IOC_RESIZE_FS:
760 case EXT4_IOC_PRECACHE_EXTENTS: 759 case EXT4_IOC_PRECACHE_EXTENTS:
761 case EXT4_IOC_SET_ENCRYPTION_POLICY: 760 case EXT4_IOC_SET_ENCRYPTION_POLICY:
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index f6aedf88da43..34b610ea5030 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -4816,18 +4816,12 @@ do_more:
4816 /* 4816 /*
4817 * blocks being freed are metadata. these blocks shouldn't 4817 * blocks being freed are metadata. these blocks shouldn't
4818 * be used until this transaction is committed 4818 * be used until this transaction is committed
4819 *
4820 * We use __GFP_NOFAIL because ext4_free_blocks() is not allowed
4821 * to fail.
4819 */ 4822 */
4820 retry: 4823 new_entry = kmem_cache_alloc(ext4_free_data_cachep,
4821 new_entry = kmem_cache_alloc(ext4_free_data_cachep, GFP_NOFS); 4824 GFP_NOFS|__GFP_NOFAIL);
4822 if (!new_entry) {
4823 /*
4824 * We use a retry loop because
4825 * ext4_free_blocks() is not allowed to fail.
4826 */
4827 cond_resched();
4828 congestion_wait(BLK_RW_ASYNC, HZ/50);
4829 goto retry;
4830 }
4831 new_entry->efd_start_cluster = bit; 4825 new_entry->efd_start_cluster = bit;
4832 new_entry->efd_group = block_group; 4826 new_entry->efd_group = block_group;
4833 new_entry->efd_count = count_clusters; 4827 new_entry->efd_count = count_clusters;
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c
index b52374e42102..6163ad21cb0e 100644
--- a/fs/ext4/migrate.c
+++ b/fs/ext4/migrate.c
@@ -620,6 +620,7 @@ int ext4_ind_migrate(struct inode *inode)
620 struct ext4_inode_info *ei = EXT4_I(inode); 620 struct ext4_inode_info *ei = EXT4_I(inode);
621 struct ext4_extent *ex; 621 struct ext4_extent *ex;
622 unsigned int i, len; 622 unsigned int i, len;
623 ext4_lblk_t start, end;
623 ext4_fsblk_t blk; 624 ext4_fsblk_t blk;
624 handle_t *handle; 625 handle_t *handle;
625 int ret; 626 int ret;
@@ -633,6 +634,14 @@ int ext4_ind_migrate(struct inode *inode)
633 EXT4_FEATURE_RO_COMPAT_BIGALLOC)) 634 EXT4_FEATURE_RO_COMPAT_BIGALLOC))
634 return -EOPNOTSUPP; 635 return -EOPNOTSUPP;
635 636
637 /*
638 * In order to get correct extent info, force all delayed allocation
639 * blocks to be allocated, otherwise delayed allocation blocks may not
640 * be reflected and bypass the checks on extent header.
641 */
642 if (test_opt(inode->i_sb, DELALLOC))
643 ext4_alloc_da_blocks(inode);
644
636 handle = ext4_journal_start(inode, EXT4_HT_MIGRATE, 1); 645 handle = ext4_journal_start(inode, EXT4_HT_MIGRATE, 1);
637 if (IS_ERR(handle)) 646 if (IS_ERR(handle))
638 return PTR_ERR(handle); 647 return PTR_ERR(handle);
@@ -650,11 +659,13 @@ int ext4_ind_migrate(struct inode *inode)
650 goto errout; 659 goto errout;
651 } 660 }
652 if (eh->eh_entries == 0) 661 if (eh->eh_entries == 0)
653 blk = len = 0; 662 blk = len = start = end = 0;
654 else { 663 else {
655 len = le16_to_cpu(ex->ee_len); 664 len = le16_to_cpu(ex->ee_len);
656 blk = ext4_ext_pblock(ex); 665 blk = ext4_ext_pblock(ex);
657 if (len > EXT4_NDIR_BLOCKS) { 666 start = le32_to_cpu(ex->ee_block);
667 end = start + len - 1;
668 if (end >= EXT4_NDIR_BLOCKS) {
658 ret = -EOPNOTSUPP; 669 ret = -EOPNOTSUPP;
659 goto errout; 670 goto errout;
660 } 671 }
@@ -662,7 +673,7 @@ int ext4_ind_migrate(struct inode *inode)
662 673
663 ext4_clear_inode_flag(inode, EXT4_INODE_EXTENTS); 674 ext4_clear_inode_flag(inode, EXT4_INODE_EXTENTS);
664 memset(ei->i_data, 0, sizeof(ei->i_data)); 675 memset(ei->i_data, 0, sizeof(ei->i_data));
665 for (i=0; i < len; i++) 676 for (i = start; i <= end; i++)
666 ei->i_data[i] = cpu_to_le32(blk++); 677 ei->i_data[i] = cpu_to_le32(blk++);
667 ext4_mark_inode_dirty(handle, inode); 678 ext4_mark_inode_dirty(handle, inode);
668errout: 679errout:
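A worked example of the remapping fix, assuming a single extent with ee_block = 2 and ee_len = 3 (logical blocks 2 through 4): the old for (i = 0; i < len; i++) copied the three physical block numbers into i_data[0..2], silently shifting the data to logical blocks 0-2, whereas the new for (i = start; i <= end; i++) fills i_data[2..4] and keeps the logical offsets intact. The end >= EXT4_NDIR_BLOCKS check matches: what must fit inside the twelve direct slots is the extent's last logical block, not merely its length.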
diff --git a/fs/hpfs/alloc.c b/fs/hpfs/alloc.c
index f005046e1591..d6a4b55d2ab0 100644
--- a/fs/hpfs/alloc.c
+++ b/fs/hpfs/alloc.c
@@ -484,3 +484,98 @@ struct anode *hpfs_alloc_anode(struct super_block *s, secno near, anode_secno *a
484 a->btree.first_free = cpu_to_le16(8); 484 a->btree.first_free = cpu_to_le16(8);
485 return a; 485 return a;
486} 486}
487
488static unsigned find_run(__le32 *bmp, unsigned *idx)
489{
490 unsigned len;
491 while (tstbits(bmp, *idx, 1)) {
492 (*idx)++;
493 if (unlikely(*idx >= 0x4000))
494 return 0;
495 }
496 len = 1;
497 while (!tstbits(bmp, *idx + len, 1))
498 len++;
499 return len;
500}
501
502static int do_trim(struct super_block *s, secno start, unsigned len, secno limit_start, secno limit_end, unsigned minlen, unsigned *result)
503{
504 int err;
505 secno end;
506 if (fatal_signal_pending(current))
507 return -EINTR;
508 end = start + len;
509 if (start < limit_start)
510 start = limit_start;
511 if (end > limit_end)
512 end = limit_end;
513 if (start >= end)
514 return 0;
515 if (end - start < minlen)
516 return 0;
517 err = sb_issue_discard(s, start, end - start, GFP_NOFS, 0);
518 if (err)
519 return err;
520 *result += end - start;
521 return 0;
522}
523
524int hpfs_trim_fs(struct super_block *s, u64 start, u64 end, u64 minlen, unsigned *result)
525{
526 int err = 0;
527 struct hpfs_sb_info *sbi = hpfs_sb(s);
528 unsigned idx, len, start_bmp, end_bmp;
529 __le32 *bmp;
530 struct quad_buffer_head qbh;
531
532 *result = 0;
533 if (!end || end > sbi->sb_fs_size)
534 end = sbi->sb_fs_size;
535 if (start >= sbi->sb_fs_size)
536 return 0;
537 if (minlen > 0x4000)
538 return 0;
539 if (start < sbi->sb_dirband_start + sbi->sb_dirband_size && end > sbi->sb_dirband_start) {
540 hpfs_lock(s);
541 if (s->s_flags & MS_RDONLY) {
542 err = -EROFS;
543 goto unlock_1;
544 }
545 if (!(bmp = hpfs_map_dnode_bitmap(s, &qbh))) {
546 err = -EIO;
547 goto unlock_1;
548 }
549 idx = 0;
550 while ((len = find_run(bmp, &idx)) && !err) {
551 err = do_trim(s, sbi->sb_dirband_start + idx * 4, len * 4, start, end, minlen, result);
552 idx += len;
553 }
554 hpfs_brelse4(&qbh);
555unlock_1:
556 hpfs_unlock(s);
557 }
558 start_bmp = start >> 14;
559 end_bmp = (end + 0x3fff) >> 14;
560 while (start_bmp < end_bmp && !err) {
561 hpfs_lock(s);
562 if (s->s_flags & MS_RDONLY) {
563 err = -EROFS;
564 goto unlock_2;
565 }
566 if (!(bmp = hpfs_map_bitmap(s, start_bmp, &qbh, "trim"))) {
567 err = -EIO;
568 goto unlock_2;
569 }
570 idx = 0;
571 while ((len = find_run(bmp, &idx)) && !err) {
572 err = do_trim(s, (start_bmp << 14) + idx, len, start, end, minlen, result);
573 idx += len;
574 }
575 hpfs_brelse4(&qbh);
576unlock_2:
577 hpfs_unlock(s);
578 start_bmp++;
579 }
580 return err;
581}
diff --git a/fs/hpfs/dir.c b/fs/hpfs/dir.c
index 2a8e07425de0..dc540bfcee1d 100644
--- a/fs/hpfs/dir.c
+++ b/fs/hpfs/dir.c
@@ -327,4 +327,5 @@ const struct file_operations hpfs_dir_ops =
327 .iterate = hpfs_readdir, 327 .iterate = hpfs_readdir,
328 .release = hpfs_dir_release, 328 .release = hpfs_dir_release,
329 .fsync = hpfs_file_fsync, 329 .fsync = hpfs_file_fsync,
330 .unlocked_ioctl = hpfs_ioctl,
330}; 331};
diff --git a/fs/hpfs/file.c b/fs/hpfs/file.c
index 6d8cfe9b52d6..7ca28d604bf7 100644
--- a/fs/hpfs/file.c
+++ b/fs/hpfs/file.c
@@ -203,6 +203,7 @@ const struct file_operations hpfs_file_ops =
203 .release = hpfs_file_release, 203 .release = hpfs_file_release,
204 .fsync = hpfs_file_fsync, 204 .fsync = hpfs_file_fsync,
205 .splice_read = generic_file_splice_read, 205 .splice_read = generic_file_splice_read,
206 .unlocked_ioctl = hpfs_ioctl,
206}; 207};
207 208
208const struct inode_operations hpfs_file_iops = 209const struct inode_operations hpfs_file_iops =
diff --git a/fs/hpfs/hpfs_fn.h b/fs/hpfs/hpfs_fn.h
index bb04b58d1d69..c4867b5116dd 100644
--- a/fs/hpfs/hpfs_fn.h
+++ b/fs/hpfs/hpfs_fn.h
@@ -18,6 +18,8 @@
18#include <linux/pagemap.h> 18#include <linux/pagemap.h>
19#include <linux/buffer_head.h> 19#include <linux/buffer_head.h>
20#include <linux/slab.h> 20#include <linux/slab.h>
21#include <linux/sched.h>
22#include <linux/blkdev.h>
21#include <asm/unaligned.h> 23#include <asm/unaligned.h>
22 24
23#include "hpfs.h" 25#include "hpfs.h"
@@ -200,6 +202,7 @@ void hpfs_free_dnode(struct super_block *, secno);
200struct dnode *hpfs_alloc_dnode(struct super_block *, secno, dnode_secno *, struct quad_buffer_head *); 202struct dnode *hpfs_alloc_dnode(struct super_block *, secno, dnode_secno *, struct quad_buffer_head *);
201struct fnode *hpfs_alloc_fnode(struct super_block *, secno, fnode_secno *, struct buffer_head **); 203struct fnode *hpfs_alloc_fnode(struct super_block *, secno, fnode_secno *, struct buffer_head **);
202struct anode *hpfs_alloc_anode(struct super_block *, secno, anode_secno *, struct buffer_head **); 204struct anode *hpfs_alloc_anode(struct super_block *, secno, anode_secno *, struct buffer_head **);
205int hpfs_trim_fs(struct super_block *, u64, u64, u64, unsigned *);
203 206
204/* anode.c */ 207/* anode.c */
205 208
@@ -318,6 +321,7 @@ __printf(2, 3)
318void hpfs_error(struct super_block *, const char *, ...); 321void hpfs_error(struct super_block *, const char *, ...);
319int hpfs_stop_cycles(struct super_block *, int, int *, int *, char *); 322int hpfs_stop_cycles(struct super_block *, int, int *, int *, char *);
320unsigned hpfs_get_free_dnodes(struct super_block *); 323unsigned hpfs_get_free_dnodes(struct super_block *);
324long hpfs_ioctl(struct file *file, unsigned cmd, unsigned long arg);
321 325
322/* 326/*
323 * local time (HPFS) to GMT (Unix) 327 * local time (HPFS) to GMT (Unix)
diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c
index 7cd00d3a7c9b..68a9bed05628 100644
--- a/fs/hpfs/super.c
+++ b/fs/hpfs/super.c
@@ -52,17 +52,20 @@ static void unmark_dirty(struct super_block *s)
52} 52}
53 53
54/* Filesystem error... */ 54/* Filesystem error... */
55static char err_buf[1024];
56
57void hpfs_error(struct super_block *s, const char *fmt, ...) 55void hpfs_error(struct super_block *s, const char *fmt, ...)
58{ 56{
57 struct va_format vaf;
59 va_list args; 58 va_list args;
60 59
61 va_start(args, fmt); 60 va_start(args, fmt);
62 vsnprintf(err_buf, sizeof(err_buf), fmt, args); 61
62 vaf.fmt = fmt;
63 vaf.va = &args;
64
65 pr_err("filesystem error: %pV", &vaf);
66
63 va_end(args); 67 va_end(args);
64 68
65 pr_err("filesystem error: %s", err_buf);
66 if (!hpfs_sb(s)->sb_was_error) { 69 if (!hpfs_sb(s)->sb_was_error) {
67 if (hpfs_sb(s)->sb_err == 2) { 70 if (hpfs_sb(s)->sb_err == 2) {
68 pr_cont("; crashing the system because you wanted it\n"); 71 pr_cont("; crashing the system because you wanted it\n");
@@ -196,12 +199,39 @@ static int hpfs_statfs(struct dentry *dentry, struct kstatfs *buf)
196 return 0; 199 return 0;
197} 200}
198 201
202
203long hpfs_ioctl(struct file *file, unsigned cmd, unsigned long arg)
204{
205 switch (cmd) {
206 case FITRIM: {
207 struct fstrim_range range;
208 secno n_trimmed;
209 int r;
210 if (!capable(CAP_SYS_ADMIN))
211 return -EPERM;
212 if (copy_from_user(&range, (struct fstrim_range __user *)arg, sizeof(range)))
213 return -EFAULT;
214 r = hpfs_trim_fs(file_inode(file)->i_sb, range.start >> 9, (range.start + range.len) >> 9, (range.minlen + 511) >> 9, &n_trimmed);
215 if (r)
216 return r;
217 range.len = (u64)n_trimmed << 9;
218 if (copy_to_user((struct fstrim_range __user *)arg, &range, sizeof(range)))
219 return -EFAULT;
220 return 0;
221 }
222 default: {
223 return -ENOIOCTLCMD;
224 }
225 }
226}
227
228
199static struct kmem_cache * hpfs_inode_cachep; 229static struct kmem_cache * hpfs_inode_cachep;
200 230
201static struct inode *hpfs_alloc_inode(struct super_block *sb) 231static struct inode *hpfs_alloc_inode(struct super_block *sb)
202{ 232{
203 struct hpfs_inode_info *ei; 233 struct hpfs_inode_info *ei;
204 ei = (struct hpfs_inode_info *)kmem_cache_alloc(hpfs_inode_cachep, GFP_NOFS); 234 ei = kmem_cache_alloc(hpfs_inode_cachep, GFP_NOFS);
205 if (!ei) 235 if (!ei)
206 return NULL; 236 return NULL;
207 ei->vfs_inode.i_version = 1; 237 ei->vfs_inode.i_version = 1;
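The new handler is reached through the standard FITRIM interface, so fstrim-style callers work unchanged. A minimal user-space sketch (the mount point is hypothetical; error handling trimmed):

	#include <fcntl.h>
	#include <stdio.h>
	#include <sys/ioctl.h>
	#include <linux/fs.h>	/* FITRIM, struct fstrim_range */

	int main(void)
	{
		/* any file or directory on the mounted HPFS volume */
		int fd = open("/mnt/hpfs", O_RDONLY);
		struct fstrim_range range = {
			.start = 0,
			.len = (__u64)-1,	/* whole filesystem */
			.minlen = 0,
		};

		if (fd < 0 || ioctl(fd, FITRIM, &range))
			perror("FITRIM");
		else
			printf("trimmed %llu bytes\n",
			       (unsigned long long)range.len);
		return 0;
	}

Note the unit conversions in the handler above: range.start and range.len arrive in bytes and are shifted down by 9 into 512-byte sectors, and the trimmed-sector count n_trimmed is shifted back up by 9 before being copied out.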
@@ -424,11 +454,14 @@ static int hpfs_remount_fs(struct super_block *s, int *flags, char *data)
424 int o; 454 int o;
425 struct hpfs_sb_info *sbi = hpfs_sb(s); 455 struct hpfs_sb_info *sbi = hpfs_sb(s);
426 char *new_opts = kstrdup(data, GFP_KERNEL); 456 char *new_opts = kstrdup(data, GFP_KERNEL);
427 457
458 if (!new_opts)
459 return -ENOMEM;
460
428 sync_filesystem(s); 461 sync_filesystem(s);
429 462
430 *flags |= MS_NOATIME; 463 *flags |= MS_NOATIME;
431 464
432 hpfs_lock(s); 465 hpfs_lock(s);
433 uid = sbi->sb_uid; gid = sbi->sb_gid; 466 uid = sbi->sb_uid; gid = sbi->sb_gid;
434 umask = 0777 & ~sbi->sb_mode; 467 umask = 0777 & ~sbi->sb_mode;
diff --git a/fs/jfs/file.c b/fs/jfs/file.c
index e98d39d75cf4..b9dc23cd04f2 100644
--- a/fs/jfs/file.c
+++ b/fs/jfs/file.c
@@ -76,7 +76,7 @@ static int jfs_open(struct inode *inode, struct file *file)
76 if (ji->active_ag == -1) { 76 if (ji->active_ag == -1) {
77 struct jfs_sb_info *jfs_sb = JFS_SBI(inode->i_sb); 77 struct jfs_sb_info *jfs_sb = JFS_SBI(inode->i_sb);
78 ji->active_ag = BLKTOAG(addressPXD(&ji->ixpxd), jfs_sb); 78 ji->active_ag = BLKTOAG(addressPXD(&ji->ixpxd), jfs_sb);
79 atomic_inc( &jfs_sb->bmap->db_active[ji->active_ag]); 79 atomic_inc(&jfs_sb->bmap->db_active[ji->active_ag]);
80 } 80 }
81 spin_unlock_irq(&ji->ag_lock); 81 spin_unlock_irq(&ji->ag_lock);
82 } 82 }
diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c
index 6f1cb2b5ee28..41aa3ca6a6a4 100644
--- a/fs/jfs/inode.c
+++ b/fs/jfs/inode.c
@@ -134,11 +134,11 @@ int jfs_write_inode(struct inode *inode, struct writeback_control *wbc)
134 * It has been committed since the last change, but was still 134 * It has been committed since the last change, but was still
135 * on the dirty inode list. 135 * on the dirty inode list.
136 */ 136 */
137 if (!test_cflag(COMMIT_Dirty, inode)) { 137 if (!test_cflag(COMMIT_Dirty, inode)) {
138 /* Make sure committed changes hit the disk */ 138 /* Make sure committed changes hit the disk */
139 jfs_flush_journal(JFS_SBI(inode->i_sb)->log, wait); 139 jfs_flush_journal(JFS_SBI(inode->i_sb)->log, wait);
140 return 0; 140 return 0;
141 } 141 }
142 142
143 if (jfs_commit_inode(inode, wait)) { 143 if (jfs_commit_inode(inode, wait)) {
144 jfs_err("jfs_write_inode: jfs_commit_inode failed!"); 144 jfs_err("jfs_write_inode: jfs_commit_inode failed!");
diff --git a/fs/jfs/ioctl.c b/fs/jfs/ioctl.c
index 93a1232894f6..8db8b7d61e40 100644
--- a/fs/jfs/ioctl.c
+++ b/fs/jfs/ioctl.c
@@ -180,9 +180,6 @@ long jfs_compat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
180 case JFS_IOC_SETFLAGS32: 180 case JFS_IOC_SETFLAGS32:
181 cmd = JFS_IOC_SETFLAGS; 181 cmd = JFS_IOC_SETFLAGS;
182 break; 182 break;
183 case FITRIM:
184 cmd = FITRIM;
185 break;
186 } 183 }
187 return jfs_ioctl(filp, cmd, arg); 184 return jfs_ioctl(filp, cmd, arg);
188} 185}
diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c
index e33be921aa41..a5ac97b9a933 100644
--- a/fs/jfs/namei.c
+++ b/fs/jfs/namei.c
@@ -1160,7 +1160,7 @@ static int jfs_rename(struct inode *old_dir, struct dentry *old_dentry,
1160 rc = dtModify(tid, new_dir, &new_dname, &ino, 1160 rc = dtModify(tid, new_dir, &new_dname, &ino,
1161 old_ip->i_ino, JFS_RENAME); 1161 old_ip->i_ino, JFS_RENAME);
1162 if (rc) 1162 if (rc)
1163 goto out4; 1163 goto out_tx;
1164 drop_nlink(new_ip); 1164 drop_nlink(new_ip);
1165 if (S_ISDIR(new_ip->i_mode)) { 1165 if (S_ISDIR(new_ip->i_mode)) {
1166 drop_nlink(new_ip); 1166 drop_nlink(new_ip);
@@ -1185,7 +1185,7 @@ static int jfs_rename(struct inode *old_dir, struct dentry *old_dentry,
1185 if ((new_size = commitZeroLink(tid, new_ip)) < 0) { 1185 if ((new_size = commitZeroLink(tid, new_ip)) < 0) {
1186 txAbort(tid, 1); /* Marks FS Dirty */ 1186 txAbort(tid, 1); /* Marks FS Dirty */
1187 rc = new_size; 1187 rc = new_size;
1188 goto out4; 1188 goto out_tx;
1189 } 1189 }
1190 tblk = tid_to_tblock(tid); 1190 tblk = tid_to_tblock(tid);
1191 tblk->xflag |= COMMIT_DELETE; 1191 tblk->xflag |= COMMIT_DELETE;
@@ -1203,7 +1203,7 @@ static int jfs_rename(struct inode *old_dir, struct dentry *old_dentry,
1203 if (rc) { 1203 if (rc) {
1204 jfs_err("jfs_rename didn't expect dtSearch to fail " 1204 jfs_err("jfs_rename didn't expect dtSearch to fail "
1205 "w/rc = %d", rc); 1205 "w/rc = %d", rc);
1206 goto out4; 1206 goto out_tx;
1207 } 1207 }
1208 1208
1209 ino = old_ip->i_ino; 1209 ino = old_ip->i_ino;
@@ -1211,7 +1211,7 @@ static int jfs_rename(struct inode *old_dir, struct dentry *old_dentry,
1211 if (rc) { 1211 if (rc) {
1212 if (rc == -EIO) 1212 if (rc == -EIO)
1213 jfs_err("jfs_rename: dtInsert returned -EIO"); 1213 jfs_err("jfs_rename: dtInsert returned -EIO");
1214 goto out4; 1214 goto out_tx;
1215 } 1215 }
1216 if (S_ISDIR(old_ip->i_mode)) 1216 if (S_ISDIR(old_ip->i_mode))
1217 inc_nlink(new_dir); 1217 inc_nlink(new_dir);
@@ -1226,7 +1226,7 @@ static int jfs_rename(struct inode *old_dir, struct dentry *old_dentry,
1226 jfs_err("jfs_rename did not expect dtDelete to return rc = %d", 1226 jfs_err("jfs_rename did not expect dtDelete to return rc = %d",
1227 rc); 1227 rc);
1228 txAbort(tid, 1); /* Marks Filesystem dirty */ 1228 txAbort(tid, 1); /* Marks Filesystem dirty */
1229 goto out4; 1229 goto out_tx;
1230 } 1230 }
1231 if (S_ISDIR(old_ip->i_mode)) { 1231 if (S_ISDIR(old_ip->i_mode)) {
1232 drop_nlink(old_dir); 1232 drop_nlink(old_dir);
@@ -1285,7 +1285,7 @@ static int jfs_rename(struct inode *old_dir, struct dentry *old_dentry,
1285 1285
1286 rc = txCommit(tid, ipcount, iplist, commit_flag); 1286 rc = txCommit(tid, ipcount, iplist, commit_flag);
1287 1287
1288 out4: 1288 out_tx:
1289 txEnd(tid); 1289 txEnd(tid);
1290 if (new_ip) 1290 if (new_ip)
1291 mutex_unlock(&JFS_IP(new_ip)->commit_mutex); 1291 mutex_unlock(&JFS_IP(new_ip)->commit_mutex);
@@ -1308,13 +1308,6 @@ static int jfs_rename(struct inode *old_dir, struct dentry *old_dentry,
1308 } 1308 }
1309 if (new_ip && (new_ip->i_nlink == 0)) 1309 if (new_ip && (new_ip->i_nlink == 0))
1310 set_cflag(COMMIT_Nolink, new_ip); 1310 set_cflag(COMMIT_Nolink, new_ip);
1311 out3:
1312 free_UCSname(&new_dname);
1313 out2:
1314 free_UCSname(&old_dname);
1315 out1:
1316 if (new_ip && !S_ISDIR(new_ip->i_mode))
1317 IWRITE_UNLOCK(new_ip);
1318 /* 1311 /*
1319 * Truncating the directory index table is not guaranteed. It 1312 * Truncating the directory index table is not guaranteed. It
1320 * may need to be done iteratively 1313 * may need to be done iteratively
@@ -1325,7 +1318,13 @@ static int jfs_rename(struct inode *old_dir, struct dentry *old_dentry,
1325 1318
1326 clear_cflag(COMMIT_Stale, old_dir); 1319 clear_cflag(COMMIT_Stale, old_dir);
1327 } 1320 }
1328 1321 if (new_ip && !S_ISDIR(new_ip->i_mode))
1322 IWRITE_UNLOCK(new_ip);
1323 out3:
1324 free_UCSname(&new_dname);
1325 out2:
1326 free_UCSname(&old_dname);
1327 out1:
1329 jfs_info("jfs_rename: returning %d", rc); 1328 jfs_info("jfs_rename: returning %d", rc);
1330 return rc; 1329 return rc;
1331} 1330}
diff --git a/fs/locks.c b/fs/locks.c
index 653faabb07f4..d3d558ba4da7 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -862,12 +862,11 @@ static int posix_locks_deadlock(struct file_lock *caller_fl,
862 * whether or not a lock was successfully freed by testing the return 862 * whether or not a lock was successfully freed by testing the return
863 * value for -ENOENT. 863 * value for -ENOENT.
864 */ 864 */
865static int flock_lock_file(struct file *filp, struct file_lock *request) 865static int flock_lock_inode(struct inode *inode, struct file_lock *request)
866{ 866{
867 struct file_lock *new_fl = NULL; 867 struct file_lock *new_fl = NULL;
868 struct file_lock *fl; 868 struct file_lock *fl;
869 struct file_lock_context *ctx; 869 struct file_lock_context *ctx;
870 struct inode *inode = file_inode(filp);
871 int error = 0; 870 int error = 0;
872 bool found = false; 871 bool found = false;
873 LIST_HEAD(dispose); 872 LIST_HEAD(dispose);
@@ -890,7 +889,7 @@ static int flock_lock_file(struct file *filp, struct file_lock *request)
890 goto find_conflict; 889 goto find_conflict;
891 890
892 list_for_each_entry(fl, &ctx->flc_flock, fl_list) { 891 list_for_each_entry(fl, &ctx->flc_flock, fl_list) {
893 if (filp != fl->fl_file) 892 if (request->fl_file != fl->fl_file)
894 continue; 893 continue;
895 if (request->fl_type == fl->fl_type) 894 if (request->fl_type == fl->fl_type)
896 goto out; 895 goto out;
@@ -1164,20 +1163,19 @@ int posix_lock_file(struct file *filp, struct file_lock *fl,
1164EXPORT_SYMBOL(posix_lock_file); 1163EXPORT_SYMBOL(posix_lock_file);
1165 1164
1166/** 1165/**
1167 * posix_lock_file_wait - Apply a POSIX-style lock to a file 1166 * posix_lock_inode_wait - Apply a POSIX-style lock to a file
1168 * @filp: The file to apply the lock to 1167 * @inode: inode of file to which lock request should be applied
1169 * @fl: The lock to be applied 1168 * @fl: The lock to be applied
1170 * 1169 *
1171 * Add a POSIX style lock to a file. 1170 * Variant of posix_lock_file_wait that does not take a filp, and so can be
1172 * We merge adjacent & overlapping locks whenever possible. 1171 * used after the filp has already been torn down.
1173 * POSIX locks are sorted by owner task, then by starting address
1174 */ 1172 */
1175int posix_lock_file_wait(struct file *filp, struct file_lock *fl) 1173int posix_lock_inode_wait(struct inode *inode, struct file_lock *fl)
1176{ 1174{
1177 int error; 1175 int error;
1178 might_sleep (); 1176 might_sleep ();
1179 for (;;) { 1177 for (;;) {
1180 error = posix_lock_file(filp, fl, NULL); 1178 error = __posix_lock_file(inode, fl, NULL);
1181 if (error != FILE_LOCK_DEFERRED) 1179 if (error != FILE_LOCK_DEFERRED)
1182 break; 1180 break;
1183 error = wait_event_interruptible(fl->fl_wait, !fl->fl_next); 1181 error = wait_event_interruptible(fl->fl_wait, !fl->fl_next);
@@ -1189,7 +1187,7 @@ int posix_lock_file_wait(struct file *filp, struct file_lock *fl)
1189 } 1187 }
1190 return error; 1188 return error;
1191} 1189}
1192EXPORT_SYMBOL(posix_lock_file_wait); 1190EXPORT_SYMBOL(posix_lock_inode_wait);
1193 1191
1194/** 1192/**
1195 * locks_mandatory_locked - Check for an active lock 1193 * locks_mandatory_locked - Check for an active lock
@@ -1851,18 +1849,18 @@ int fcntl_setlease(unsigned int fd, struct file *filp, long arg)
1851} 1849}
1852 1850
1853/** 1851/**
1854 * flock_lock_file_wait - Apply a FLOCK-style lock to a file 1852 * flock_lock_inode_wait - Apply a FLOCK-style lock to a file
1855 * @filp: The file to apply the lock to 1853 * @inode: inode of the file to apply to
1856 * @fl: The lock to be applied 1854 * @fl: The lock to be applied
1857 * 1855 *
1858 * Add a FLOCK style lock to a file. 1856 * Apply a FLOCK style lock request to an inode.
1859 */ 1857 */
1860int flock_lock_file_wait(struct file *filp, struct file_lock *fl) 1858int flock_lock_inode_wait(struct inode *inode, struct file_lock *fl)
1861{ 1859{
1862 int error; 1860 int error;
1863 might_sleep(); 1861 might_sleep();
1864 for (;;) { 1862 for (;;) {
1865 error = flock_lock_file(filp, fl); 1863 error = flock_lock_inode(inode, fl);
1866 if (error != FILE_LOCK_DEFERRED) 1864 if (error != FILE_LOCK_DEFERRED)
1867 break; 1865 break;
1868 error = wait_event_interruptible(fl->fl_wait, !fl->fl_next); 1866 error = wait_event_interruptible(fl->fl_wait, !fl->fl_next);
@@ -1874,8 +1872,7 @@ int flock_lock_file_wait(struct file *filp, struct file_lock *fl)
1874 } 1872 }
1875 return error; 1873 return error;
1876} 1874}
1877 1875EXPORT_SYMBOL(flock_lock_inode_wait);
1878EXPORT_SYMBOL(flock_lock_file_wait);
1879 1876
1880/** 1877/**
1881 * sys_flock: - flock() system call. 1878 * sys_flock: - flock() system call.
@@ -2401,7 +2398,8 @@ locks_remove_flock(struct file *filp)
2401 .fl_type = F_UNLCK, 2398 .fl_type = F_UNLCK,
2402 .fl_end = OFFSET_MAX, 2399 .fl_end = OFFSET_MAX,
2403 }; 2400 };
2404 struct file_lock_context *flctx = file_inode(filp)->i_flctx; 2401 struct inode *inode = file_inode(filp);
2402 struct file_lock_context *flctx = inode->i_flctx;
2405 2403
2406 if (list_empty(&flctx->flc_flock)) 2404 if (list_empty(&flctx->flc_flock))
2407 return; 2405 return;
@@ -2409,7 +2407,7 @@ locks_remove_flock(struct file *filp)
2409 if (filp->f_op->flock) 2407 if (filp->f_op->flock)
2410 filp->f_op->flock(filp, F_SETLKW, &fl); 2408 filp->f_op->flock(filp, F_SETLKW, &fl);
2411 else 2409 else
2412 flock_lock_file(filp, &fl); 2410 flock_lock_inode(inode, &fl);
2413 2411
2414 if (fl.fl_ops && fl.fl_ops->fl_release_private) 2412 if (fl.fl_ops && fl.fl_ops->fl_release_private)
2415 fl.fl_ops->fl_release_private(&fl); 2413 fl.fl_ops->fl_release_private(&fl);
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 6f228b5af819..8bee93469617 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -5439,15 +5439,15 @@ static int nfs4_proc_getlk(struct nfs4_state *state, int cmd, struct file_lock *
5439 return err; 5439 return err;
5440} 5440}
5441 5441
5442static int do_vfs_lock(struct file *file, struct file_lock *fl) 5442static int do_vfs_lock(struct inode *inode, struct file_lock *fl)
5443{ 5443{
5444 int res = 0; 5444 int res = 0;
5445 switch (fl->fl_flags & (FL_POSIX|FL_FLOCK)) { 5445 switch (fl->fl_flags & (FL_POSIX|FL_FLOCK)) {
5446 case FL_POSIX: 5446 case FL_POSIX:
5447 res = posix_lock_file_wait(file, fl); 5447 res = posix_lock_inode_wait(inode, fl);
5448 break; 5448 break;
5449 case FL_FLOCK: 5449 case FL_FLOCK:
5450 res = flock_lock_file_wait(file, fl); 5450 res = flock_lock_inode_wait(inode, fl);
5451 break; 5451 break;
5452 default: 5452 default:
5453 BUG(); 5453 BUG();
@@ -5484,7 +5484,6 @@ static struct nfs4_unlockdata *nfs4_alloc_unlockdata(struct file_lock *fl,
5484 atomic_inc(&lsp->ls_count); 5484 atomic_inc(&lsp->ls_count);
5485 /* Ensure we don't close file until we're done freeing locks! */ 5485 /* Ensure we don't close file until we're done freeing locks! */
5486 p->ctx = get_nfs_open_context(ctx); 5486 p->ctx = get_nfs_open_context(ctx);
5487 get_file(fl->fl_file);
5488 memcpy(&p->fl, fl, sizeof(p->fl)); 5487 memcpy(&p->fl, fl, sizeof(p->fl));
5489 p->server = NFS_SERVER(inode); 5488 p->server = NFS_SERVER(inode);
5490 return p; 5489 return p;
@@ -5496,7 +5495,6 @@ static void nfs4_locku_release_calldata(void *data)
5496 nfs_free_seqid(calldata->arg.seqid); 5495 nfs_free_seqid(calldata->arg.seqid);
5497 nfs4_put_lock_state(calldata->lsp); 5496 nfs4_put_lock_state(calldata->lsp);
5498 put_nfs_open_context(calldata->ctx); 5497 put_nfs_open_context(calldata->ctx);
5499 fput(calldata->fl.fl_file);
5500 kfree(calldata); 5498 kfree(calldata);
5501} 5499}
5502 5500
@@ -5509,7 +5507,7 @@ static void nfs4_locku_done(struct rpc_task *task, void *data)
5509 switch (task->tk_status) { 5507 switch (task->tk_status) {
5510 case 0: 5508 case 0:
5511 renew_lease(calldata->server, calldata->timestamp); 5509 renew_lease(calldata->server, calldata->timestamp);
5512 do_vfs_lock(calldata->fl.fl_file, &calldata->fl); 5510 do_vfs_lock(calldata->lsp->ls_state->inode, &calldata->fl);
5513 if (nfs4_update_lock_stateid(calldata->lsp, 5511 if (nfs4_update_lock_stateid(calldata->lsp,
5514 &calldata->res.stateid)) 5512 &calldata->res.stateid))
5515 break; 5513 break;
@@ -5617,7 +5615,7 @@ static int nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock *
5617 mutex_lock(&sp->so_delegreturn_mutex); 5615 mutex_lock(&sp->so_delegreturn_mutex);
5618 /* Exclude nfs4_reclaim_open_stateid() - note nesting! */ 5616 /* Exclude nfs4_reclaim_open_stateid() - note nesting! */
5619 down_read(&nfsi->rwsem); 5617 down_read(&nfsi->rwsem);
5620 if (do_vfs_lock(request->fl_file, request) == -ENOENT) { 5618 if (do_vfs_lock(inode, request) == -ENOENT) {
5621 up_read(&nfsi->rwsem); 5619 up_read(&nfsi->rwsem);
5622 mutex_unlock(&sp->so_delegreturn_mutex); 5620 mutex_unlock(&sp->so_delegreturn_mutex);
5623 goto out; 5621 goto out;
@@ -5758,7 +5756,7 @@ static void nfs4_lock_done(struct rpc_task *task, void *calldata)
5758 data->timestamp); 5756 data->timestamp);
5759 if (data->arg.new_lock) { 5757 if (data->arg.new_lock) {
5760 data->fl.fl_flags &= ~(FL_SLEEP | FL_ACCESS); 5758 data->fl.fl_flags &= ~(FL_SLEEP | FL_ACCESS);
5761 if (do_vfs_lock(data->fl.fl_file, &data->fl) < 0) { 5759 if (do_vfs_lock(lsp->ls_state->inode, &data->fl) < 0) {
5762 rpc_restart_call_prepare(task); 5760 rpc_restart_call_prepare(task);
5763 break; 5761 break;
5764 } 5762 }
@@ -6000,7 +5998,7 @@ static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock
6000 if (status != 0) 5998 if (status != 0)
6001 goto out; 5999 goto out;
6002 request->fl_flags |= FL_ACCESS; 6000 request->fl_flags |= FL_ACCESS;
6003 status = do_vfs_lock(request->fl_file, request); 6001 status = do_vfs_lock(state->inode, request);
6004 if (status < 0) 6002 if (status < 0)
6005 goto out; 6003 goto out;
6006 down_read(&nfsi->rwsem); 6004 down_read(&nfsi->rwsem);
@@ -6008,7 +6006,7 @@ static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock
6008 /* Yes: cache locks! */ 6006 /* Yes: cache locks! */
6009 /* ...but avoid races with delegation recall... */ 6007 /* ...but avoid races with delegation recall... */
6010 request->fl_flags = fl_flags & ~FL_SLEEP; 6008 request->fl_flags = fl_flags & ~FL_SLEEP;
6011 status = do_vfs_lock(request->fl_file, request); 6009 status = do_vfs_lock(state->inode, request);
6012 up_read(&nfsi->rwsem); 6010 up_read(&nfsi->rwsem);
6013 goto out; 6011 goto out;
6014 } 6012 }
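The nfs4proc.c hunks show what the inode-based lock helpers buy: nfs4_alloc_unlockdata() no longer pins the struct file (the get_file()/fput() pair is gone), so a LOCKU that completes after the last file reference has been dropped can still apply the VFS-level unlock, because do_vfs_lock() now needs only the inode held by the lock state.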
diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c
index 9a20e513d7eb..aba43811d6ef 100644
--- a/fs/nilfs2/ioctl.c
+++ b/fs/nilfs2/ioctl.c
@@ -1369,7 +1369,6 @@ long nilfs_compat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
1369 case NILFS_IOCTL_SYNC: 1369 case NILFS_IOCTL_SYNC:
1370 case NILFS_IOCTL_RESIZE: 1370 case NILFS_IOCTL_RESIZE:
1371 case NILFS_IOCTL_SET_ALLOC_RANGE: 1371 case NILFS_IOCTL_SET_ALLOC_RANGE:
1372 case FITRIM:
1373 break; 1372 break;
1374 default: 1373 default:
1375 return -ENOIOCTLCMD; 1374 return -ENOIOCTLCMD;
diff --git a/fs/notify/mark.c b/fs/notify/mark.c
index 92e48c70f0f0..3e594ce41010 100644
--- a/fs/notify/mark.c
+++ b/fs/notify/mark.c
@@ -152,31 +152,15 @@ void fsnotify_destroy_mark_locked(struct fsnotify_mark *mark,
152 BUG(); 152 BUG();
153 153
154 list_del_init(&mark->g_list); 154 list_del_init(&mark->g_list);
155
156 spin_unlock(&mark->lock); 155 spin_unlock(&mark->lock);
157 156
158 if (inode && (mark->flags & FSNOTIFY_MARK_FLAG_OBJECT_PINNED)) 157 if (inode && (mark->flags & FSNOTIFY_MARK_FLAG_OBJECT_PINNED))
159 iput(inode); 158 iput(inode);
160 /* release lock temporarily */
161 mutex_unlock(&group->mark_mutex);
162 159
163 spin_lock(&destroy_lock); 160 spin_lock(&destroy_lock);
164 list_add(&mark->g_list, &destroy_list); 161 list_add(&mark->g_list, &destroy_list);
165 spin_unlock(&destroy_lock); 162 spin_unlock(&destroy_lock);
166 wake_up(&destroy_waitq); 163 wake_up(&destroy_waitq);
167 /*
168 * We don't necessarily have a ref on mark from caller so the above destroy
169 * may have actually freed it, unless this group provides a 'freeing_mark'
170 * function which must be holding a reference.
171 */
172
173 /*
174 * Some groups like to know that marks are being freed. This is a
175 * callback to the group function to let it know that this mark
176 * is being freed.
177 */
178 if (group->ops->freeing_mark)
179 group->ops->freeing_mark(mark, group);
180 164
181 /* 165 /*
182 * __fsnotify_update_child_dentry_flags(inode); 166 * __fsnotify_update_child_dentry_flags(inode);
@@ -191,8 +175,6 @@ void fsnotify_destroy_mark_locked(struct fsnotify_mark *mark,
191 */ 175 */
192 176
193 atomic_dec(&group->num_marks); 177 atomic_dec(&group->num_marks);
194
195 mutex_lock_nested(&group->mark_mutex, SINGLE_DEPTH_NESTING);
196} 178}
197 179
198void fsnotify_destroy_mark(struct fsnotify_mark *mark, 180void fsnotify_destroy_mark(struct fsnotify_mark *mark,
@@ -205,7 +187,10 @@ void fsnotify_destroy_mark(struct fsnotify_mark *mark,
205 187
206/* 188/*
207 * Destroy all marks in the given list. The marks must be already detached from 189 * Destroy all marks in the given list. The marks must be already detached from
208 * the original inode / vfsmount. 190 * the original inode / vfsmount. Note that we can race with
191 * fsnotify_clear_marks_by_group_flags(). However we hold a reference to each
192 * mark so they won't get freed from under us and nobody else touches our
193 * free_list list_head.
209 */ 194 */
210void fsnotify_destroy_marks(struct list_head *to_free) 195void fsnotify_destroy_marks(struct list_head *to_free)
211{ 196{
@@ -406,7 +391,7 @@ struct fsnotify_mark *fsnotify_find_mark(struct hlist_head *head,
406} 391}
407 392
408/* 393/*
409 * clear any marks in a group in which mark->flags & flags is true 394 * Clear any marks in a group in which mark->flags & flags is true.
410 */ 395 */
411void fsnotify_clear_marks_by_group_flags(struct fsnotify_group *group, 396void fsnotify_clear_marks_by_group_flags(struct fsnotify_group *group,
412 unsigned int flags) 397 unsigned int flags)
@@ -460,6 +445,7 @@ static int fsnotify_mark_destroy(void *ignored)
460{ 445{
461 struct fsnotify_mark *mark, *next; 446 struct fsnotify_mark *mark, *next;
462 struct list_head private_destroy_list; 447 struct list_head private_destroy_list;
448 struct fsnotify_group *group;
463 449
464 for (;;) { 450 for (;;) {
465 spin_lock(&destroy_lock); 451 spin_lock(&destroy_lock);
@@ -471,6 +457,14 @@ static int fsnotify_mark_destroy(void *ignored)
471 457
472 list_for_each_entry_safe(mark, next, &private_destroy_list, g_list) { 458 list_for_each_entry_safe(mark, next, &private_destroy_list, g_list) {
473 list_del_init(&mark->g_list); 459 list_del_init(&mark->g_list);
460 group = mark->group;
461 /*
462 * Some groups like to know that marks are being freed.
463 * This is a callback to the group function to let it
464 * know that this mark is being freed.
465 */
466 if (group && group->ops->freeing_mark)
467 group->ops->freeing_mark(mark, group);
474 fsnotify_put_mark(mark); 468 fsnotify_put_mark(mark);
475 } 469 }
476 470
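Read together, the mark.c hunks move the freeing_mark() callback from fsnotify_destroy_mark_locked() into the dedicated destroy thread: the destroyer no longer has to drop and retake group->mark_mutex around the callback (the mutex_unlock()/mutex_lock_nested() pair is deleted), and fsnotify_mark_destroy() now calls freeing_mark() just before the final fsnotify_put_mark() on each entry drained from destroy_list.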
diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c
index 53e6c40ed4c6..3cb097ccce60 100644
--- a/fs/ocfs2/ioctl.c
+++ b/fs/ocfs2/ioctl.c
@@ -980,7 +980,6 @@ long ocfs2_compat_ioctl(struct file *file, unsigned cmd, unsigned long arg)
980 case OCFS2_IOC_GROUP_EXTEND: 980 case OCFS2_IOC_GROUP_EXTEND:
981 case OCFS2_IOC_GROUP_ADD: 981 case OCFS2_IOC_GROUP_ADD:
982 case OCFS2_IOC_GROUP_ADD64: 982 case OCFS2_IOC_GROUP_ADD64:
983 case FITRIM:
984 break; 983 break;
985 case OCFS2_IOC_REFLINK: 984 case OCFS2_IOC_REFLINK:
986 if (copy_from_user(&args, argp, sizeof(args))) 985 if (copy_from_user(&args, argp, sizeof(args)))
diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c
index f140e3dbfb7b..d9da5a4e9382 100644
--- a/fs/overlayfs/inode.c
+++ b/fs/overlayfs/inode.c
@@ -343,6 +343,9 @@ struct inode *ovl_d_select_inode(struct dentry *dentry, unsigned file_flags)
343 struct path realpath; 343 struct path realpath;
344 enum ovl_path_type type; 344 enum ovl_path_type type;
345 345
346 if (d_is_dir(dentry))
347 return d_backing_inode(dentry);
348
346 type = ovl_path_real(dentry, &realpath); 349 type = ovl_path_real(dentry, &realpath);
347 if (ovl_open_need_copy_up(file_flags, type, realpath.dentry)) { 350 if (ovl_open_need_copy_up(file_flags, type, realpath.dentry)) {
348 err = ovl_want_write(dentry); 351 err = ovl_want_write(dentry);
diff --git a/fs/proc/Kconfig b/fs/proc/Kconfig
index d751fcb637bb..1ade1206bb89 100644
--- a/fs/proc/Kconfig
+++ b/fs/proc/Kconfig
@@ -75,3 +75,9 @@ config PROC_PAGE_MONITOR
75config PROC_CHILDREN 75config PROC_CHILDREN
76 bool "Include /proc/<pid>/task/<tid>/children file" 76 bool "Include /proc/<pid>/task/<tid>/children file"
77 default n 77 default n
78 help
79 Provides a fast way to retrieve the first-level children PIDs of a task. See
80 <file:Documentation/filesystems/proc.txt> for more information.
81
82 Say Y if you are running any user-space software which benefits from
83 this interface. For example, rkt is such a piece of software.
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 87782e874b6a..aa50d1ac28fc 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -243,6 +243,11 @@ static ssize_t proc_pid_cmdline_read(struct file *file, char __user *buf,
243 len1 = arg_end - arg_start; 243 len1 = arg_end - arg_start;
244 len2 = env_end - env_start; 244 len2 = env_end - env_start;
245 245
246 /* Empty ARGV. */
247 if (len1 == 0) {
248 rv = 0;
249 goto out_free_page;
250 }
246 /* 251 /*
247 * Inherently racy -- command line shares address space 252 * Inherently racy -- command line shares address space
248 * with code and data. 253 * with code and data.
diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
index 91a4e6426321..92e6726f6e37 100644
--- a/fs/proc/kcore.c
+++ b/fs/proc/kcore.c
@@ -92,7 +92,7 @@ static size_t get_kcore_size(int *nphdr, size_t *elf_buflen)
92 roundup(sizeof(CORE_STR), 4)) + 92 roundup(sizeof(CORE_STR), 4)) +
93 roundup(sizeof(struct elf_prstatus), 4) + 93 roundup(sizeof(struct elf_prstatus), 4) +
94 roundup(sizeof(struct elf_prpsinfo), 4) + 94 roundup(sizeof(struct elf_prpsinfo), 4) +
95 roundup(sizeof(struct task_struct), 4); 95 roundup(arch_task_struct_size, 4);
96 *elf_buflen = PAGE_ALIGN(*elf_buflen); 96 *elf_buflen = PAGE_ALIGN(*elf_buflen);
97 return size + *elf_buflen; 97 return size + *elf_buflen;
98} 98}
@@ -415,7 +415,7 @@ static void elf_kcore_store_hdr(char *bufp, int nphdr, int dataoff)
415 /* set up the task structure */ 415 /* set up the task structure */
416 notes[2].name = CORE_STR; 416 notes[2].name = CORE_STR;
417 notes[2].type = NT_TASKSTRUCT; 417 notes[2].type = NT_TASKSTRUCT;
418 notes[2].datasz = sizeof(struct task_struct); 418 notes[2].datasz = arch_task_struct_size;
419 notes[2].data = current; 419 notes[2].data = current;
420 420
421 nhdr->p_filesz += notesize(&notes[2]); 421 nhdr->p_filesz += notesize(&notes[2]);