aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorIngo Molnar <mingo@elte.hu>2009-02-19 03:00:35 -0500
committerIngo Molnar <mingo@elte.hu>2009-02-19 03:00:35 -0500
commit72c26c9a26ea7f2f3d14f162c2ebb07805f724ea (patch)
treebf1b4bc0b69f96c79474f9edb9cf0e811c95f2dc /fs
parent37bd824a35a60abc73e5fa8816bd5f50c913d69b (diff)
parentba95fd47d177d46743ad94055908d22840370e06 (diff)
Merge branch 'linus' into tracing/blktrace
Conflicts: block/blktrace.c Semantic merge: kernel/trace/blktrace.c Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'fs')
-rw-r--r--fs/bio.c5
-rw-r--r--fs/btrfs/ctree.c58
-rw-r--r--fs/btrfs/ctree.h11
-rw-r--r--fs/btrfs/disk-io.c46
-rw-r--r--fs/btrfs/disk-io.h10
-rw-r--r--fs/btrfs/extent-tree.c83
-rw-r--r--fs/btrfs/extent_io.c2
-rw-r--r--fs/btrfs/inode-map.c1
-rw-r--r--fs/btrfs/inode.c4
-rw-r--r--fs/btrfs/locking.c11
-rw-r--r--fs/btrfs/super.c5
-rw-r--r--fs/btrfs/transaction.c2
-rw-r--r--fs/btrfs/tree-log.c2
-rw-r--r--fs/btrfs/volumes.c6
-rw-r--r--fs/buffer.c3
-rw-r--r--fs/compat_ioctl.c2
-rw-r--r--fs/ext4/ext4.h2
-rw-r--r--fs/ext4/inode.c27
-rw-r--r--fs/ext4/mballoc.c32
-rw-r--r--fs/ext4/migrate.c8
-rw-r--r--fs/ext4/super.c11
-rw-r--r--fs/jbd2/journal.c17
-rw-r--r--fs/jbd2/transaction.c42
-rw-r--r--fs/namespace.c6
-rw-r--r--fs/notify/inotify/inotify.c2
-rw-r--r--fs/ocfs2/journal.h6
-rw-r--r--fs/seq_file.c36
-rw-r--r--fs/super.c17
-rw-r--r--fs/timerfd.c12
29 files changed, 322 insertions, 147 deletions
diff --git a/fs/bio.c b/fs/bio.c
index 062299acbccd..72ab251cdb9c 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -302,9 +302,10 @@ void bio_init(struct bio *bio)
302struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs) 302struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs)
303{ 303{
304 struct bio *bio = NULL; 304 struct bio *bio = NULL;
305 void *p;
305 306
306 if (bs) { 307 if (bs) {
307 void *p = mempool_alloc(bs->bio_pool, gfp_mask); 308 p = mempool_alloc(bs->bio_pool, gfp_mask);
308 309
309 if (p) 310 if (p)
310 bio = p + bs->front_pad; 311 bio = p + bs->front_pad;
@@ -329,7 +330,7 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs)
329 } 330 }
330 if (unlikely(!bvl)) { 331 if (unlikely(!bvl)) {
331 if (bs) 332 if (bs)
332 mempool_free(bio, bs->bio_pool); 333 mempool_free(p, bs->bio_pool);
333 else 334 else
334 kfree(bio); 335 kfree(bio);
335 bio = NULL; 336 bio = NULL;
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 35443cc4b9a9..42491d728e99 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -38,19 +38,12 @@ static int balance_node_right(struct btrfs_trans_handle *trans,
38static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root, 38static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
39 struct btrfs_path *path, int level, int slot); 39 struct btrfs_path *path, int level, int slot);
40 40
41inline void btrfs_init_path(struct btrfs_path *p)
42{
43 memset(p, 0, sizeof(*p));
44}
45
46struct btrfs_path *btrfs_alloc_path(void) 41struct btrfs_path *btrfs_alloc_path(void)
47{ 42{
48 struct btrfs_path *path; 43 struct btrfs_path *path;
49 path = kmem_cache_alloc(btrfs_path_cachep, GFP_NOFS); 44 path = kmem_cache_zalloc(btrfs_path_cachep, GFP_NOFS);
50 if (path) { 45 if (path)
51 btrfs_init_path(path);
52 path->reada = 1; 46 path->reada = 1;
53 }
54 return path; 47 return path;
55} 48}
56 49
@@ -69,14 +62,38 @@ noinline void btrfs_set_path_blocking(struct btrfs_path *p)
69 62
70/* 63/*
71 * reset all the locked nodes in the patch to spinning locks. 64 * reset all the locked nodes in the patch to spinning locks.
65 *
66 * held is used to keep lockdep happy, when lockdep is enabled
67 * we set held to a blocking lock before we go around and
68 * retake all the spinlocks in the path. You can safely use NULL
69 * for held
72 */ 70 */
73noinline void btrfs_clear_path_blocking(struct btrfs_path *p) 71noinline void btrfs_clear_path_blocking(struct btrfs_path *p,
72 struct extent_buffer *held)
74{ 73{
75 int i; 74 int i;
76 for (i = 0; i < BTRFS_MAX_LEVEL; i++) { 75
76#ifdef CONFIG_DEBUG_LOCK_ALLOC
77 /* lockdep really cares that we take all of these spinlocks
78 * in the right order. If any of the locks in the path are not
79 * currently blocking, it is going to complain. So, make really
80 * really sure by forcing the path to blocking before we clear
81 * the path blocking.
82 */
83 if (held)
84 btrfs_set_lock_blocking(held);
85 btrfs_set_path_blocking(p);
86#endif
87
88 for (i = BTRFS_MAX_LEVEL - 1; i >= 0; i--) {
77 if (p->nodes[i] && p->locks[i]) 89 if (p->nodes[i] && p->locks[i])
78 btrfs_clear_lock_blocking(p->nodes[i]); 90 btrfs_clear_lock_blocking(p->nodes[i]);
79 } 91 }
92
93#ifdef CONFIG_DEBUG_LOCK_ALLOC
94 if (held)
95 btrfs_clear_lock_blocking(held);
96#endif
80} 97}
81 98
82/* this also releases the path */ 99/* this also releases the path */
@@ -286,7 +303,7 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
286 trans->transid, level, &ins); 303 trans->transid, level, &ins);
287 BUG_ON(ret); 304 BUG_ON(ret);
288 cow = btrfs_init_new_buffer(trans, root, prealloc_dest, 305 cow = btrfs_init_new_buffer(trans, root, prealloc_dest,
289 buf->len); 306 buf->len, level);
290 } else { 307 } else {
291 cow = btrfs_alloc_free_block(trans, root, buf->len, 308 cow = btrfs_alloc_free_block(trans, root, buf->len,
292 parent_start, 309 parent_start,
@@ -917,9 +934,9 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
917 934
918 /* promote the child to a root */ 935 /* promote the child to a root */
919 child = read_node_slot(root, mid, 0); 936 child = read_node_slot(root, mid, 0);
937 BUG_ON(!child);
920 btrfs_tree_lock(child); 938 btrfs_tree_lock(child);
921 btrfs_set_lock_blocking(child); 939 btrfs_set_lock_blocking(child);
922 BUG_ON(!child);
923 ret = btrfs_cow_block(trans, root, child, mid, 0, &child, 0); 940 ret = btrfs_cow_block(trans, root, child, mid, 0, &child, 0);
924 BUG_ON(ret); 941 BUG_ON(ret);
925 942
@@ -1566,7 +1583,7 @@ cow_done:
1566 if (!p->skip_locking) 1583 if (!p->skip_locking)
1567 p->locks[level] = 1; 1584 p->locks[level] = 1;
1568 1585
1569 btrfs_clear_path_blocking(p); 1586 btrfs_clear_path_blocking(p, NULL);
1570 1587
1571 /* 1588 /*
1572 * we have a lock on b and as long as we aren't changing 1589 * we have a lock on b and as long as we aren't changing
@@ -1605,7 +1622,7 @@ cow_done:
1605 1622
1606 btrfs_set_path_blocking(p); 1623 btrfs_set_path_blocking(p);
1607 sret = split_node(trans, root, p, level); 1624 sret = split_node(trans, root, p, level);
1608 btrfs_clear_path_blocking(p); 1625 btrfs_clear_path_blocking(p, NULL);
1609 1626
1610 BUG_ON(sret > 0); 1627 BUG_ON(sret > 0);
1611 if (sret) { 1628 if (sret) {
@@ -1625,7 +1642,7 @@ cow_done:
1625 1642
1626 btrfs_set_path_blocking(p); 1643 btrfs_set_path_blocking(p);
1627 sret = balance_level(trans, root, p, level); 1644 sret = balance_level(trans, root, p, level);
1628 btrfs_clear_path_blocking(p); 1645 btrfs_clear_path_blocking(p, NULL);
1629 1646
1630 if (sret) { 1647 if (sret) {
1631 ret = sret; 1648 ret = sret;
@@ -1688,13 +1705,13 @@ cow_done:
1688 if (!p->skip_locking) { 1705 if (!p->skip_locking) {
1689 int lret; 1706 int lret;
1690 1707
1691 btrfs_clear_path_blocking(p); 1708 btrfs_clear_path_blocking(p, NULL);
1692 lret = btrfs_try_spin_lock(b); 1709 lret = btrfs_try_spin_lock(b);
1693 1710
1694 if (!lret) { 1711 if (!lret) {
1695 btrfs_set_path_blocking(p); 1712 btrfs_set_path_blocking(p);
1696 btrfs_tree_lock(b); 1713 btrfs_tree_lock(b);
1697 btrfs_clear_path_blocking(p); 1714 btrfs_clear_path_blocking(p, b);
1698 } 1715 }
1699 } 1716 }
1700 } else { 1717 } else {
@@ -1706,7 +1723,7 @@ cow_done:
1706 btrfs_set_path_blocking(p); 1723 btrfs_set_path_blocking(p);
1707 sret = split_leaf(trans, root, key, 1724 sret = split_leaf(trans, root, key,
1708 p, ins_len, ret == 0); 1725 p, ins_len, ret == 0);
1709 btrfs_clear_path_blocking(p); 1726 btrfs_clear_path_blocking(p, NULL);
1710 1727
1711 BUG_ON(sret > 0); 1728 BUG_ON(sret > 0);
1712 if (sret) { 1729 if (sret) {
@@ -3926,7 +3943,6 @@ find_next_key:
3926 btrfs_release_path(root, path); 3943 btrfs_release_path(root, path);
3927 goto again; 3944 goto again;
3928 } else { 3945 } else {
3929 btrfs_clear_path_blocking(path);
3930 goto out; 3946 goto out;
3931 } 3947 }
3932 } 3948 }
@@ -3946,7 +3962,7 @@ find_next_key:
3946 path->locks[level - 1] = 1; 3962 path->locks[level - 1] = 1;
3947 path->nodes[level - 1] = cur; 3963 path->nodes[level - 1] = cur;
3948 unlock_up(path, level, 1); 3964 unlock_up(path, level, 1);
3949 btrfs_clear_path_blocking(path); 3965 btrfs_clear_path_blocking(path, NULL);
3950 } 3966 }
3951out: 3967out:
3952 if (ret == 0) 3968 if (ret == 0)
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 531db112c8bd..766b31ae3186 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -43,11 +43,7 @@ struct btrfs_ordered_sum;
43 43
44#define BTRFS_ACL_NOT_CACHED ((void *)-1) 44#define BTRFS_ACL_NOT_CACHED ((void *)-1)
45 45
46#ifdef CONFIG_LOCKDEP 46#define BTRFS_MAX_LEVEL 8
47# define BTRFS_MAX_LEVEL 7
48#else
49# define BTRFS_MAX_LEVEL 8
50#endif
51 47
52/* holds pointers to all of the tree roots */ 48/* holds pointers to all of the tree roots */
53#define BTRFS_ROOT_TREE_OBJECTID 1ULL 49#define BTRFS_ROOT_TREE_OBJECTID 1ULL
@@ -1715,7 +1711,8 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
1715 u64 empty_size); 1711 u64 empty_size);
1716struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans, 1712struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans,
1717 struct btrfs_root *root, 1713 struct btrfs_root *root,
1718 u64 bytenr, u32 blocksize); 1714 u64 bytenr, u32 blocksize,
1715 int level);
1719int btrfs_alloc_extent(struct btrfs_trans_handle *trans, 1716int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
1720 struct btrfs_root *root, 1717 struct btrfs_root *root,
1721 u64 num_bytes, u64 parent, u64 min_bytes, 1718 u64 num_bytes, u64 parent, u64 min_bytes,
@@ -1834,9 +1831,7 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans,
1834void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p); 1831void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p);
1835struct btrfs_path *btrfs_alloc_path(void); 1832struct btrfs_path *btrfs_alloc_path(void);
1836void btrfs_free_path(struct btrfs_path *p); 1833void btrfs_free_path(struct btrfs_path *p);
1837void btrfs_init_path(struct btrfs_path *p);
1838void btrfs_set_path_blocking(struct btrfs_path *p); 1834void btrfs_set_path_blocking(struct btrfs_path *p);
1839void btrfs_clear_path_blocking(struct btrfs_path *p);
1840void btrfs_unlock_up_safe(struct btrfs_path *p, int level); 1835void btrfs_unlock_up_safe(struct btrfs_path *p, int level);
1841 1836
1842int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root, 1837int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 5aebddd71193..adda739a0215 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -75,6 +75,40 @@ struct async_submit_bio {
75 struct btrfs_work work; 75 struct btrfs_work work;
76}; 76};
77 77
78/* These are used to set the lockdep class on the extent buffer locks.
79 * The class is set by the readpage_end_io_hook after the buffer has
80 * passed csum validation but before the pages are unlocked.
81 *
82 * The lockdep class is also set by btrfs_init_new_buffer on freshly
83 * allocated blocks.
84 *
85 * The class is based on the level in the tree block, which allows lockdep
86 * to know that lower nodes nest inside the locks of higher nodes.
87 *
88 * We also add a check to make sure the highest level of the tree is
89 * the same as our lockdep setup here. If BTRFS_MAX_LEVEL changes, this
90 * code needs update as well.
91 */
92#ifdef CONFIG_DEBUG_LOCK_ALLOC
93# if BTRFS_MAX_LEVEL != 8
94# error
95# endif
96static struct lock_class_key btrfs_eb_class[BTRFS_MAX_LEVEL + 1];
97static const char *btrfs_eb_name[BTRFS_MAX_LEVEL + 1] = {
98 /* leaf */
99 "btrfs-extent-00",
100 "btrfs-extent-01",
101 "btrfs-extent-02",
102 "btrfs-extent-03",
103 "btrfs-extent-04",
104 "btrfs-extent-05",
105 "btrfs-extent-06",
106 "btrfs-extent-07",
107 /* highest possible level */
108 "btrfs-extent-08",
109};
110#endif
111
78/* 112/*
79 * extents on the btree inode are pretty simple, there's one extent 113 * extents on the btree inode are pretty simple, there's one extent
80 * that covers the entire device 114 * that covers the entire device
@@ -347,6 +381,15 @@ static int check_tree_block_fsid(struct btrfs_root *root,
347 return ret; 381 return ret;
348} 382}
349 383
384#ifdef CONFIG_DEBUG_LOCK_ALLOC
385void btrfs_set_buffer_lockdep_class(struct extent_buffer *eb, int level)
386{
387 lockdep_set_class_and_name(&eb->lock,
388 &btrfs_eb_class[level],
389 btrfs_eb_name[level]);
390}
391#endif
392
350static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end, 393static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end,
351 struct extent_state *state) 394 struct extent_state *state)
352{ 395{
@@ -392,6 +435,8 @@ static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end,
392 } 435 }
393 found_level = btrfs_header_level(eb); 436 found_level = btrfs_header_level(eb);
394 437
438 btrfs_set_buffer_lockdep_class(eb, found_level);
439
395 ret = csum_tree_block(root, eb, 1); 440 ret = csum_tree_block(root, eb, 1);
396 if (ret) 441 if (ret)
397 ret = -EIO; 442 ret = -EIO;
@@ -1777,7 +1822,6 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1777 ret = find_and_setup_root(tree_root, fs_info, 1822 ret = find_and_setup_root(tree_root, fs_info,
1778 BTRFS_DEV_TREE_OBJECTID, dev_root); 1823 BTRFS_DEV_TREE_OBJECTID, dev_root);
1779 dev_root->track_dirty = 1; 1824 dev_root->track_dirty = 1;
1780
1781 if (ret) 1825 if (ret)
1782 goto fail_extent_root; 1826 goto fail_extent_root;
1783 1827
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
index 494a56eb2986..95029db227be 100644
--- a/fs/btrfs/disk-io.h
+++ b/fs/btrfs/disk-io.h
@@ -101,4 +101,14 @@ int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans,
101int btrfs_add_log_tree(struct btrfs_trans_handle *trans, 101int btrfs_add_log_tree(struct btrfs_trans_handle *trans,
102 struct btrfs_root *root); 102 struct btrfs_root *root);
103int btree_lock_page_hook(struct page *page); 103int btree_lock_page_hook(struct page *page);
104
105
106#ifdef CONFIG_DEBUG_LOCK_ALLOC
107void btrfs_set_buffer_lockdep_class(struct extent_buffer *eb, int level);
108#else
109static inline void btrfs_set_buffer_lockdep_class(struct extent_buffer *eb,
110 int level)
111{
112}
113#endif
104#endif 114#endif
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 7527523c2d2d..0a5d796c9f7e 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -1323,8 +1323,25 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
1323int btrfs_extent_post_op(struct btrfs_trans_handle *trans, 1323int btrfs_extent_post_op(struct btrfs_trans_handle *trans,
1324 struct btrfs_root *root) 1324 struct btrfs_root *root)
1325{ 1325{
1326 finish_current_insert(trans, root->fs_info->extent_root, 1); 1326 u64 start;
1327 del_pending_extents(trans, root->fs_info->extent_root, 1); 1327 u64 end;
1328 int ret;
1329
1330 while(1) {
1331 finish_current_insert(trans, root->fs_info->extent_root, 1);
1332 del_pending_extents(trans, root->fs_info->extent_root, 1);
1333
1334 /* is there more work to do? */
1335 ret = find_first_extent_bit(&root->fs_info->pending_del,
1336 0, &start, &end, EXTENT_WRITEBACK);
1337 if (!ret)
1338 continue;
1339 ret = find_first_extent_bit(&root->fs_info->extent_ins,
1340 0, &start, &end, EXTENT_WRITEBACK);
1341 if (!ret)
1342 continue;
1343 break;
1344 }
1328 return 0; 1345 return 0;
1329} 1346}
1330 1347
@@ -2211,13 +2228,12 @@ static int finish_current_insert(struct btrfs_trans_handle *trans,
2211 u64 end; 2228 u64 end;
2212 u64 priv; 2229 u64 priv;
2213 u64 search = 0; 2230 u64 search = 0;
2214 u64 skipped = 0;
2215 struct btrfs_fs_info *info = extent_root->fs_info; 2231 struct btrfs_fs_info *info = extent_root->fs_info;
2216 struct btrfs_path *path; 2232 struct btrfs_path *path;
2217 struct pending_extent_op *extent_op, *tmp; 2233 struct pending_extent_op *extent_op, *tmp;
2218 struct list_head insert_list, update_list; 2234 struct list_head insert_list, update_list;
2219 int ret; 2235 int ret;
2220 int num_inserts = 0, max_inserts; 2236 int num_inserts = 0, max_inserts, restart = 0;
2221 2237
2222 path = btrfs_alloc_path(); 2238 path = btrfs_alloc_path();
2223 INIT_LIST_HEAD(&insert_list); 2239 INIT_LIST_HEAD(&insert_list);
@@ -2233,19 +2249,19 @@ again:
2233 ret = find_first_extent_bit(&info->extent_ins, search, &start, 2249 ret = find_first_extent_bit(&info->extent_ins, search, &start,
2234 &end, EXTENT_WRITEBACK); 2250 &end, EXTENT_WRITEBACK);
2235 if (ret) { 2251 if (ret) {
2236 if (skipped && all && !num_inserts && 2252 if (restart && !num_inserts &&
2237 list_empty(&update_list)) { 2253 list_empty(&update_list)) {
2238 skipped = 0; 2254 restart = 0;
2239 search = 0; 2255 search = 0;
2240 continue; 2256 continue;
2241 } 2257 }
2242 mutex_unlock(&info->extent_ins_mutex);
2243 break; 2258 break;
2244 } 2259 }
2245 2260
2246 ret = try_lock_extent(&info->extent_ins, start, end, GFP_NOFS); 2261 ret = try_lock_extent(&info->extent_ins, start, end, GFP_NOFS);
2247 if (!ret) { 2262 if (!ret) {
2248 skipped = 1; 2263 if (all)
2264 restart = 1;
2249 search = end + 1; 2265 search = end + 1;
2250 if (need_resched()) { 2266 if (need_resched()) {
2251 mutex_unlock(&info->extent_ins_mutex); 2267 mutex_unlock(&info->extent_ins_mutex);
@@ -2264,7 +2280,7 @@ again:
2264 list_add_tail(&extent_op->list, &insert_list); 2280 list_add_tail(&extent_op->list, &insert_list);
2265 search = end + 1; 2281 search = end + 1;
2266 if (num_inserts == max_inserts) { 2282 if (num_inserts == max_inserts) {
2267 mutex_unlock(&info->extent_ins_mutex); 2283 restart = 1;
2268 break; 2284 break;
2269 } 2285 }
2270 } else if (extent_op->type == PENDING_BACKREF_UPDATE) { 2286 } else if (extent_op->type == PENDING_BACKREF_UPDATE) {
@@ -2280,7 +2296,6 @@ again:
2280 * somebody marked this thing for deletion then just unlock it and be 2296 * somebody marked this thing for deletion then just unlock it and be
2281 * done, the free_extents will handle it 2297 * done, the free_extents will handle it
2282 */ 2298 */
2283 mutex_lock(&info->extent_ins_mutex);
2284 list_for_each_entry_safe(extent_op, tmp, &update_list, list) { 2299 list_for_each_entry_safe(extent_op, tmp, &update_list, list) {
2285 clear_extent_bits(&info->extent_ins, extent_op->bytenr, 2300 clear_extent_bits(&info->extent_ins, extent_op->bytenr,
2286 extent_op->bytenr + extent_op->num_bytes - 1, 2301 extent_op->bytenr + extent_op->num_bytes - 1,
@@ -2302,6 +2317,10 @@ again:
2302 if (!list_empty(&update_list)) { 2317 if (!list_empty(&update_list)) {
2303 ret = update_backrefs(trans, extent_root, path, &update_list); 2318 ret = update_backrefs(trans, extent_root, path, &update_list);
2304 BUG_ON(ret); 2319 BUG_ON(ret);
2320
2321 /* we may have COW'ed new blocks, so lets start over */
2322 if (all)
2323 restart = 1;
2305 } 2324 }
2306 2325
2307 /* 2326 /*
@@ -2309,9 +2328,9 @@ again:
2309 * need to make sure everything is cleaned then reset everything and 2328 * need to make sure everything is cleaned then reset everything and
2310 * go back to the beginning 2329 * go back to the beginning
2311 */ 2330 */
2312 if (!num_inserts && all && skipped) { 2331 if (!num_inserts && restart) {
2313 search = 0; 2332 search = 0;
2314 skipped = 0; 2333 restart = 0;
2315 INIT_LIST_HEAD(&update_list); 2334 INIT_LIST_HEAD(&update_list);
2316 INIT_LIST_HEAD(&insert_list); 2335 INIT_LIST_HEAD(&insert_list);
2317 goto again; 2336 goto again;
@@ -2368,27 +2387,19 @@ again:
2368 BUG_ON(ret); 2387 BUG_ON(ret);
2369 2388
2370 /* 2389 /*
2371 * if we broke out of the loop in order to insert stuff because we hit 2390 * if restart is set for whatever reason we need to go back and start
2372 * the maximum number of inserts at a time we can handle, then loop 2391 * searching through the pending list again.
2373 * back and pick up where we left off 2392 *
2393 * We just inserted some extents, which could have resulted in new
2394 * blocks being allocated, which would result in new blocks needing
2395 * updates, so if all is set we _must_ restart to get the updated
2396 * blocks.
2374 */ 2397 */
2375 if (num_inserts == max_inserts) { 2398 if (restart || all) {
2376 INIT_LIST_HEAD(&insert_list);
2377 INIT_LIST_HEAD(&update_list);
2378 num_inserts = 0;
2379 goto again;
2380 }
2381
2382 /*
2383 * again, if we need to make absolutely sure there are no more pending
2384 * extent operations left and we know that we skipped some, go back to
2385 * the beginning and do it all again
2386 */
2387 if (all && skipped) {
2388 INIT_LIST_HEAD(&insert_list); 2399 INIT_LIST_HEAD(&insert_list);
2389 INIT_LIST_HEAD(&update_list); 2400 INIT_LIST_HEAD(&update_list);
2390 search = 0; 2401 search = 0;
2391 skipped = 0; 2402 restart = 0;
2392 num_inserts = 0; 2403 num_inserts = 0;
2393 goto again; 2404 goto again;
2394 } 2405 }
@@ -2709,6 +2720,8 @@ again:
2709 goto again; 2720 goto again;
2710 } 2721 }
2711 2722
2723 if (!err)
2724 finish_current_insert(trans, extent_root, 0);
2712 return err; 2725 return err;
2713} 2726}
2714 2727
@@ -2859,7 +2872,8 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
2859 2872
2860 if (data & BTRFS_BLOCK_GROUP_METADATA) { 2873 if (data & BTRFS_BLOCK_GROUP_METADATA) {
2861 last_ptr = &root->fs_info->last_alloc; 2874 last_ptr = &root->fs_info->last_alloc;
2862 empty_cluster = 64 * 1024; 2875 if (!btrfs_test_opt(root, SSD))
2876 empty_cluster = 64 * 1024;
2863 } 2877 }
2864 2878
2865 if ((data & BTRFS_BLOCK_GROUP_DATA) && btrfs_test_opt(root, SSD)) 2879 if ((data & BTRFS_BLOCK_GROUP_DATA) && btrfs_test_opt(root, SSD))
@@ -3402,7 +3416,8 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
3402 3416
3403struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans, 3417struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans,
3404 struct btrfs_root *root, 3418 struct btrfs_root *root,
3405 u64 bytenr, u32 blocksize) 3419 u64 bytenr, u32 blocksize,
3420 int level)
3406{ 3421{
3407 struct extent_buffer *buf; 3422 struct extent_buffer *buf;
3408 3423
@@ -3410,6 +3425,7 @@ struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans,
3410 if (!buf) 3425 if (!buf)
3411 return ERR_PTR(-ENOMEM); 3426 return ERR_PTR(-ENOMEM);
3412 btrfs_set_header_generation(buf, trans->transid); 3427 btrfs_set_header_generation(buf, trans->transid);
3428 btrfs_set_buffer_lockdep_class(buf, level);
3413 btrfs_tree_lock(buf); 3429 btrfs_tree_lock(buf);
3414 clean_tree_block(trans, root, buf); 3430 clean_tree_block(trans, root, buf);
3415 3431
@@ -3453,7 +3469,8 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
3453 return ERR_PTR(ret); 3469 return ERR_PTR(ret);
3454 } 3470 }
3455 3471
3456 buf = btrfs_init_new_buffer(trans, root, ins.objectid, blocksize); 3472 buf = btrfs_init_new_buffer(trans, root, ins.objectid,
3473 blocksize, level);
3457 return buf; 3474 return buf;
3458} 3475}
3459 3476
@@ -5641,7 +5658,9 @@ static noinline int relocate_one_extent(struct btrfs_root *extent_root,
5641 prev_block = block_start; 5658 prev_block = block_start;
5642 } 5659 }
5643 5660
5661 mutex_lock(&extent_root->fs_info->trans_mutex);
5644 btrfs_record_root_in_trans(found_root); 5662 btrfs_record_root_in_trans(found_root);
5663 mutex_unlock(&extent_root->fs_info->trans_mutex);
5645 if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) { 5664 if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) {
5646 /* 5665 /*
5647 * try to update data extent references while 5666 * try to update data extent references while
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 37d43b516b79..ebe6b29e6069 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -415,8 +415,6 @@ static int split_state(struct extent_io_tree *tree, struct extent_state *orig,
415 415
416 node = tree_insert(&tree->state, prealloc->end, &prealloc->rb_node); 416 node = tree_insert(&tree->state, prealloc->end, &prealloc->rb_node);
417 if (node) { 417 if (node) {
418 struct extent_state *found;
419 found = rb_entry(node, struct extent_state, rb_node);
420 free_extent_state(prealloc); 418 free_extent_state(prealloc);
421 return -EEXIST; 419 return -EEXIST;
422 } 420 }
diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c
index 2aa79873eb46..cc7334d833c9 100644
--- a/fs/btrfs/inode-map.c
+++ b/fs/btrfs/inode-map.c
@@ -84,7 +84,6 @@ int btrfs_find_free_objectid(struct btrfs_trans_handle *trans,
84 search_key.type = 0; 84 search_key.type = 0;
85 search_key.offset = 0; 85 search_key.offset = 0;
86 86
87 btrfs_init_path(path);
88 start_found = 0; 87 start_found = 0;
89 ret = btrfs_search_slot(trans, root, &search_key, path, 0, 0); 88 ret = btrfs_search_slot(trans, root, &search_key, path, 0, 0);
90 if (ret < 0) 89 if (ret < 0)
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 8f0706210a47..3cee77ae03c8 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -2531,8 +2531,6 @@ noinline int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
2531 key.offset = (u64)-1; 2531 key.offset = (u64)-1;
2532 key.type = (u8)-1; 2532 key.type = (u8)-1;
2533 2533
2534 btrfs_init_path(path);
2535
2536search_again: 2534search_again:
2537 ret = btrfs_search_slot(trans, root, &key, path, -1, 1); 2535 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
2538 if (ret < 0) 2536 if (ret < 0)
@@ -4263,7 +4261,7 @@ static int btrfs_releasepage(struct page *page, gfp_t gfp_flags)
4263{ 4261{
4264 if (PageWriteback(page) || PageDirty(page)) 4262 if (PageWriteback(page) || PageDirty(page))
4265 return 0; 4263 return 0;
4266 return __btrfs_releasepage(page, gfp_flags); 4264 return __btrfs_releasepage(page, gfp_flags & GFP_NOFS);
4267} 4265}
4268 4266
4269static void btrfs_invalidatepage(struct page *page, unsigned long offset) 4267static void btrfs_invalidatepage(struct page *page, unsigned long offset)
diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c
index 9ebe9385129b..85506c4a3af7 100644
--- a/fs/btrfs/locking.c
+++ b/fs/btrfs/locking.c
@@ -25,21 +25,10 @@
25#include "extent_io.h" 25#include "extent_io.h"
26#include "locking.h" 26#include "locking.h"
27 27
28/*
29 * btrfs_header_level() isn't free, so don't call it when lockdep isn't
30 * on
31 */
32#ifdef CONFIG_DEBUG_LOCK_ALLOC
33static inline void spin_nested(struct extent_buffer *eb)
34{
35 spin_lock_nested(&eb->lock, BTRFS_MAX_LEVEL - btrfs_header_level(eb));
36}
37#else
38static inline void spin_nested(struct extent_buffer *eb) 28static inline void spin_nested(struct extent_buffer *eb)
39{ 29{
40 spin_lock(&eb->lock); 30 spin_lock(&eb->lock);
41} 31}
42#endif
43 32
44/* 33/*
45 * Setting a lock to blocking will drop the spinlock and set the 34 * Setting a lock to blocking will drop the spinlock and set the
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index f3fd7e2cbc38..19a4daf03ccb 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -379,7 +379,6 @@ int btrfs_sync_fs(struct super_block *sb, int wait)
379 btrfs_start_delalloc_inodes(root); 379 btrfs_start_delalloc_inodes(root);
380 btrfs_wait_ordered_extents(root, 0); 380 btrfs_wait_ordered_extents(root, 0);
381 381
382 btrfs_clean_old_snapshots(root);
383 trans = btrfs_start_transaction(root, 1); 382 trans = btrfs_start_transaction(root, 1);
384 ret = btrfs_commit_transaction(trans, root); 383 ret = btrfs_commit_transaction(trans, root);
385 sb->s_dirt = 0; 384 sb->s_dirt = 0;
@@ -511,6 +510,10 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
511 struct btrfs_root *root = btrfs_sb(sb); 510 struct btrfs_root *root = btrfs_sb(sb);
512 int ret; 511 int ret;
513 512
513 ret = btrfs_parse_options(root, data);
514 if (ret)
515 return -EINVAL;
516
514 if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) 517 if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY))
515 return 0; 518 return 0;
516 519
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 919172de5c9a..4112d53d4f4d 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -688,7 +688,9 @@ static noinline int drop_dirty_roots(struct btrfs_root *tree_root,
688 num_bytes -= btrfs_root_used(&dirty->root->root_item); 688 num_bytes -= btrfs_root_used(&dirty->root->root_item);
689 bytes_used = btrfs_root_used(&root->root_item); 689 bytes_used = btrfs_root_used(&root->root_item);
690 if (num_bytes) { 690 if (num_bytes) {
691 mutex_lock(&root->fs_info->trans_mutex);
691 btrfs_record_root_in_trans(root); 692 btrfs_record_root_in_trans(root);
693 mutex_unlock(&root->fs_info->trans_mutex);
692 btrfs_set_root_used(&root->root_item, 694 btrfs_set_root_used(&root->root_item,
693 bytes_used - num_bytes); 695 bytes_used - num_bytes);
694 } 696 }
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 20794290256b..9c462fbd60fa 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -2832,7 +2832,9 @@ again:
2832 BUG_ON(!wc.replay_dest); 2832 BUG_ON(!wc.replay_dest);
2833 2833
2834 wc.replay_dest->log_root = log; 2834 wc.replay_dest->log_root = log;
2835 mutex_lock(&fs_info->trans_mutex);
2835 btrfs_record_root_in_trans(wc.replay_dest); 2836 btrfs_record_root_in_trans(wc.replay_dest);
2837 mutex_unlock(&fs_info->trans_mutex);
2836 ret = walk_log_tree(trans, log, &wc); 2838 ret = walk_log_tree(trans, log, &wc);
2837 BUG_ON(ret); 2839 BUG_ON(ret);
2838 2840
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index bcd14ebccae1..1316139bf9e8 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -2894,10 +2894,6 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key,
2894 free_extent_map(em); 2894 free_extent_map(em);
2895 } 2895 }
2896 2896
2897 map = kzalloc(sizeof(*map), GFP_NOFS);
2898 if (!map)
2899 return -ENOMEM;
2900
2901 em = alloc_extent_map(GFP_NOFS); 2897 em = alloc_extent_map(GFP_NOFS);
2902 if (!em) 2898 if (!em)
2903 return -ENOMEM; 2899 return -ENOMEM;
@@ -3106,6 +3102,8 @@ int btrfs_read_sys_array(struct btrfs_root *root)
3106 if (!sb) 3102 if (!sb)
3107 return -ENOMEM; 3103 return -ENOMEM;
3108 btrfs_set_buffer_uptodate(sb); 3104 btrfs_set_buffer_uptodate(sb);
3105 btrfs_set_buffer_lockdep_class(sb, 0);
3106
3109 write_extent_buffer(sb, super_copy, 0, BTRFS_SUPER_INFO_SIZE); 3107 write_extent_buffer(sb, super_copy, 0, BTRFS_SUPER_INFO_SIZE);
3110 array_size = btrfs_super_sys_array_size(super_copy); 3108 array_size = btrfs_super_sys_array_size(super_copy);
3111 3109
diff --git a/fs/buffer.c b/fs/buffer.c
index 665d446b25bc..9f697419ed8e 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -777,6 +777,7 @@ static int __set_page_dirty(struct page *page,
777 __inc_zone_page_state(page, NR_FILE_DIRTY); 777 __inc_zone_page_state(page, NR_FILE_DIRTY);
778 __inc_bdi_stat(mapping->backing_dev_info, 778 __inc_bdi_stat(mapping->backing_dev_info,
779 BDI_RECLAIMABLE); 779 BDI_RECLAIMABLE);
780 task_dirty_inc(current);
780 task_io_account_write(PAGE_CACHE_SIZE); 781 task_io_account_write(PAGE_CACHE_SIZE);
781 } 782 }
782 radix_tree_tag_set(&mapping->page_tree, 783 radix_tree_tag_set(&mapping->page_tree,
@@ -3108,7 +3109,7 @@ int sync_dirty_buffer(struct buffer_head *bh)
3108 if (test_clear_buffer_dirty(bh)) { 3109 if (test_clear_buffer_dirty(bh)) {
3109 get_bh(bh); 3110 get_bh(bh);
3110 bh->b_end_io = end_buffer_write_sync; 3111 bh->b_end_io = end_buffer_write_sync;
3111 ret = submit_bh(WRITE_SYNC, bh); 3112 ret = submit_bh(WRITE, bh);
3112 wait_on_buffer(bh); 3113 wait_on_buffer(bh);
3113 if (buffer_eopnotsupp(bh)) { 3114 if (buffer_eopnotsupp(bh)) {
3114 clear_buffer_eopnotsupp(bh); 3115 clear_buffer_eopnotsupp(bh);
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index 9c6d815dd191..39bd4d38e889 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -1938,6 +1938,8 @@ ULONG_IOCTL(SET_BITMAP_FILE)
1938/* Big K */ 1938/* Big K */
1939COMPATIBLE_IOCTL(PIO_FONT) 1939COMPATIBLE_IOCTL(PIO_FONT)
1940COMPATIBLE_IOCTL(GIO_FONT) 1940COMPATIBLE_IOCTL(GIO_FONT)
1941COMPATIBLE_IOCTL(PIO_CMAP)
1942COMPATIBLE_IOCTL(GIO_CMAP)
1941ULONG_IOCTL(KDSIGACCEPT) 1943ULONG_IOCTL(KDSIGACCEPT)
1942COMPATIBLE_IOCTL(KDGETKEYCODE) 1944COMPATIBLE_IOCTL(KDGETKEYCODE)
1943COMPATIBLE_IOCTL(KDSETKEYCODE) 1945COMPATIBLE_IOCTL(KDSETKEYCODE)
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index aafc9eba1c25..b0c87dce66a3 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -868,7 +868,7 @@ static inline unsigned ext4_rec_len_from_disk(__le16 dlen)
868{ 868{
869 unsigned len = le16_to_cpu(dlen); 869 unsigned len = le16_to_cpu(dlen);
870 870
871 if (len == EXT4_MAX_REC_LEN) 871 if (len == EXT4_MAX_REC_LEN || len == 0)
872 return 1 << 16; 872 return 1 << 16;
873 return len; 873 return len;
874} 874}
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 03ba20be1329..cbd2ca99d113 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -47,8 +47,10 @@
47static inline int ext4_begin_ordered_truncate(struct inode *inode, 47static inline int ext4_begin_ordered_truncate(struct inode *inode,
48 loff_t new_size) 48 loff_t new_size)
49{ 49{
50 return jbd2_journal_begin_ordered_truncate(&EXT4_I(inode)->jinode, 50 return jbd2_journal_begin_ordered_truncate(
51 new_size); 51 EXT4_SB(inode->i_sb)->s_journal,
52 &EXT4_I(inode)->jinode,
53 new_size);
52} 54}
53 55
54static void ext4_invalidatepage(struct page *page, unsigned long offset); 56static void ext4_invalidatepage(struct page *page, unsigned long offset);
@@ -2437,6 +2439,7 @@ static int ext4_da_writepages(struct address_space *mapping,
2437 int no_nrwrite_index_update; 2439 int no_nrwrite_index_update;
2438 int pages_written = 0; 2440 int pages_written = 0;
2439 long pages_skipped; 2441 long pages_skipped;
2442 int range_cyclic, cycled = 1, io_done = 0;
2440 int needed_blocks, ret = 0, nr_to_writebump = 0; 2443 int needed_blocks, ret = 0, nr_to_writebump = 0;
2441 struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb); 2444 struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb);
2442 2445
@@ -2488,9 +2491,15 @@ static int ext4_da_writepages(struct address_space *mapping,
2488 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) 2491 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
2489 range_whole = 1; 2492 range_whole = 1;
2490 2493
2491 if (wbc->range_cyclic) 2494 range_cyclic = wbc->range_cyclic;
2495 if (wbc->range_cyclic) {
2492 index = mapping->writeback_index; 2496 index = mapping->writeback_index;
2493 else 2497 if (index)
2498 cycled = 0;
2499 wbc->range_start = index << PAGE_CACHE_SHIFT;
2500 wbc->range_end = LLONG_MAX;
2501 wbc->range_cyclic = 0;
2502 } else
2494 index = wbc->range_start >> PAGE_CACHE_SHIFT; 2503 index = wbc->range_start >> PAGE_CACHE_SHIFT;
2495 2504
2496 mpd.wbc = wbc; 2505 mpd.wbc = wbc;
@@ -2504,6 +2513,7 @@ static int ext4_da_writepages(struct address_space *mapping,
2504 wbc->no_nrwrite_index_update = 1; 2513 wbc->no_nrwrite_index_update = 1;
2505 pages_skipped = wbc->pages_skipped; 2514 pages_skipped = wbc->pages_skipped;
2506 2515
2516retry:
2507 while (!ret && wbc->nr_to_write > 0) { 2517 while (!ret && wbc->nr_to_write > 0) {
2508 2518
2509 /* 2519 /*
@@ -2546,6 +2556,7 @@ static int ext4_da_writepages(struct address_space *mapping,
2546 pages_written += mpd.pages_written; 2556 pages_written += mpd.pages_written;
2547 wbc->pages_skipped = pages_skipped; 2557 wbc->pages_skipped = pages_skipped;
2548 ret = 0; 2558 ret = 0;
2559 io_done = 1;
2549 } else if (wbc->nr_to_write) 2560 } else if (wbc->nr_to_write)
2550 /* 2561 /*
2551 * There is no more writeout needed 2562 * There is no more writeout needed
@@ -2554,6 +2565,13 @@ static int ext4_da_writepages(struct address_space *mapping,
2554 */ 2565 */
2555 break; 2566 break;
2556 } 2567 }
2568 if (!io_done && !cycled) {
2569 cycled = 1;
2570 index = 0;
2571 wbc->range_start = index << PAGE_CACHE_SHIFT;
2572 wbc->range_end = mapping->writeback_index - 1;
2573 goto retry;
2574 }
2557 if (pages_skipped != wbc->pages_skipped) 2575 if (pages_skipped != wbc->pages_skipped)
2558 printk(KERN_EMERG "This should not happen leaving %s " 2576 printk(KERN_EMERG "This should not happen leaving %s "
2559 "with nr_to_write = %ld ret = %d\n", 2577 "with nr_to_write = %ld ret = %d\n",
@@ -2561,6 +2579,7 @@ static int ext4_da_writepages(struct address_space *mapping,
2561 2579
2562 /* Update index */ 2580 /* Update index */
2563 index += pages_written; 2581 index += pages_written;
2582 wbc->range_cyclic = range_cyclic;
2564 if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0)) 2583 if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
2565 /* 2584 /*
2566 * set the writeback_index so that range_cyclic 2585 * set the writeback_index so that range_cyclic
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index deba54f6cbed..4415beeb0b62 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -3693,6 +3693,8 @@ ext4_mb_new_inode_pa(struct ext4_allocation_context *ac)
3693 pa->pa_free = pa->pa_len; 3693 pa->pa_free = pa->pa_len;
3694 atomic_set(&pa->pa_count, 1); 3694 atomic_set(&pa->pa_count, 1);
3695 spin_lock_init(&pa->pa_lock); 3695 spin_lock_init(&pa->pa_lock);
3696 INIT_LIST_HEAD(&pa->pa_inode_list);
3697 INIT_LIST_HEAD(&pa->pa_group_list);
3696 pa->pa_deleted = 0; 3698 pa->pa_deleted = 0;
3697 pa->pa_linear = 0; 3699 pa->pa_linear = 0;
3698 3700
@@ -3755,6 +3757,7 @@ ext4_mb_new_group_pa(struct ext4_allocation_context *ac)
3755 atomic_set(&pa->pa_count, 1); 3757 atomic_set(&pa->pa_count, 1);
3756 spin_lock_init(&pa->pa_lock); 3758 spin_lock_init(&pa->pa_lock);
3757 INIT_LIST_HEAD(&pa->pa_inode_list); 3759 INIT_LIST_HEAD(&pa->pa_inode_list);
3760 INIT_LIST_HEAD(&pa->pa_group_list);
3758 pa->pa_deleted = 0; 3761 pa->pa_deleted = 0;
3759 pa->pa_linear = 1; 3762 pa->pa_linear = 1;
3760 3763
@@ -4476,23 +4479,26 @@ static int ext4_mb_release_context(struct ext4_allocation_context *ac)
4476 pa->pa_free -= ac->ac_b_ex.fe_len; 4479 pa->pa_free -= ac->ac_b_ex.fe_len;
4477 pa->pa_len -= ac->ac_b_ex.fe_len; 4480 pa->pa_len -= ac->ac_b_ex.fe_len;
4478 spin_unlock(&pa->pa_lock); 4481 spin_unlock(&pa->pa_lock);
4479 /*
4480 * We want to add the pa to the right bucket.
4481 * Remove it from the list and while adding
4482 * make sure the list to which we are adding
4483 * doesn't grow big.
4484 */
4485 if (likely(pa->pa_free)) {
4486 spin_lock(pa->pa_obj_lock);
4487 list_del_rcu(&pa->pa_inode_list);
4488 spin_unlock(pa->pa_obj_lock);
4489 ext4_mb_add_n_trim(ac);
4490 }
4491 } 4482 }
4492 ext4_mb_put_pa(ac, ac->ac_sb, pa);
4493 } 4483 }
4494 if (ac->alloc_semp) 4484 if (ac->alloc_semp)
4495 up_read(ac->alloc_semp); 4485 up_read(ac->alloc_semp);
4486 if (pa) {
4487 /*
4488 * We want to add the pa to the right bucket.
4489 * Remove it from the list and while adding
4490 * make sure the list to which we are adding
4491 * doesn't grow big. We need to release
4492 * alloc_semp before calling ext4_mb_add_n_trim()
4493 */
4494 if (pa->pa_linear && likely(pa->pa_free)) {
4495 spin_lock(pa->pa_obj_lock);
4496 list_del_rcu(&pa->pa_inode_list);
4497 spin_unlock(pa->pa_obj_lock);
4498 ext4_mb_add_n_trim(ac);
4499 }
4500 ext4_mb_put_pa(ac, ac->ac_sb, pa);
4501 }
4496 if (ac->ac_bitmap_page) 4502 if (ac->ac_bitmap_page)
4497 page_cache_release(ac->ac_bitmap_page); 4503 page_cache_release(ac->ac_bitmap_page);
4498 if (ac->ac_buddy_page) 4504 if (ac->ac_buddy_page)
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c
index 734abca25e35..fe64d9f79852 100644
--- a/fs/ext4/migrate.c
+++ b/fs/ext4/migrate.c
@@ -481,7 +481,7 @@ int ext4_ext_migrate(struct inode *inode)
481 + 1); 481 + 1);
482 if (IS_ERR(handle)) { 482 if (IS_ERR(handle)) {
483 retval = PTR_ERR(handle); 483 retval = PTR_ERR(handle);
484 goto err_out; 484 return retval;
485 } 485 }
486 tmp_inode = ext4_new_inode(handle, 486 tmp_inode = ext4_new_inode(handle,
487 inode->i_sb->s_root->d_inode, 487 inode->i_sb->s_root->d_inode,
@@ -489,8 +489,7 @@ int ext4_ext_migrate(struct inode *inode)
489 if (IS_ERR(tmp_inode)) { 489 if (IS_ERR(tmp_inode)) {
490 retval = -ENOMEM; 490 retval = -ENOMEM;
491 ext4_journal_stop(handle); 491 ext4_journal_stop(handle);
492 tmp_inode = NULL; 492 return retval;
493 goto err_out;
494 } 493 }
495 i_size_write(tmp_inode, i_size_read(inode)); 494 i_size_write(tmp_inode, i_size_read(inode));
496 /* 495 /*
@@ -618,8 +617,7 @@ err_out:
618 617
619 ext4_journal_stop(handle); 618 ext4_journal_stop(handle);
620 619
621 if (tmp_inode) 620 iput(tmp_inode);
622 iput(tmp_inode);
623 621
624 return retval; 622 return retval;
625} 623}
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index e5f06a5f045e..a5732c58f676 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -3046,14 +3046,17 @@ static void ext4_write_super(struct super_block *sb)
3046static int ext4_sync_fs(struct super_block *sb, int wait) 3046static int ext4_sync_fs(struct super_block *sb, int wait)
3047{ 3047{
3048 int ret = 0; 3048 int ret = 0;
3049 tid_t target;
3049 3050
3050 trace_mark(ext4_sync_fs, "dev %s wait %d", sb->s_id, wait); 3051 trace_mark(ext4_sync_fs, "dev %s wait %d", sb->s_id, wait);
3051 sb->s_dirt = 0; 3052 sb->s_dirt = 0;
3052 if (EXT4_SB(sb)->s_journal) { 3053 if (EXT4_SB(sb)->s_journal) {
3053 if (wait) 3054 if (jbd2_journal_start_commit(EXT4_SB(sb)->s_journal,
3054 ret = ext4_force_commit(sb); 3055 &target)) {
3055 else 3056 if (wait)
3056 jbd2_journal_start_commit(EXT4_SB(sb)->s_journal, NULL); 3057 jbd2_log_wait_commit(EXT4_SB(sb)->s_journal,
3058 target);
3059 }
3057 } else { 3060 } else {
3058 ext4_commit_super(sb, EXT4_SB(sb)->s_es, wait); 3061 ext4_commit_super(sb, EXT4_SB(sb)->s_es, wait);
3059 } 3062 }
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index eb343008eded..58144102bf25 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -450,7 +450,7 @@ int __jbd2_log_space_left(journal_t *journal)
450} 450}
451 451
452/* 452/*
453 * Called under j_state_lock. Returns true if a transaction was started. 453 * Called under j_state_lock. Returns true if a transaction commit was started.
454 */ 454 */
455int __jbd2_log_start_commit(journal_t *journal, tid_t target) 455int __jbd2_log_start_commit(journal_t *journal, tid_t target)
456{ 456{
@@ -518,7 +518,8 @@ int jbd2_journal_force_commit_nested(journal_t *journal)
518 518
519/* 519/*
520 * Start a commit of the current running transaction (if any). Returns true 520 * Start a commit of the current running transaction (if any). Returns true
521 * if a transaction was started, and fills its tid in at *ptid 521 * if a transaction is going to be committed (or is currently already
522 * committing), and fills its tid in at *ptid
522 */ 523 */
523int jbd2_journal_start_commit(journal_t *journal, tid_t *ptid) 524int jbd2_journal_start_commit(journal_t *journal, tid_t *ptid)
524{ 525{
@@ -528,15 +529,19 @@ int jbd2_journal_start_commit(journal_t *journal, tid_t *ptid)
528 if (journal->j_running_transaction) { 529 if (journal->j_running_transaction) {
529 tid_t tid = journal->j_running_transaction->t_tid; 530 tid_t tid = journal->j_running_transaction->t_tid;
530 531
531 ret = __jbd2_log_start_commit(journal, tid); 532 __jbd2_log_start_commit(journal, tid);
532 if (ret && ptid) 533 /* There's a running transaction and we've just made sure
534 * it's commit has been scheduled. */
535 if (ptid)
533 *ptid = tid; 536 *ptid = tid;
534 } else if (journal->j_committing_transaction && ptid) { 537 ret = 1;
538 } else if (journal->j_committing_transaction) {
535 /* 539 /*
536 * If ext3_write_super() recently started a commit, then we 540 * If ext3_write_super() recently started a commit, then we
537 * have to wait for completion of that transaction 541 * have to wait for completion of that transaction
538 */ 542 */
539 *ptid = journal->j_committing_transaction->t_tid; 543 if (ptid)
544 *ptid = journal->j_committing_transaction->t_tid;
540 ret = 1; 545 ret = 1;
541 } 546 }
542 spin_unlock(&journal->j_state_lock); 547 spin_unlock(&journal->j_state_lock);
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index 46b4e347ed7d..28ce21d8598e 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -2129,26 +2129,46 @@ done:
2129} 2129}
2130 2130
2131/* 2131/*
2132 * This function must be called when inode is journaled in ordered mode 2132 * File truncate and transaction commit interact with each other in a
2133 * before truncation happens. It starts writeout of truncated part in 2133 * non-trivial way. If a transaction writing data block A is
2134 * case it is in the committing transaction so that we stand to ordered 2134 * committing, we cannot discard the data by truncate until we have
2135 * mode consistency guarantees. 2135 * written them. Otherwise if we crashed after the transaction with
2136 * write has committed but before the transaction with truncate has
2137 * committed, we could see stale data in block A. This function is a
2138 * helper to solve this problem. It starts writeout of the truncated
2139 * part in case it is in the committing transaction.
2140 *
2141 * Filesystem code must call this function when inode is journaled in
2142 * ordered mode before truncation happens and after the inode has been
2143 * placed on orphan list with the new inode size. The second condition
2144 * avoids the race that someone writes new data and we start
2145 * committing the transaction after this function has been called but
2146 * before a transaction for truncate is started (and furthermore it
2147 * allows us to optimize the case where the addition to orphan list
2148 * happens in the same transaction as write --- we don't have to write
2149 * any data in such case).
2136 */ 2150 */
2137int jbd2_journal_begin_ordered_truncate(struct jbd2_inode *inode, 2151int jbd2_journal_begin_ordered_truncate(journal_t *journal,
2152 struct jbd2_inode *jinode,
2138 loff_t new_size) 2153 loff_t new_size)
2139{ 2154{
2140 journal_t *journal; 2155 transaction_t *inode_trans, *commit_trans;
2141 transaction_t *commit_trans;
2142 int ret = 0; 2156 int ret = 0;
2143 2157
2144 if (!inode->i_transaction && !inode->i_next_transaction) 2158 /* This is a quick check to avoid locking if not necessary */
2159 if (!jinode->i_transaction)
2145 goto out; 2160 goto out;
2146 journal = inode->i_transaction->t_journal; 2161 /* Locks are here just to force reading of recent values, it is
2162 * enough that the transaction was not committing before we started
2163 * a transaction adding the inode to orphan list */
2147 spin_lock(&journal->j_state_lock); 2164 spin_lock(&journal->j_state_lock);
2148 commit_trans = journal->j_committing_transaction; 2165 commit_trans = journal->j_committing_transaction;
2149 spin_unlock(&journal->j_state_lock); 2166 spin_unlock(&journal->j_state_lock);
2150 if (inode->i_transaction == commit_trans) { 2167 spin_lock(&journal->j_list_lock);
2151 ret = filemap_fdatawrite_range(inode->i_vfs_inode->i_mapping, 2168 inode_trans = jinode->i_transaction;
2169 spin_unlock(&journal->j_list_lock);
2170 if (inode_trans == commit_trans) {
2171 ret = filemap_fdatawrite_range(jinode->i_vfs_inode->i_mapping,
2152 new_size, LLONG_MAX); 2172 new_size, LLONG_MAX);
2153 if (ret) 2173 if (ret)
2154 jbd2_journal_abort(journal, ret); 2174 jbd2_journal_abort(journal, ret);
diff --git a/fs/namespace.c b/fs/namespace.c
index 228d8c4bfd18..06f8e63f6cb1 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -614,9 +614,11 @@ static inline void __mntput(struct vfsmount *mnt)
614 */ 614 */
615 for_each_possible_cpu(cpu) { 615 for_each_possible_cpu(cpu) {
616 struct mnt_writer *cpu_writer = &per_cpu(mnt_writers, cpu); 616 struct mnt_writer *cpu_writer = &per_cpu(mnt_writers, cpu);
617 if (cpu_writer->mnt != mnt)
618 continue;
619 spin_lock(&cpu_writer->lock); 617 spin_lock(&cpu_writer->lock);
618 if (cpu_writer->mnt != mnt) {
619 spin_unlock(&cpu_writer->lock);
620 continue;
621 }
620 atomic_add(cpu_writer->count, &mnt->__mnt_writers); 622 atomic_add(cpu_writer->count, &mnt->__mnt_writers);
621 cpu_writer->count = 0; 623 cpu_writer->count = 0;
622 /* 624 /*
diff --git a/fs/notify/inotify/inotify.c b/fs/notify/inotify/inotify.c
index dae3f28f30d4..331f2e88e284 100644
--- a/fs/notify/inotify/inotify.c
+++ b/fs/notify/inotify/inotify.c
@@ -156,7 +156,7 @@ static int inotify_handle_get_wd(struct inotify_handle *ih,
156 int ret; 156 int ret;
157 157
158 do { 158 do {
159 if (unlikely(!idr_pre_get(&ih->idr, GFP_KERNEL))) 159 if (unlikely(!idr_pre_get(&ih->idr, GFP_NOFS)))
160 return -ENOSPC; 160 return -ENOSPC;
161 ret = idr_get_new_above(&ih->idr, watch, ih->last_wd+1, &watch->wd); 161 ret = idr_get_new_above(&ih->idr, watch, ih->last_wd+1, &watch->wd);
162 } while (ret == -EAGAIN); 162 } while (ret == -EAGAIN);
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h
index 3c3532e1307c..172850a9a12a 100644
--- a/fs/ocfs2/journal.h
+++ b/fs/ocfs2/journal.h
@@ -513,8 +513,10 @@ static inline int ocfs2_jbd2_file_inode(handle_t *handle, struct inode *inode)
513static inline int ocfs2_begin_ordered_truncate(struct inode *inode, 513static inline int ocfs2_begin_ordered_truncate(struct inode *inode,
514 loff_t new_size) 514 loff_t new_size)
515{ 515{
516 return jbd2_journal_begin_ordered_truncate(&OCFS2_I(inode)->ip_jinode, 516 return jbd2_journal_begin_ordered_truncate(
517 new_size); 517 OCFS2_SB(inode->i_sb)->journal->j_journal,
518 &OCFS2_I(inode)->ip_jinode,
519 new_size);
518} 520}
519 521
520#endif /* OCFS2_JOURNAL_H */ 522#endif /* OCFS2_JOURNAL_H */
diff --git a/fs/seq_file.c b/fs/seq_file.c
index 5267098532bf..a1a4cfe19210 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -48,8 +48,16 @@ int seq_open(struct file *file, const struct seq_operations *op)
48 */ 48 */
49 file->f_version = 0; 49 file->f_version = 0;
50 50
51 /* SEQ files support lseek, but not pread/pwrite */ 51 /*
52 file->f_mode &= ~(FMODE_PREAD | FMODE_PWRITE); 52 * seq_files support lseek() and pread(). They do not implement
53 * write() at all, but we clear FMODE_PWRITE here for historical
54 * reasons.
55 *
56 * If a client of seq_files a) implements file.write() and b) wishes to
57 * support pwrite() then that client will need to implement its own
58 * file.open() which calls seq_open() and then sets FMODE_PWRITE.
59 */
60 file->f_mode &= ~FMODE_PWRITE;
53 return 0; 61 return 0;
54} 62}
55EXPORT_SYMBOL(seq_open); 63EXPORT_SYMBOL(seq_open);
@@ -131,6 +139,22 @@ ssize_t seq_read(struct file *file, char __user *buf, size_t size, loff_t *ppos)
131 int err = 0; 139 int err = 0;
132 140
133 mutex_lock(&m->lock); 141 mutex_lock(&m->lock);
142
143 /* Don't assume *ppos is where we left it */
144 if (unlikely(*ppos != m->read_pos)) {
145 m->read_pos = *ppos;
146 while ((err = traverse(m, *ppos)) == -EAGAIN)
147 ;
148 if (err) {
149 /* With prejudice... */
150 m->read_pos = 0;
151 m->version = 0;
152 m->index = 0;
153 m->count = 0;
154 goto Done;
155 }
156 }
157
134 /* 158 /*
135 * seq_file->op->..m_start/m_stop/m_next may do special actions 159 * seq_file->op->..m_start/m_stop/m_next may do special actions
136 * or optimisations based on the file->f_version, so we want to 160 * or optimisations based on the file->f_version, so we want to
@@ -230,8 +254,10 @@ Fill:
230Done: 254Done:
231 if (!copied) 255 if (!copied)
232 copied = err; 256 copied = err;
233 else 257 else {
234 *ppos += copied; 258 *ppos += copied;
259 m->read_pos += copied;
260 }
235 file->f_version = m->version; 261 file->f_version = m->version;
236 mutex_unlock(&m->lock); 262 mutex_unlock(&m->lock);
237 return copied; 263 return copied;
@@ -266,16 +292,18 @@ loff_t seq_lseek(struct file *file, loff_t offset, int origin)
266 if (offset < 0) 292 if (offset < 0)
267 break; 293 break;
268 retval = offset; 294 retval = offset;
269 if (offset != file->f_pos) { 295 if (offset != m->read_pos) {
270 while ((retval=traverse(m, offset)) == -EAGAIN) 296 while ((retval=traverse(m, offset)) == -EAGAIN)
271 ; 297 ;
272 if (retval) { 298 if (retval) {
273 /* with extreme prejudice... */ 299 /* with extreme prejudice... */
274 file->f_pos = 0; 300 file->f_pos = 0;
301 m->read_pos = 0;
275 m->version = 0; 302 m->version = 0;
276 m->index = 0; 303 m->index = 0;
277 m->count = 0; 304 m->count = 0;
278 } else { 305 } else {
306 m->read_pos = offset;
279 retval = file->f_pos = offset; 307 retval = file->f_pos = offset;
280 } 308 }
281 } 309 }
diff --git a/fs/super.c b/fs/super.c
index 61dce001dd57..8349ed6b1412 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -82,7 +82,22 @@ static struct super_block *alloc_super(struct file_system_type *type)
82 * lock ordering than usbfs: 82 * lock ordering than usbfs:
83 */ 83 */
84 lockdep_set_class(&s->s_lock, &type->s_lock_key); 84 lockdep_set_class(&s->s_lock, &type->s_lock_key);
85 down_write(&s->s_umount); 85 /*
86 * sget() can have s_umount recursion.
87 *
88 * When it cannot find a suitable sb, it allocates a new
89 * one (this one), and tries again to find a suitable old
90 * one.
91 *
92 * In case that succeeds, it will acquire the s_umount
93 * lock of the old one. Since these are clearly distrinct
94 * locks, and this object isn't exposed yet, there's no
95 * risk of deadlocks.
96 *
97 * Annotate this by putting this lock in a different
98 * subclass.
99 */
100 down_write_nested(&s->s_umount, SINGLE_DEPTH_NESTING);
86 s->s_count = S_BIAS; 101 s->s_count = S_BIAS;
87 atomic_set(&s->s_active, 1); 102 atomic_set(&s->s_active, 1);
88 mutex_init(&s->s_vfs_rename_mutex); 103 mutex_init(&s->s_vfs_rename_mutex);
diff --git a/fs/timerfd.c b/fs/timerfd.c
index 6a123b8ff3f5..b042bd7034b1 100644
--- a/fs/timerfd.c
+++ b/fs/timerfd.c
@@ -186,10 +186,9 @@ SYSCALL_DEFINE2(timerfd_create, int, clockid, int, flags)
186 BUILD_BUG_ON(TFD_CLOEXEC != O_CLOEXEC); 186 BUILD_BUG_ON(TFD_CLOEXEC != O_CLOEXEC);
187 BUILD_BUG_ON(TFD_NONBLOCK != O_NONBLOCK); 187 BUILD_BUG_ON(TFD_NONBLOCK != O_NONBLOCK);
188 188
189 if (flags & ~(TFD_CLOEXEC | TFD_NONBLOCK)) 189 if ((flags & ~TFD_CREATE_FLAGS) ||
190 return -EINVAL; 190 (clockid != CLOCK_MONOTONIC &&
191 if (clockid != CLOCK_MONOTONIC && 191 clockid != CLOCK_REALTIME))
192 clockid != CLOCK_REALTIME)
193 return -EINVAL; 192 return -EINVAL;
194 193
195 ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); 194 ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
@@ -201,7 +200,7 @@ SYSCALL_DEFINE2(timerfd_create, int, clockid, int, flags)
201 hrtimer_init(&ctx->tmr, clockid, HRTIMER_MODE_ABS); 200 hrtimer_init(&ctx->tmr, clockid, HRTIMER_MODE_ABS);
202 201
203 ufd = anon_inode_getfd("[timerfd]", &timerfd_fops, ctx, 202 ufd = anon_inode_getfd("[timerfd]", &timerfd_fops, ctx,
204 flags & (O_CLOEXEC | O_NONBLOCK)); 203 flags & TFD_SHARED_FCNTL_FLAGS);
205 if (ufd < 0) 204 if (ufd < 0)
206 kfree(ctx); 205 kfree(ctx);
207 206
@@ -219,7 +218,8 @@ SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags,
219 if (copy_from_user(&ktmr, utmr, sizeof(ktmr))) 218 if (copy_from_user(&ktmr, utmr, sizeof(ktmr)))
220 return -EFAULT; 219 return -EFAULT;
221 220
222 if (!timespec_valid(&ktmr.it_value) || 221 if ((flags & ~TFD_SETTIME_FLAGS) ||
222 !timespec_valid(&ktmr.it_value) ||
223 !timespec_valid(&ktmr.it_interval)) 223 !timespec_valid(&ktmr.it_interval))
224 return -EINVAL; 224 return -EINVAL;
225 225