aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/bfs/inode.c2
-rw-r--r--fs/bio.c20
-rw-r--r--fs/btrfs/backref.c48
-rw-r--r--fs/btrfs/ctree.c1
-rw-r--r--fs/btrfs/extent-tree.c27
-rw-r--r--fs/btrfs/extent_io.c9
-rw-r--r--fs/btrfs/file.c62
-rw-r--r--fs/btrfs/inode.c52
-rw-r--r--fs/btrfs/scrub.c2
-rw-r--r--fs/btrfs/transaction.c8
-rw-r--r--fs/btrfs/transaction.h2
-rw-r--r--fs/btrfs/tree-log.c5
-rw-r--r--fs/cifs/cifsencrypt.c14
-rw-r--r--fs/cifs/cifsfs.c11
-rw-r--r--fs/cifs/cifsglob.h4
-rw-r--r--fs/cifs/cifsproto.h4
-rw-r--r--fs/cifs/connect.c7
-rw-r--r--fs/cifs/file.c1
-rw-r--r--fs/cifs/link.c84
-rw-r--r--fs/cifs/readdir.c8
-rw-r--r--fs/cifs/sess.c6
-rw-r--r--fs/cifs/smb1ops.c1
-rw-r--r--fs/cifs/smb2transport.c9
-rw-r--r--fs/dcache.c11
-rw-r--r--fs/debugfs/inode.c69
-rw-r--r--fs/dlm/user.c1
-rw-r--r--fs/efs/inode.c2
-rw-r--r--fs/exec.c4
-rw-r--r--fs/ext3/namei.c2
-rw-r--r--fs/ext4/balloc.c4
-rw-r--r--fs/ext4/ext4.h1
-rw-r--r--fs/ext4/ext4_jbd2.c8
-rw-r--r--fs/ext4/extents.c23
-rw-r--r--fs/ext4/extents_status.c73
-rw-r--r--fs/ext4/file.c21
-rw-r--r--fs/ext4/ialloc.c10
-rw-r--r--fs/ext4/inode.c95
-rw-r--r--fs/ext4/ioctl.c6
-rw-r--r--fs/ext4/mballoc.c11
-rw-r--r--fs/ext4/namei.c2
-rw-r--r--fs/ext4/page-io.c35
-rw-r--r--fs/ext4/super.c34
-rw-r--r--fs/fcntl.c4
-rw-r--r--fs/fuse/dir.c51
-rw-r--r--fs/gfs2/glock.c8
-rw-r--r--fs/gfs2/glops.c18
-rw-r--r--fs/gfs2/inode.c6
-rw-r--r--fs/gfs2/main.c2
-rw-r--r--fs/hugetlbfs/inode.c18
-rw-r--r--fs/lockd/clntlock.c13
-rw-r--r--fs/lockd/clntproc.c5
-rw-r--r--fs/lockd/svclock.c4
-rw-r--r--fs/namei.c10
-rw-r--r--fs/namespace.c2
-rw-r--r--fs/nfs/inode.c11
-rw-r--r--fs/nfs/nfs4proc.c8
-rw-r--r--fs/nfs/nfs4xdr.c21
-rw-r--r--fs/nfs/super.c4
-rw-r--r--fs/nfsd/nfs4proc.c4
-rw-r--r--fs/nfsd/nfs4state.c2
-rw-r--r--fs/nfsd/nfs4xdr.c5
-rw-r--r--fs/nfsd/nfsd.h1
-rw-r--r--fs/nfsd/nfssvc.c13
-rw-r--r--fs/nfsd/vfs.c5
-rw-r--r--fs/nilfs2/segbuf.c5
-rw-r--r--fs/ocfs2/aops.c2
-rw-r--r--fs/ocfs2/dir.c4
-rw-r--r--fs/ocfs2/file.c6
-rw-r--r--fs/ocfs2/journal.h2
-rw-r--r--fs/ocfs2/move_extents.c2
-rw-r--r--fs/ocfs2/refcounttree.c58
-rw-r--r--fs/ocfs2/refcounttree.h6
-rw-r--r--fs/open.c4
-rw-r--r--fs/proc/fd.c2
-rw-r--r--fs/proc/generic.c2
-rw-r--r--fs/proc/root.c4
-rw-r--r--fs/proc/task_mmu.c31
-rw-r--r--fs/proc/vmcore.c2
-rw-r--r--fs/reiserfs/procfs.c99
-rw-r--r--fs/reiserfs/super.c3
-rw-r--r--fs/super.c25
-rw-r--r--fs/sysfs/group.c70
-rw-r--r--fs/xfs/xfs_dinode.h3
-rw-r--r--fs/xfs/xfs_inode.c31
-rw-r--r--fs/xfs/xfs_log_recover.c13
85 files changed, 803 insertions, 585 deletions
diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c
index 5e376bb93419..8defc6b3f9a2 100644
--- a/fs/bfs/inode.c
+++ b/fs/bfs/inode.c
@@ -40,7 +40,7 @@ struct inode *bfs_iget(struct super_block *sb, unsigned long ino)
40 int block, off; 40 int block, off;
41 41
42 inode = iget_locked(sb, ino); 42 inode = iget_locked(sb, ino);
43 if (IS_ERR(inode)) 43 if (!inode)
44 return ERR_PTR(-ENOMEM); 44 return ERR_PTR(-ENOMEM);
45 if (!(inode->i_state & I_NEW)) 45 if (!(inode->i_state & I_NEW))
46 return inode; 46 return inode;
diff --git a/fs/bio.c b/fs/bio.c
index 94bbc04dba77..c5eae7251490 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -1045,12 +1045,22 @@ static int __bio_copy_iov(struct bio *bio, struct bio_vec *iovecs,
1045int bio_uncopy_user(struct bio *bio) 1045int bio_uncopy_user(struct bio *bio)
1046{ 1046{
1047 struct bio_map_data *bmd = bio->bi_private; 1047 struct bio_map_data *bmd = bio->bi_private;
1048 int ret = 0; 1048 struct bio_vec *bvec;
1049 int ret = 0, i;
1049 1050
1050 if (!bio_flagged(bio, BIO_NULL_MAPPED)) 1051 if (!bio_flagged(bio, BIO_NULL_MAPPED)) {
1051 ret = __bio_copy_iov(bio, bmd->iovecs, bmd->sgvecs, 1052 /*
1052 bmd->nr_sgvecs, bio_data_dir(bio) == READ, 1053 * if we're in a workqueue, the request is orphaned, so
1053 0, bmd->is_our_pages); 1054 * don't copy into a random user address space, just free.
1055 */
1056 if (current->mm)
1057 ret = __bio_copy_iov(bio, bmd->iovecs, bmd->sgvecs,
1058 bmd->nr_sgvecs, bio_data_dir(bio) == READ,
1059 0, bmd->is_our_pages);
1060 else if (bmd->is_our_pages)
1061 bio_for_each_segment_all(bvec, bio, i)
1062 __free_page(bvec->bv_page);
1063 }
1054 bio_free_map_data(bmd); 1064 bio_free_map_data(bmd);
1055 bio_put(bio); 1065 bio_put(bio);
1056 return ret; 1066 return ret;
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index eaf133384a8f..8bc5e8ccb091 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -36,16 +36,23 @@ static int check_extent_in_eb(struct btrfs_key *key, struct extent_buffer *eb,
36 u64 extent_item_pos, 36 u64 extent_item_pos,
37 struct extent_inode_elem **eie) 37 struct extent_inode_elem **eie)
38{ 38{
39 u64 data_offset; 39 u64 offset = 0;
40 u64 data_len;
41 struct extent_inode_elem *e; 40 struct extent_inode_elem *e;
42 41
43 data_offset = btrfs_file_extent_offset(eb, fi); 42 if (!btrfs_file_extent_compression(eb, fi) &&
44 data_len = btrfs_file_extent_num_bytes(eb, fi); 43 !btrfs_file_extent_encryption(eb, fi) &&
44 !btrfs_file_extent_other_encoding(eb, fi)) {
45 u64 data_offset;
46 u64 data_len;
45 47
46 if (extent_item_pos < data_offset || 48 data_offset = btrfs_file_extent_offset(eb, fi);
47 extent_item_pos >= data_offset + data_len) 49 data_len = btrfs_file_extent_num_bytes(eb, fi);
48 return 1; 50
51 if (extent_item_pos < data_offset ||
52 extent_item_pos >= data_offset + data_len)
53 return 1;
54 offset = extent_item_pos - data_offset;
55 }
49 56
50 e = kmalloc(sizeof(*e), GFP_NOFS); 57 e = kmalloc(sizeof(*e), GFP_NOFS);
51 if (!e) 58 if (!e)
@@ -53,7 +60,7 @@ static int check_extent_in_eb(struct btrfs_key *key, struct extent_buffer *eb,
53 60
54 e->next = *eie; 61 e->next = *eie;
55 e->inum = key->objectid; 62 e->inum = key->objectid;
56 e->offset = key->offset + (extent_item_pos - data_offset); 63 e->offset = key->offset + offset;
57 *eie = e; 64 *eie = e;
58 65
59 return 0; 66 return 0;
@@ -189,7 +196,7 @@ static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path,
189 struct extent_buffer *eb; 196 struct extent_buffer *eb;
190 struct btrfs_key key; 197 struct btrfs_key key;
191 struct btrfs_file_extent_item *fi; 198 struct btrfs_file_extent_item *fi;
192 struct extent_inode_elem *eie = NULL; 199 struct extent_inode_elem *eie = NULL, *old = NULL;
193 u64 disk_byte; 200 u64 disk_byte;
194 201
195 if (level != 0) { 202 if (level != 0) {
@@ -223,6 +230,7 @@ static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path,
223 230
224 if (disk_byte == wanted_disk_byte) { 231 if (disk_byte == wanted_disk_byte) {
225 eie = NULL; 232 eie = NULL;
233 old = NULL;
226 if (extent_item_pos) { 234 if (extent_item_pos) {
227 ret = check_extent_in_eb(&key, eb, fi, 235 ret = check_extent_in_eb(&key, eb, fi,
228 *extent_item_pos, 236 *extent_item_pos,
@@ -230,18 +238,20 @@ static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path,
230 if (ret < 0) 238 if (ret < 0)
231 break; 239 break;
232 } 240 }
233 if (!ret) { 241 if (ret > 0)
234 ret = ulist_add(parents, eb->start, 242 goto next;
235 (uintptr_t)eie, GFP_NOFS); 243 ret = ulist_add_merge(parents, eb->start,
236 if (ret < 0) 244 (uintptr_t)eie,
237 break; 245 (u64 *)&old, GFP_NOFS);
238 if (!extent_item_pos) { 246 if (ret < 0)
239 ret = btrfs_next_old_leaf(root, path, 247 break;
240 time_seq); 248 if (!ret && extent_item_pos) {
241 continue; 249 while (old->next)
242 } 250 old = old->next;
251 old->next = eie;
243 } 252 }
244 } 253 }
254next:
245 ret = btrfs_next_old_item(root, path, time_seq); 255 ret = btrfs_next_old_item(root, path, time_seq);
246 } 256 }
247 257
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 5bf4c39e2ad6..ed504607d8ec 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -1271,7 +1271,6 @@ tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb,
1271 BUG_ON(!eb_rewin); 1271 BUG_ON(!eb_rewin);
1272 } 1272 }
1273 1273
1274 extent_buffer_get(eb_rewin);
1275 btrfs_tree_read_unlock(eb); 1274 btrfs_tree_read_unlock(eb);
1276 free_extent_buffer(eb); 1275 free_extent_buffer(eb);
1277 1276
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 0236de711989..1204c8ef6f32 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -7466,6 +7466,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
7466 int err = 0; 7466 int err = 0;
7467 int ret; 7467 int ret;
7468 int level; 7468 int level;
7469 bool root_dropped = false;
7469 7470
7470 path = btrfs_alloc_path(); 7471 path = btrfs_alloc_path();
7471 if (!path) { 7472 if (!path) {
@@ -7523,6 +7524,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
7523 while (1) { 7524 while (1) {
7524 btrfs_tree_lock(path->nodes[level]); 7525 btrfs_tree_lock(path->nodes[level]);
7525 btrfs_set_lock_blocking(path->nodes[level]); 7526 btrfs_set_lock_blocking(path->nodes[level]);
7527 path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
7526 7528
7527 ret = btrfs_lookup_extent_info(trans, root, 7529 ret = btrfs_lookup_extent_info(trans, root,
7528 path->nodes[level]->start, 7530 path->nodes[level]->start,
@@ -7538,6 +7540,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
7538 break; 7540 break;
7539 7541
7540 btrfs_tree_unlock(path->nodes[level]); 7542 btrfs_tree_unlock(path->nodes[level]);
7543 path->locks[level] = 0;
7541 WARN_ON(wc->refs[level] != 1); 7544 WARN_ON(wc->refs[level] != 1);
7542 level--; 7545 level--;
7543 } 7546 }
@@ -7552,11 +7555,6 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
7552 wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(root); 7555 wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(root);
7553 7556
7554 while (1) { 7557 while (1) {
7555 if (!for_reloc && btrfs_need_cleaner_sleep(root)) {
7556 pr_debug("btrfs: drop snapshot early exit\n");
7557 err = -EAGAIN;
7558 goto out_end_trans;
7559 }
7560 7558
7561 ret = walk_down_tree(trans, root, path, wc); 7559 ret = walk_down_tree(trans, root, path, wc);
7562 if (ret < 0) { 7560 if (ret < 0) {
@@ -7584,7 +7582,8 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
7584 } 7582 }
7585 7583
7586 BUG_ON(wc->level == 0); 7584 BUG_ON(wc->level == 0);
7587 if (btrfs_should_end_transaction(trans, tree_root)) { 7585 if (btrfs_should_end_transaction(trans, tree_root) ||
7586 (!for_reloc && btrfs_need_cleaner_sleep(root))) {
7588 ret = btrfs_update_root(trans, tree_root, 7587 ret = btrfs_update_root(trans, tree_root,
7589 &root->root_key, 7588 &root->root_key,
7590 root_item); 7589 root_item);
@@ -7595,6 +7594,12 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
7595 } 7594 }
7596 7595
7597 btrfs_end_transaction_throttle(trans, tree_root); 7596 btrfs_end_transaction_throttle(trans, tree_root);
7597 if (!for_reloc && btrfs_need_cleaner_sleep(root)) {
7598 pr_debug("btrfs: drop snapshot early exit\n");
7599 err = -EAGAIN;
7600 goto out_free;
7601 }
7602
7598 trans = btrfs_start_transaction(tree_root, 0); 7603 trans = btrfs_start_transaction(tree_root, 0);
7599 if (IS_ERR(trans)) { 7604 if (IS_ERR(trans)) {
7600 err = PTR_ERR(trans); 7605 err = PTR_ERR(trans);
@@ -7639,12 +7644,22 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
7639 free_extent_buffer(root->commit_root); 7644 free_extent_buffer(root->commit_root);
7640 btrfs_put_fs_root(root); 7645 btrfs_put_fs_root(root);
7641 } 7646 }
7647 root_dropped = true;
7642out_end_trans: 7648out_end_trans:
7643 btrfs_end_transaction_throttle(trans, tree_root); 7649 btrfs_end_transaction_throttle(trans, tree_root);
7644out_free: 7650out_free:
7645 kfree(wc); 7651 kfree(wc);
7646 btrfs_free_path(path); 7652 btrfs_free_path(path);
7647out: 7653out:
7654 /*
7655 * So if we need to stop dropping the snapshot for whatever reason we
7656 * need to make sure to add it back to the dead root list so that we
7657 * keep trying to do the work later. This also cleans up roots if we
7658 * don't have it in the radix (like when we recover after a power fail
7659 * or unmount) so we don't leak memory.
7660 */
7661 if (root_dropped == false)
7662 btrfs_add_dead_root(root);
7648 if (err) 7663 if (err)
7649 btrfs_std_error(root->fs_info, err); 7664 btrfs_std_error(root->fs_info, err);
7650 return err; 7665 return err;
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 583d98bd065e..fe443fece851 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -4048,7 +4048,7 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
4048 } 4048 }
4049 4049
4050 while (!end) { 4050 while (!end) {
4051 u64 offset_in_extent; 4051 u64 offset_in_extent = 0;
4052 4052
4053 /* break if the extent we found is outside the range */ 4053 /* break if the extent we found is outside the range */
4054 if (em->start >= max || extent_map_end(em) < off) 4054 if (em->start >= max || extent_map_end(em) < off)
@@ -4064,9 +4064,12 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
4064 4064
4065 /* 4065 /*
4066 * record the offset from the start of the extent 4066 * record the offset from the start of the extent
4067 * for adjusting the disk offset below 4067 * for adjusting the disk offset below. Only do this if the
4068 * extent isn't compressed since our in ram offset may be past
4069 * what we have actually allocated on disk.
4068 */ 4070 */
4069 offset_in_extent = em_start - em->start; 4071 if (!test_bit(EXTENT_FLAG_COMPRESSED, &em->flags))
4072 offset_in_extent = em_start - em->start;
4070 em_end = extent_map_end(em); 4073 em_end = extent_map_end(em);
4071 em_len = em_end - em_start; 4074 em_len = em_end - em_start;
4072 emflags = em->flags; 4075 emflags = em->flags;
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index a005fe2c072a..8e686a427ce2 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -596,20 +596,29 @@ void btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
596 if (no_splits) 596 if (no_splits)
597 goto next; 597 goto next;
598 598
599 if (em->block_start < EXTENT_MAP_LAST_BYTE && 599 if (em->start < start) {
600 em->start < start) {
601 split->start = em->start; 600 split->start = em->start;
602 split->len = start - em->start; 601 split->len = start - em->start;
603 split->orig_start = em->orig_start;
604 split->block_start = em->block_start;
605 602
606 if (compressed) 603 if (em->block_start < EXTENT_MAP_LAST_BYTE) {
607 split->block_len = em->block_len; 604 split->orig_start = em->orig_start;
608 else 605 split->block_start = em->block_start;
609 split->block_len = split->len; 606
610 split->ram_bytes = em->ram_bytes; 607 if (compressed)
611 split->orig_block_len = max(split->block_len, 608 split->block_len = em->block_len;
612 em->orig_block_len); 609 else
610 split->block_len = split->len;
611 split->orig_block_len = max(split->block_len,
612 em->orig_block_len);
613 split->ram_bytes = em->ram_bytes;
614 } else {
615 split->orig_start = split->start;
616 split->block_len = 0;
617 split->block_start = em->block_start;
618 split->orig_block_len = 0;
619 split->ram_bytes = split->len;
620 }
621
613 split->generation = gen; 622 split->generation = gen;
614 split->bdev = em->bdev; 623 split->bdev = em->bdev;
615 split->flags = flags; 624 split->flags = flags;
@@ -620,8 +629,7 @@ void btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
620 split = split2; 629 split = split2;
621 split2 = NULL; 630 split2 = NULL;
622 } 631 }
623 if (em->block_start < EXTENT_MAP_LAST_BYTE && 632 if (testend && em->start + em->len > start + len) {
624 testend && em->start + em->len > start + len) {
625 u64 diff = start + len - em->start; 633 u64 diff = start + len - em->start;
626 634
627 split->start = start + len; 635 split->start = start + len;
@@ -630,18 +638,28 @@ void btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
630 split->flags = flags; 638 split->flags = flags;
631 split->compress_type = em->compress_type; 639 split->compress_type = em->compress_type;
632 split->generation = gen; 640 split->generation = gen;
633 split->orig_block_len = max(em->block_len, 641
642 if (em->block_start < EXTENT_MAP_LAST_BYTE) {
643 split->orig_block_len = max(em->block_len,
634 em->orig_block_len); 644 em->orig_block_len);
635 split->ram_bytes = em->ram_bytes;
636 645
637 if (compressed) { 646 split->ram_bytes = em->ram_bytes;
638 split->block_len = em->block_len; 647 if (compressed) {
639 split->block_start = em->block_start; 648 split->block_len = em->block_len;
640 split->orig_start = em->orig_start; 649 split->block_start = em->block_start;
650 split->orig_start = em->orig_start;
651 } else {
652 split->block_len = split->len;
653 split->block_start = em->block_start
654 + diff;
655 split->orig_start = em->orig_start;
656 }
641 } else { 657 } else {
642 split->block_len = split->len; 658 split->ram_bytes = split->len;
643 split->block_start = em->block_start + diff; 659 split->orig_start = split->start;
644 split->orig_start = em->orig_start; 660 split->block_len = 0;
661 split->block_start = em->block_start;
662 split->orig_block_len = 0;
645 } 663 }
646 664
647 ret = add_extent_mapping(em_tree, split, modified); 665 ret = add_extent_mapping(em_tree, split, modified);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 6d1b93c8aafb..021694c08181 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -2166,16 +2166,23 @@ static noinline int record_one_backref(u64 inum, u64 offset, u64 root_id,
2166 if (btrfs_file_extent_disk_bytenr(leaf, extent) != old->bytenr) 2166 if (btrfs_file_extent_disk_bytenr(leaf, extent) != old->bytenr)
2167 continue; 2167 continue;
2168 2168
2169 extent_offset = btrfs_file_extent_offset(leaf, extent); 2169 /*
2170 if (key.offset - extent_offset != offset) 2170 * 'offset' refers to the exact key.offset,
2171 * NOT the 'offset' field in btrfs_extent_data_ref, ie.
2172 * (key.offset - extent_offset).
2173 */
2174 if (key.offset != offset)
2171 continue; 2175 continue;
2172 2176
2177 extent_offset = btrfs_file_extent_offset(leaf, extent);
2173 num_bytes = btrfs_file_extent_num_bytes(leaf, extent); 2178 num_bytes = btrfs_file_extent_num_bytes(leaf, extent);
2179
2174 if (extent_offset >= old->extent_offset + old->offset + 2180 if (extent_offset >= old->extent_offset + old->offset +
2175 old->len || extent_offset + num_bytes <= 2181 old->len || extent_offset + num_bytes <=
2176 old->extent_offset + old->offset) 2182 old->extent_offset + old->offset)
2177 continue; 2183 continue;
2178 2184
2185 ret = 0;
2179 break; 2186 break;
2180 } 2187 }
2181 2188
@@ -2187,7 +2194,7 @@ static noinline int record_one_backref(u64 inum, u64 offset, u64 root_id,
2187 2194
2188 backref->root_id = root_id; 2195 backref->root_id = root_id;
2189 backref->inum = inum; 2196 backref->inum = inum;
2190 backref->file_pos = offset + extent_offset; 2197 backref->file_pos = offset;
2191 backref->num_bytes = num_bytes; 2198 backref->num_bytes = num_bytes;
2192 backref->extent_offset = extent_offset; 2199 backref->extent_offset = extent_offset;
2193 backref->generation = btrfs_file_extent_generation(leaf, extent); 2200 backref->generation = btrfs_file_extent_generation(leaf, extent);
@@ -2210,7 +2217,8 @@ static noinline bool record_extent_backrefs(struct btrfs_path *path,
2210 new->path = path; 2217 new->path = path;
2211 2218
2212 list_for_each_entry_safe(old, tmp, &new->head, list) { 2219 list_for_each_entry_safe(old, tmp, &new->head, list) {
2213 ret = iterate_inodes_from_logical(old->bytenr, fs_info, 2220 ret = iterate_inodes_from_logical(old->bytenr +
2221 old->extent_offset, fs_info,
2214 path, record_one_backref, 2222 path, record_one_backref,
2215 old); 2223 old);
2216 BUG_ON(ret < 0 && ret != -ENOENT); 2224 BUG_ON(ret < 0 && ret != -ENOENT);
@@ -4391,9 +4399,6 @@ static int btrfs_setsize(struct inode *inode, struct iattr *attr)
4391 int mask = attr->ia_valid; 4399 int mask = attr->ia_valid;
4392 int ret; 4400 int ret;
4393 4401
4394 if (newsize == oldsize)
4395 return 0;
4396
4397 /* 4402 /*
4398 * The regular truncate() case without ATTR_CTIME and ATTR_MTIME is a 4403 * The regular truncate() case without ATTR_CTIME and ATTR_MTIME is a
4399 * special case where we need to update the times despite not having 4404 * special case where we need to update the times despite not having
@@ -5165,14 +5170,31 @@ next:
5165 } 5170 }
5166 5171
5167 /* Reached end of directory/root. Bump pos past the last item. */ 5172 /* Reached end of directory/root. Bump pos past the last item. */
5168 if (key_type == BTRFS_DIR_INDEX_KEY) 5173 ctx->pos++;
5169 /* 5174
5170 * 32-bit glibc will use getdents64, but then strtol - 5175 /*
5171 * so the last number we can serve is this. 5176 * Stop new entries from being returned after we return the last
5172 */ 5177 * entry.
5173 ctx->pos = 0x7fffffff; 5178 *
5174 else 5179 * New directory entries are assigned a strictly increasing
5175 ctx->pos++; 5180 * offset. This means that new entries created during readdir
5181 * are *guaranteed* to be seen in the future by that readdir.
5182 * This has broken buggy programs which operate on names as
5183 * they're returned by readdir. Until we re-use freed offsets
5184 * we have this hack to stop new entries from being returned
5185 * under the assumption that they'll never reach this huge
5186 * offset.
5187 *
5188 * This is being careful not to overflow 32bit loff_t unless the
5189 * last entry requires it because doing so has broken 32bit apps
5190 * in the past.
5191 */
5192 if (key_type == BTRFS_DIR_INDEX_KEY) {
5193 if (ctx->pos >= INT_MAX)
5194 ctx->pos = LLONG_MAX;
5195 else
5196 ctx->pos = INT_MAX;
5197 }
5176nopos: 5198nopos:
5177 ret = 0; 5199 ret = 0;
5178err: 5200err:
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 4ba2a69a60ad..64a157becbe5 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -2495,7 +2495,7 @@ again:
2495 ret = scrub_extent(sctx, extent_logical, extent_len, 2495 ret = scrub_extent(sctx, extent_logical, extent_len,
2496 extent_physical, extent_dev, flags, 2496 extent_physical, extent_dev, flags,
2497 generation, extent_mirror_num, 2497 generation, extent_mirror_num,
2498 extent_physical); 2498 extent_logical - logical + physical);
2499 if (ret) 2499 if (ret)
2500 goto out; 2500 goto out;
2501 2501
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index d58cce77fc6c..af1931a5960d 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -983,12 +983,12 @@ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans,
983 * a dirty root struct and adds it into the list of dead roots that need to 983 * a dirty root struct and adds it into the list of dead roots that need to
984 * be deleted 984 * be deleted
985 */ 985 */
986int btrfs_add_dead_root(struct btrfs_root *root) 986void btrfs_add_dead_root(struct btrfs_root *root)
987{ 987{
988 spin_lock(&root->fs_info->trans_lock); 988 spin_lock(&root->fs_info->trans_lock);
989 list_add_tail(&root->root_list, &root->fs_info->dead_roots); 989 if (list_empty(&root->root_list))
990 list_add_tail(&root->root_list, &root->fs_info->dead_roots);
990 spin_unlock(&root->fs_info->trans_lock); 991 spin_unlock(&root->fs_info->trans_lock);
991 return 0;
992} 992}
993 993
994/* 994/*
@@ -1925,7 +1925,7 @@ int btrfs_clean_one_deleted_snapshot(struct btrfs_root *root)
1925 } 1925 }
1926 root = list_first_entry(&fs_info->dead_roots, 1926 root = list_first_entry(&fs_info->dead_roots,
1927 struct btrfs_root, root_list); 1927 struct btrfs_root, root_list);
1928 list_del(&root->root_list); 1928 list_del_init(&root->root_list);
1929 spin_unlock(&fs_info->trans_lock); 1929 spin_unlock(&fs_info->trans_lock);
1930 1930
1931 pr_debug("btrfs: cleaner removing %llu\n", 1931 pr_debug("btrfs: cleaner removing %llu\n",
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index 005b0375d18c..defbc4269897 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -143,7 +143,7 @@ int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid);
143int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, 143int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans,
144 struct btrfs_root *root); 144 struct btrfs_root *root);
145 145
146int btrfs_add_dead_root(struct btrfs_root *root); 146void btrfs_add_dead_root(struct btrfs_root *root);
147int btrfs_defrag_root(struct btrfs_root *root); 147int btrfs_defrag_root(struct btrfs_root *root);
148int btrfs_clean_one_deleted_snapshot(struct btrfs_root *root); 148int btrfs_clean_one_deleted_snapshot(struct btrfs_root *root);
149int btrfs_commit_transaction(struct btrfs_trans_handle *trans, 149int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 2c6791493637..ff60d8978ae2 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -3746,8 +3746,9 @@ next_slot:
3746 } 3746 }
3747 3747
3748log_extents: 3748log_extents:
3749 btrfs_release_path(path);
3750 btrfs_release_path(dst_path);
3749 if (fast_search) { 3751 if (fast_search) {
3750 btrfs_release_path(dst_path);
3751 ret = btrfs_log_changed_extents(trans, root, inode, dst_path); 3752 ret = btrfs_log_changed_extents(trans, root, inode, dst_path);
3752 if (ret) { 3753 if (ret) {
3753 err = ret; 3754 err = ret;
@@ -3764,8 +3765,6 @@ log_extents:
3764 } 3765 }
3765 3766
3766 if (inode_only == LOG_INODE_ALL && S_ISDIR(inode->i_mode)) { 3767 if (inode_only == LOG_INODE_ALL && S_ISDIR(inode->i_mode)) {
3767 btrfs_release_path(path);
3768 btrfs_release_path(dst_path);
3769 ret = log_directory_changes(trans, root, inode, path, dst_path); 3768 ret = log_directory_changes(trans, root, inode, path, dst_path);
3770 if (ret) { 3769 if (ret) {
3771 err = ret; 3770 err = ret;
diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c
index 45e57cc38200..fc6f4f3a1a9d 100644
--- a/fs/cifs/cifsencrypt.c
+++ b/fs/cifs/cifsencrypt.c
@@ -43,17 +43,18 @@ cifs_crypto_shash_md5_allocate(struct TCP_Server_Info *server)
43 server->secmech.md5 = crypto_alloc_shash("md5", 0, 0); 43 server->secmech.md5 = crypto_alloc_shash("md5", 0, 0);
44 if (IS_ERR(server->secmech.md5)) { 44 if (IS_ERR(server->secmech.md5)) {
45 cifs_dbg(VFS, "could not allocate crypto md5\n"); 45 cifs_dbg(VFS, "could not allocate crypto md5\n");
46 return PTR_ERR(server->secmech.md5); 46 rc = PTR_ERR(server->secmech.md5);
47 server->secmech.md5 = NULL;
48 return rc;
47 } 49 }
48 50
49 size = sizeof(struct shash_desc) + 51 size = sizeof(struct shash_desc) +
50 crypto_shash_descsize(server->secmech.md5); 52 crypto_shash_descsize(server->secmech.md5);
51 server->secmech.sdescmd5 = kmalloc(size, GFP_KERNEL); 53 server->secmech.sdescmd5 = kmalloc(size, GFP_KERNEL);
52 if (!server->secmech.sdescmd5) { 54 if (!server->secmech.sdescmd5) {
53 rc = -ENOMEM;
54 crypto_free_shash(server->secmech.md5); 55 crypto_free_shash(server->secmech.md5);
55 server->secmech.md5 = NULL; 56 server->secmech.md5 = NULL;
56 return rc; 57 return -ENOMEM;
57 } 58 }
58 server->secmech.sdescmd5->shash.tfm = server->secmech.md5; 59 server->secmech.sdescmd5->shash.tfm = server->secmech.md5;
59 server->secmech.sdescmd5->shash.flags = 0x0; 60 server->secmech.sdescmd5->shash.flags = 0x0;
@@ -421,7 +422,7 @@ find_domain_name(struct cifs_ses *ses, const struct nls_table *nls_cp)
421 if (blobptr + attrsize > blobend) 422 if (blobptr + attrsize > blobend)
422 break; 423 break;
423 if (type == NTLMSSP_AV_NB_DOMAIN_NAME) { 424 if (type == NTLMSSP_AV_NB_DOMAIN_NAME) {
424 if (!attrsize) 425 if (!attrsize || attrsize >= CIFS_MAX_DOMAINNAME_LEN)
425 break; 426 break;
426 if (!ses->domainName) { 427 if (!ses->domainName) {
427 ses->domainName = 428 ses->domainName =
@@ -591,6 +592,7 @@ CalcNTLMv2_response(const struct cifs_ses *ses, char *ntlmv2_hash)
591 592
592static int crypto_hmacmd5_alloc(struct TCP_Server_Info *server) 593static int crypto_hmacmd5_alloc(struct TCP_Server_Info *server)
593{ 594{
595 int rc;
594 unsigned int size; 596 unsigned int size;
595 597
596 /* check if already allocated */ 598 /* check if already allocated */
@@ -600,7 +602,9 @@ static int crypto_hmacmd5_alloc(struct TCP_Server_Info *server)
600 server->secmech.hmacmd5 = crypto_alloc_shash("hmac(md5)", 0, 0); 602 server->secmech.hmacmd5 = crypto_alloc_shash("hmac(md5)", 0, 0);
601 if (IS_ERR(server->secmech.hmacmd5)) { 603 if (IS_ERR(server->secmech.hmacmd5)) {
602 cifs_dbg(VFS, "could not allocate crypto hmacmd5\n"); 604 cifs_dbg(VFS, "could not allocate crypto hmacmd5\n");
603 return PTR_ERR(server->secmech.hmacmd5); 605 rc = PTR_ERR(server->secmech.hmacmd5);
606 server->secmech.hmacmd5 = NULL;
607 return rc;
604 } 608 }
605 609
606 size = sizeof(struct shash_desc) + 610 size = sizeof(struct shash_desc) +
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 4bdd547dbf6f..85ea98d139fc 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -147,18 +147,17 @@ cifs_read_super(struct super_block *sb)
147 goto out_no_root; 147 goto out_no_root;
148 } 148 }
149 149
150 if (cifs_sb_master_tcon(cifs_sb)->nocase)
151 sb->s_d_op = &cifs_ci_dentry_ops;
152 else
153 sb->s_d_op = &cifs_dentry_ops;
154
150 sb->s_root = d_make_root(inode); 155 sb->s_root = d_make_root(inode);
151 if (!sb->s_root) { 156 if (!sb->s_root) {
152 rc = -ENOMEM; 157 rc = -ENOMEM;
153 goto out_no_root; 158 goto out_no_root;
154 } 159 }
155 160
156 /* do that *after* d_make_root() - we want NULL ->d_op for root here */
157 if (cifs_sb_master_tcon(cifs_sb)->nocase)
158 sb->s_d_op = &cifs_ci_dentry_ops;
159 else
160 sb->s_d_op = &cifs_dentry_ops;
161
162#ifdef CONFIG_CIFS_NFSD_EXPORT 161#ifdef CONFIG_CIFS_NFSD_EXPORT
163 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) { 162 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) {
164 cifs_dbg(FYI, "export ops supported\n"); 163 cifs_dbg(FYI, "export ops supported\n");
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 1fdc37041057..52ca861ed35e 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -44,6 +44,7 @@
44#define MAX_TREE_SIZE (2 + MAX_SERVER_SIZE + 1 + MAX_SHARE_SIZE + 1) 44#define MAX_TREE_SIZE (2 + MAX_SERVER_SIZE + 1 + MAX_SHARE_SIZE + 1)
45#define MAX_SERVER_SIZE 15 45#define MAX_SERVER_SIZE 15
46#define MAX_SHARE_SIZE 80 46#define MAX_SHARE_SIZE 80
47#define CIFS_MAX_DOMAINNAME_LEN 256 /* max domain name length */
47#define MAX_USERNAME_SIZE 256 /* reasonable maximum for current servers */ 48#define MAX_USERNAME_SIZE 256 /* reasonable maximum for current servers */
48#define MAX_PASSWORD_SIZE 512 /* max for windows seems to be 256 wide chars */ 49#define MAX_PASSWORD_SIZE 512 /* max for windows seems to be 256 wide chars */
49 50
@@ -369,6 +370,9 @@ struct smb_version_operations {
369 void (*generate_signingkey)(struct TCP_Server_Info *server); 370 void (*generate_signingkey)(struct TCP_Server_Info *server);
370 int (*calc_signature)(struct smb_rqst *rqst, 371 int (*calc_signature)(struct smb_rqst *rqst,
371 struct TCP_Server_Info *server); 372 struct TCP_Server_Info *server);
373 int (*query_mf_symlink)(const unsigned char *path, char *pbuf,
374 unsigned int *pbytes_read, struct cifs_sb_info *cifs_sb,
375 unsigned int xid);
372}; 376};
373 377
374struct smb_version_values { 378struct smb_version_values {
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index f7e584d047e2..b29a012bed33 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -497,5 +497,7 @@ void cifs_writev_complete(struct work_struct *work);
497struct cifs_writedata *cifs_writedata_alloc(unsigned int nr_pages, 497struct cifs_writedata *cifs_writedata_alloc(unsigned int nr_pages,
498 work_func_t complete); 498 work_func_t complete);
499void cifs_writedata_release(struct kref *refcount); 499void cifs_writedata_release(struct kref *refcount);
500 500int open_query_close_cifs_symlink(const unsigned char *path, char *pbuf,
501 unsigned int *pbytes_read, struct cifs_sb_info *cifs_sb,
502 unsigned int xid);
501#endif /* _CIFSPROTO_H */ 503#endif /* _CIFSPROTO_H */
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index fa68813396b5..d67c550c4980 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -1675,7 +1675,8 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
1675 if (string == NULL) 1675 if (string == NULL)
1676 goto out_nomem; 1676 goto out_nomem;
1677 1677
1678 if (strnlen(string, 256) == 256) { 1678 if (strnlen(string, CIFS_MAX_DOMAINNAME_LEN)
1679 == CIFS_MAX_DOMAINNAME_LEN) {
1679 printk(KERN_WARNING "CIFS: domain name too" 1680 printk(KERN_WARNING "CIFS: domain name too"
1680 " long\n"); 1681 " long\n");
1681 goto cifs_parse_mount_err; 1682 goto cifs_parse_mount_err;
@@ -2276,8 +2277,8 @@ cifs_put_smb_ses(struct cifs_ses *ses)
2276 2277
2277#ifdef CONFIG_KEYS 2278#ifdef CONFIG_KEYS
2278 2279
2279/* strlen("cifs:a:") + INET6_ADDRSTRLEN + 1 */ 2280/* strlen("cifs:a:") + CIFS_MAX_DOMAINNAME_LEN + 1 */
2280#define CIFSCREDS_DESC_SIZE (7 + INET6_ADDRSTRLEN + 1) 2281#define CIFSCREDS_DESC_SIZE (7 + CIFS_MAX_DOMAINNAME_LEN + 1)
2281 2282
2282/* Populate username and pw fields from keyring if possible */ 2283/* Populate username and pw fields from keyring if possible */
2283static int 2284static int
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 1e57f36ea1b2..7e36ae34e947 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -647,6 +647,7 @@ cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
647 oflags, &oplock, &cfile->fid.netfid, xid); 647 oflags, &oplock, &cfile->fid.netfid, xid);
648 if (rc == 0) { 648 if (rc == 0) {
649 cifs_dbg(FYI, "posix reopen succeeded\n"); 649 cifs_dbg(FYI, "posix reopen succeeded\n");
650 oparms.reconnect = true;
650 goto reopen_success; 651 goto reopen_success;
651 } 652 }
652 /* 653 /*
diff --git a/fs/cifs/link.c b/fs/cifs/link.c
index b83c3f5646bd..562044f700e5 100644
--- a/fs/cifs/link.c
+++ b/fs/cifs/link.c
@@ -305,67 +305,89 @@ CIFSCouldBeMFSymlink(const struct cifs_fattr *fattr)
305} 305}
306 306
307int 307int
308CIFSCheckMFSymlink(struct cifs_fattr *fattr, 308open_query_close_cifs_symlink(const unsigned char *path, char *pbuf,
309 const unsigned char *path, 309 unsigned int *pbytes_read, struct cifs_sb_info *cifs_sb,
310 struct cifs_sb_info *cifs_sb, unsigned int xid) 310 unsigned int xid)
311{ 311{
312 int rc; 312 int rc;
313 int oplock = 0; 313 int oplock = 0;
314 __u16 netfid = 0; 314 __u16 netfid = 0;
315 struct tcon_link *tlink; 315 struct tcon_link *tlink;
316 struct cifs_tcon *pTcon; 316 struct cifs_tcon *ptcon;
317 struct cifs_io_parms io_parms; 317 struct cifs_io_parms io_parms;
318 u8 *buf;
319 char *pbuf;
320 unsigned int bytes_read = 0;
321 int buf_type = CIFS_NO_BUFFER; 318 int buf_type = CIFS_NO_BUFFER;
322 unsigned int link_len = 0;
323 FILE_ALL_INFO file_info; 319 FILE_ALL_INFO file_info;
324 320
325 if (!CIFSCouldBeMFSymlink(fattr))
326 /* it's not a symlink */
327 return 0;
328
329 tlink = cifs_sb_tlink(cifs_sb); 321 tlink = cifs_sb_tlink(cifs_sb);
330 if (IS_ERR(tlink)) 322 if (IS_ERR(tlink))
331 return PTR_ERR(tlink); 323 return PTR_ERR(tlink);
332 pTcon = tlink_tcon(tlink); 324 ptcon = tlink_tcon(tlink);
333 325
334 rc = CIFSSMBOpen(xid, pTcon, path, FILE_OPEN, GENERIC_READ, 326 rc = CIFSSMBOpen(xid, ptcon, path, FILE_OPEN, GENERIC_READ,
335 CREATE_NOT_DIR, &netfid, &oplock, &file_info, 327 CREATE_NOT_DIR, &netfid, &oplock, &file_info,
336 cifs_sb->local_nls, 328 cifs_sb->local_nls,
337 cifs_sb->mnt_cifs_flags & 329 cifs_sb->mnt_cifs_flags &
338 CIFS_MOUNT_MAP_SPECIAL_CHR); 330 CIFS_MOUNT_MAP_SPECIAL_CHR);
339 if (rc != 0) 331 if (rc != 0) {
340 goto out; 332 cifs_put_tlink(tlink);
333 return rc;
334 }
341 335
342 if (file_info.EndOfFile != cpu_to_le64(CIFS_MF_SYMLINK_FILE_SIZE)) { 336 if (file_info.EndOfFile != cpu_to_le64(CIFS_MF_SYMLINK_FILE_SIZE)) {
343 CIFSSMBClose(xid, pTcon, netfid); 337 CIFSSMBClose(xid, ptcon, netfid);
338 cifs_put_tlink(tlink);
344 /* it's not a symlink */ 339 /* it's not a symlink */
345 goto out; 340 return rc;
346 } 341 }
347 342
348 buf = kmalloc(CIFS_MF_SYMLINK_FILE_SIZE, GFP_KERNEL);
349 if (!buf) {
350 rc = -ENOMEM;
351 goto out;
352 }
353 pbuf = buf;
354 io_parms.netfid = netfid; 343 io_parms.netfid = netfid;
355 io_parms.pid = current->tgid; 344 io_parms.pid = current->tgid;
356 io_parms.tcon = pTcon; 345 io_parms.tcon = ptcon;
357 io_parms.offset = 0; 346 io_parms.offset = 0;
358 io_parms.length = CIFS_MF_SYMLINK_FILE_SIZE; 347 io_parms.length = CIFS_MF_SYMLINK_FILE_SIZE;
359 348
360 rc = CIFSSMBRead(xid, &io_parms, &bytes_read, &pbuf, &buf_type); 349 rc = CIFSSMBRead(xid, &io_parms, pbytes_read, &pbuf, &buf_type);
361 CIFSSMBClose(xid, pTcon, netfid); 350 CIFSSMBClose(xid, ptcon, netfid);
362 if (rc != 0) { 351 cifs_put_tlink(tlink);
363 kfree(buf); 352 return rc;
353}
354
355
356int
357CIFSCheckMFSymlink(struct cifs_fattr *fattr,
358 const unsigned char *path,
359 struct cifs_sb_info *cifs_sb, unsigned int xid)
360{
361 int rc = 0;
362 u8 *buf = NULL;
363 unsigned int link_len = 0;
364 unsigned int bytes_read = 0;
365 struct cifs_tcon *ptcon;
366
367 if (!CIFSCouldBeMFSymlink(fattr))
368 /* it's not a symlink */
369 return 0;
370
371 buf = kmalloc(CIFS_MF_SYMLINK_FILE_SIZE, GFP_KERNEL);
372 if (!buf) {
373 rc = -ENOMEM;
364 goto out; 374 goto out;
365 } 375 }
366 376
377 ptcon = tlink_tcon(cifs_sb_tlink(cifs_sb));
378 if ((ptcon->ses) && (ptcon->ses->server->ops->query_mf_symlink))
379 rc = ptcon->ses->server->ops->query_mf_symlink(path, buf,
380 &bytes_read, cifs_sb, xid);
381 else
382 goto out;
383
384 if (rc != 0)
385 goto out;
386
387 if (bytes_read == 0) /* not a symlink */
388 goto out;
389
367 rc = CIFSParseMFSymlink(buf, bytes_read, &link_len, NULL); 390 rc = CIFSParseMFSymlink(buf, bytes_read, &link_len, NULL);
368 kfree(buf);
369 if (rc == -EINVAL) { 391 if (rc == -EINVAL) {
370 /* it's not a symlink */ 392 /* it's not a symlink */
371 rc = 0; 393 rc = 0;
@@ -381,7 +403,7 @@ CIFSCheckMFSymlink(struct cifs_fattr *fattr,
381 fattr->cf_mode |= S_IFLNK | S_IRWXU | S_IRWXG | S_IRWXO; 403 fattr->cf_mode |= S_IFLNK | S_IRWXU | S_IRWXG | S_IRWXO;
382 fattr->cf_dtype = DT_LNK; 404 fattr->cf_dtype = DT_LNK;
383out: 405out:
384 cifs_put_tlink(tlink); 406 kfree(buf);
385 return rc; 407 return rc;
386} 408}
387 409
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index ab8778469394..69d2c826a23b 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -111,6 +111,14 @@ cifs_prime_dcache(struct dentry *parent, struct qstr *name,
111 return; 111 return;
112 } 112 }
113 113
114 /*
115 * If we know that the inode will need to be revalidated immediately,
116 * then don't create a new dentry for it. We'll end up doing an on
117 * the wire call either way and this spares us an invalidation.
118 */
119 if (fattr->cf_flags & CIFS_FATTR_NEED_REVAL)
120 return;
121
114 dentry = d_alloc(parent, name); 122 dentry = d_alloc(parent, name);
115 if (!dentry) 123 if (!dentry)
116 return; 124 return;
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index 79358e341fd2..08dd37bb23aa 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -197,7 +197,7 @@ static void unicode_domain_string(char **pbcc_area, struct cifs_ses *ses,
197 bytes_ret = 0; 197 bytes_ret = 0;
198 } else 198 } else
199 bytes_ret = cifs_strtoUTF16((__le16 *) bcc_ptr, ses->domainName, 199 bytes_ret = cifs_strtoUTF16((__le16 *) bcc_ptr, ses->domainName,
200 256, nls_cp); 200 CIFS_MAX_DOMAINNAME_LEN, nls_cp);
201 bcc_ptr += 2 * bytes_ret; 201 bcc_ptr += 2 * bytes_ret;
202 bcc_ptr += 2; /* account for null terminator */ 202 bcc_ptr += 2; /* account for null terminator */
203 203
@@ -255,8 +255,8 @@ static void ascii_ssetup_strings(char **pbcc_area, struct cifs_ses *ses,
255 255
256 /* copy domain */ 256 /* copy domain */
257 if (ses->domainName != NULL) { 257 if (ses->domainName != NULL) {
258 strncpy(bcc_ptr, ses->domainName, 256); 258 strncpy(bcc_ptr, ses->domainName, CIFS_MAX_DOMAINNAME_LEN);
259 bcc_ptr += strnlen(ses->domainName, 256); 259 bcc_ptr += strnlen(ses->domainName, CIFS_MAX_DOMAINNAME_LEN);
260 } /* else we will send a null domain name 260 } /* else we will send a null domain name
261 so the server will default to its own domain */ 261 so the server will default to its own domain */
262 *bcc_ptr = 0; 262 *bcc_ptr = 0;
diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c
index 6457690731a2..60943978aec3 100644
--- a/fs/cifs/smb1ops.c
+++ b/fs/cifs/smb1ops.c
@@ -944,6 +944,7 @@ struct smb_version_operations smb1_operations = {
944 .mand_lock = cifs_mand_lock, 944 .mand_lock = cifs_mand_lock,
945 .mand_unlock_range = cifs_unlock_range, 945 .mand_unlock_range = cifs_unlock_range,
946 .push_mand_locks = cifs_push_mandatory_locks, 946 .push_mand_locks = cifs_push_mandatory_locks,
947 .query_mf_symlink = open_query_close_cifs_symlink,
947}; 948};
948 949
949struct smb_version_values smb1_values = { 950struct smb_version_values smb1_values = {
diff --git a/fs/cifs/smb2transport.c b/fs/cifs/smb2transport.c
index 301b191270b9..4f2300d020c7 100644
--- a/fs/cifs/smb2transport.c
+++ b/fs/cifs/smb2transport.c
@@ -42,6 +42,7 @@
42static int 42static int
43smb2_crypto_shash_allocate(struct TCP_Server_Info *server) 43smb2_crypto_shash_allocate(struct TCP_Server_Info *server)
44{ 44{
45 int rc;
45 unsigned int size; 46 unsigned int size;
46 47
47 if (server->secmech.sdeschmacsha256 != NULL) 48 if (server->secmech.sdeschmacsha256 != NULL)
@@ -50,7 +51,9 @@ smb2_crypto_shash_allocate(struct TCP_Server_Info *server)
50 server->secmech.hmacsha256 = crypto_alloc_shash("hmac(sha256)", 0, 0); 51 server->secmech.hmacsha256 = crypto_alloc_shash("hmac(sha256)", 0, 0);
51 if (IS_ERR(server->secmech.hmacsha256)) { 52 if (IS_ERR(server->secmech.hmacsha256)) {
52 cifs_dbg(VFS, "could not allocate crypto hmacsha256\n"); 53 cifs_dbg(VFS, "could not allocate crypto hmacsha256\n");
53 return PTR_ERR(server->secmech.hmacsha256); 54 rc = PTR_ERR(server->secmech.hmacsha256);
55 server->secmech.hmacsha256 = NULL;
56 return rc;
54 } 57 }
55 58
56 size = sizeof(struct shash_desc) + 59 size = sizeof(struct shash_desc) +
@@ -87,7 +90,9 @@ smb3_crypto_shash_allocate(struct TCP_Server_Info *server)
87 server->secmech.sdeschmacsha256 = NULL; 90 server->secmech.sdeschmacsha256 = NULL;
88 crypto_free_shash(server->secmech.hmacsha256); 91 crypto_free_shash(server->secmech.hmacsha256);
89 server->secmech.hmacsha256 = NULL; 92 server->secmech.hmacsha256 = NULL;
90 return PTR_ERR(server->secmech.cmacaes); 93 rc = PTR_ERR(server->secmech.cmacaes);
94 server->secmech.cmacaes = NULL;
95 return rc;
91 } 96 }
92 97
93 size = sizeof(struct shash_desc) + 98 size = sizeof(struct shash_desc) +
diff --git a/fs/dcache.c b/fs/dcache.c
index 87bdb5329c3c..83cfb834db03 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -2724,6 +2724,17 @@ char *dynamic_dname(struct dentry *dentry, char *buffer, int buflen,
2724 return memcpy(buffer, temp, sz); 2724 return memcpy(buffer, temp, sz);
2725} 2725}
2726 2726
2727char *simple_dname(struct dentry *dentry, char *buffer, int buflen)
2728{
2729 char *end = buffer + buflen;
2730 /* these dentries are never renamed, so d_lock is not needed */
2731 if (prepend(&end, &buflen, " (deleted)", 11) ||
2732 prepend_name(&end, &buflen, &dentry->d_name) ||
2733 prepend(&end, &buflen, "/", 1))
2734 end = ERR_PTR(-ENAMETOOLONG);
2735 return end;
2736}
2737
2727/* 2738/*
2728 * Write full pathname from the root of the filesystem into the buffer. 2739 * Write full pathname from the root of the filesystem into the buffer.
2729 */ 2740 */
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index 4888cb3fdef7..c7c83ff0f752 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -533,8 +533,7 @@ EXPORT_SYMBOL_GPL(debugfs_remove);
533 */ 533 */
534void debugfs_remove_recursive(struct dentry *dentry) 534void debugfs_remove_recursive(struct dentry *dentry)
535{ 535{
536 struct dentry *child; 536 struct dentry *child, *next, *parent;
537 struct dentry *parent;
538 537
539 if (IS_ERR_OR_NULL(dentry)) 538 if (IS_ERR_OR_NULL(dentry))
540 return; 539 return;
@@ -544,61 +543,37 @@ void debugfs_remove_recursive(struct dentry *dentry)
544 return; 543 return;
545 544
546 parent = dentry; 545 parent = dentry;
546 down:
547 mutex_lock(&parent->d_inode->i_mutex); 547 mutex_lock(&parent->d_inode->i_mutex);
548 list_for_each_entry_safe(child, next, &parent->d_subdirs, d_u.d_child) {
549 if (!debugfs_positive(child))
550 continue;
548 551
549 while (1) { 552 /* perhaps simple_empty(child) makes more sense */
550 /*
551 * When all dentries under "parent" has been removed,
552 * walk up the tree until we reach our starting point.
553 */
554 if (list_empty(&parent->d_subdirs)) {
555 mutex_unlock(&parent->d_inode->i_mutex);
556 if (parent == dentry)
557 break;
558 parent = parent->d_parent;
559 mutex_lock(&parent->d_inode->i_mutex);
560 }
561 child = list_entry(parent->d_subdirs.next, struct dentry,
562 d_u.d_child);
563 next_sibling:
564
565 /*
566 * If "child" isn't empty, walk down the tree and
567 * remove all its descendants first.
568 */
569 if (!list_empty(&child->d_subdirs)) { 553 if (!list_empty(&child->d_subdirs)) {
570 mutex_unlock(&parent->d_inode->i_mutex); 554 mutex_unlock(&parent->d_inode->i_mutex);
571 parent = child; 555 parent = child;
572 mutex_lock(&parent->d_inode->i_mutex); 556 goto down;
573 continue;
574 } 557 }
575 __debugfs_remove(child, parent); 558 up:
576 if (parent->d_subdirs.next == &child->d_u.d_child) { 559 if (!__debugfs_remove(child, parent))
577 /* 560 simple_release_fs(&debugfs_mount, &debugfs_mount_count);
578 * Try the next sibling.
579 */
580 if (child->d_u.d_child.next != &parent->d_subdirs) {
581 child = list_entry(child->d_u.d_child.next,
582 struct dentry,
583 d_u.d_child);
584 goto next_sibling;
585 }
586
587 /*
588 * Avoid infinite loop if we fail to remove
589 * one dentry.
590 */
591 mutex_unlock(&parent->d_inode->i_mutex);
592 break;
593 }
594 simple_release_fs(&debugfs_mount, &debugfs_mount_count);
595 } 561 }
596 562
597 parent = dentry->d_parent; 563 mutex_unlock(&parent->d_inode->i_mutex);
564 child = parent;
565 parent = parent->d_parent;
598 mutex_lock(&parent->d_inode->i_mutex); 566 mutex_lock(&parent->d_inode->i_mutex);
599 __debugfs_remove(dentry, parent); 567
568 if (child != dentry) {
569 next = list_entry(child->d_u.d_child.next, struct dentry,
570 d_u.d_child);
571 goto up;
572 }
573
574 if (!__debugfs_remove(child, parent))
575 simple_release_fs(&debugfs_mount, &debugfs_mount_count);
600 mutex_unlock(&parent->d_inode->i_mutex); 576 mutex_unlock(&parent->d_inode->i_mutex);
601 simple_release_fs(&debugfs_mount, &debugfs_mount_count);
602} 577}
603EXPORT_SYMBOL_GPL(debugfs_remove_recursive); 578EXPORT_SYMBOL_GPL(debugfs_remove_recursive);
604 579
diff --git a/fs/dlm/user.c b/fs/dlm/user.c
index 911649a47dd5..812149119fa3 100644
--- a/fs/dlm/user.c
+++ b/fs/dlm/user.c
@@ -686,7 +686,6 @@ static int device_close(struct inode *inode, struct file *file)
686 device_remove_lockspace() */ 686 device_remove_lockspace() */
687 687
688 sigprocmask(SIG_SETMASK, &tmpsig, NULL); 688 sigprocmask(SIG_SETMASK, &tmpsig, NULL);
689 recalc_sigpending();
690 689
691 return 0; 690 return 0;
692} 691}
diff --git a/fs/efs/inode.c b/fs/efs/inode.c
index f3913eb2c474..d15ccf20f1b3 100644
--- a/fs/efs/inode.c
+++ b/fs/efs/inode.c
@@ -57,7 +57,7 @@ struct inode *efs_iget(struct super_block *super, unsigned long ino)
57 struct inode *inode; 57 struct inode *inode;
58 58
59 inode = iget_locked(super, ino); 59 inode = iget_locked(super, ino);
60 if (IS_ERR(inode)) 60 if (!inode)
61 return ERR_PTR(-ENOMEM); 61 return ERR_PTR(-ENOMEM);
62 if (!(inode->i_state & I_NEW)) 62 if (!(inode->i_state & I_NEW))
63 return inode; 63 return inode;
diff --git a/fs/exec.c b/fs/exec.c
index 9c73def87642..fd774c7cb483 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -608,7 +608,7 @@ static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift)
608 return -ENOMEM; 608 return -ENOMEM;
609 609
610 lru_add_drain(); 610 lru_add_drain();
611 tlb_gather_mmu(&tlb, mm, 0); 611 tlb_gather_mmu(&tlb, mm, old_start, old_end);
612 if (new_end > old_start) { 612 if (new_end > old_start) {
613 /* 613 /*
614 * when the old and new regions overlap clear from new_end. 614 * when the old and new regions overlap clear from new_end.
@@ -625,7 +625,7 @@ static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift)
625 free_pgd_range(&tlb, old_start, old_end, new_end, 625 free_pgd_range(&tlb, old_start, old_end, new_end,
626 vma->vm_next ? vma->vm_next->vm_start : USER_PGTABLES_CEILING); 626 vma->vm_next ? vma->vm_next->vm_start : USER_PGTABLES_CEILING);
627 } 627 }
628 tlb_finish_mmu(&tlb, new_end, old_end); 628 tlb_finish_mmu(&tlb, old_start, old_end);
629 629
630 /* 630 /*
631 * Shrink the vma to just the new range. Always succeeds. 631 * Shrink the vma to just the new range. Always succeeds.
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index 998ea111e537..1194b1f0f839 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -1780,11 +1780,11 @@ retry:
1780 inode->i_op = &ext3_file_inode_operations; 1780 inode->i_op = &ext3_file_inode_operations;
1781 inode->i_fop = &ext3_file_operations; 1781 inode->i_fop = &ext3_file_operations;
1782 ext3_set_aops(inode); 1782 ext3_set_aops(inode);
1783 d_tmpfile(dentry, inode);
1783 err = ext3_orphan_add(handle, inode); 1784 err = ext3_orphan_add(handle, inode);
1784 if (err) 1785 if (err)
1785 goto err_drop_inode; 1786 goto err_drop_inode;
1786 mark_inode_dirty(inode); 1787 mark_inode_dirty(inode);
1787 d_tmpfile(dentry, inode);
1788 unlock_new_inode(inode); 1788 unlock_new_inode(inode);
1789 } 1789 }
1790 ext3_journal_stop(handle); 1790 ext3_journal_stop(handle);
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index 58339393fa6e..ddd715e42a5c 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -38,8 +38,8 @@ ext4_group_t ext4_get_group_number(struct super_block *sb,
38 ext4_group_t group; 38 ext4_group_t group;
39 39
40 if (test_opt2(sb, STD_GROUP_SIZE)) 40 if (test_opt2(sb, STD_GROUP_SIZE))
41 group = (le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block) + 41 group = (block -
42 block) >> 42 le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block)) >>
43 (EXT4_BLOCK_SIZE_BITS(sb) + EXT4_CLUSTER_BITS(sb) + 3); 43 (EXT4_BLOCK_SIZE_BITS(sb) + EXT4_CLUSTER_BITS(sb) + 3);
44 else 44 else
45 ext4_get_group_no_and_offset(sb, block, &group, NULL); 45 ext4_get_group_no_and_offset(sb, block, &group, NULL);
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index b577e45425b0..0ab26fbf3380 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -2086,6 +2086,7 @@ extern int ext4_sync_inode(handle_t *, struct inode *);
2086extern void ext4_dirty_inode(struct inode *, int); 2086extern void ext4_dirty_inode(struct inode *, int);
2087extern int ext4_change_inode_journal_flag(struct inode *, int); 2087extern int ext4_change_inode_journal_flag(struct inode *, int);
2088extern int ext4_get_inode_loc(struct inode *, struct ext4_iloc *); 2088extern int ext4_get_inode_loc(struct inode *, struct ext4_iloc *);
2089extern int ext4_inode_attach_jinode(struct inode *inode);
2089extern int ext4_can_truncate(struct inode *inode); 2090extern int ext4_can_truncate(struct inode *inode);
2090extern void ext4_truncate(struct inode *); 2091extern void ext4_truncate(struct inode *);
2091extern int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length); 2092extern int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length);
diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c
index 72a3600aedbd..17ac112ab101 100644
--- a/fs/ext4/ext4_jbd2.c
+++ b/fs/ext4/ext4_jbd2.c
@@ -255,10 +255,10 @@ int __ext4_handle_dirty_metadata(const char *where, unsigned int line,
255 set_buffer_prio(bh); 255 set_buffer_prio(bh);
256 if (ext4_handle_valid(handle)) { 256 if (ext4_handle_valid(handle)) {
257 err = jbd2_journal_dirty_metadata(handle, bh); 257 err = jbd2_journal_dirty_metadata(handle, bh);
258 if (err) { 258 /* Errors can only happen if there is a bug */
259 /* Errors can only happen if there is a bug */ 259 if (WARN_ON_ONCE(err)) {
260 handle->h_err = err; 260 ext4_journal_abort_handle(where, line, __func__, bh,
261 __ext4_journal_stop(where, line, handle); 261 handle, err);
262 } 262 }
263 } else { 263 } else {
264 if (inode) 264 if (inode)
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 7097b0f680e6..72ba4705d4fa 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -2835,6 +2835,9 @@ again:
2835 err = -EIO; 2835 err = -EIO;
2836 break; 2836 break;
2837 } 2837 }
2838 /* Yield here to deal with large extent trees.
2839 * Should be a no-op if we did IO above. */
2840 cond_resched();
2838 if (WARN_ON(i + 1 > depth)) { 2841 if (WARN_ON(i + 1 > depth)) {
2839 err = -EIO; 2842 err = -EIO;
2840 break; 2843 break;
@@ -4261,8 +4264,8 @@ got_allocated_blocks:
4261 /* not a good idea to call discard here directly, 4264 /* not a good idea to call discard here directly,
4262 * but otherwise we'd need to call it every free() */ 4265 * but otherwise we'd need to call it every free() */
4263 ext4_discard_preallocations(inode); 4266 ext4_discard_preallocations(inode);
4264 ext4_free_blocks(handle, inode, NULL, ext4_ext_pblock(&newex), 4267 ext4_free_blocks(handle, inode, NULL, newblock,
4265 ext4_ext_get_actual_len(&newex), fb_flags); 4268 EXT4_C2B(sbi, allocated_clusters), fb_flags);
4266 goto out2; 4269 goto out2;
4267 } 4270 }
4268 4271
@@ -4382,8 +4385,9 @@ out2:
4382 } 4385 }
4383 4386
4384out3: 4387out3:
4385 trace_ext4_ext_map_blocks_exit(inode, flags, map, err ? err : allocated); 4388 trace_ext4_ext_map_blocks_exit(inode, flags, map,
4386 4389 err ? err : allocated);
4390 ext4_es_lru_add(inode);
4387 return err ? err : allocated; 4391 return err ? err : allocated;
4388} 4392}
4389 4393
@@ -4405,9 +4409,20 @@ void ext4_ext_truncate(handle_t *handle, struct inode *inode)
4405 4409
4406 last_block = (inode->i_size + sb->s_blocksize - 1) 4410 last_block = (inode->i_size + sb->s_blocksize - 1)
4407 >> EXT4_BLOCK_SIZE_BITS(sb); 4411 >> EXT4_BLOCK_SIZE_BITS(sb);
4412retry:
4408 err = ext4_es_remove_extent(inode, last_block, 4413 err = ext4_es_remove_extent(inode, last_block,
4409 EXT_MAX_BLOCKS - last_block); 4414 EXT_MAX_BLOCKS - last_block);
4415 if (err == -ENOMEM) {
4416 cond_resched();
4417 congestion_wait(BLK_RW_ASYNC, HZ/50);
4418 goto retry;
4419 }
4420 if (err) {
4421 ext4_std_error(inode->i_sb, err);
4422 return;
4423 }
4410 err = ext4_ext_remove_space(inode, last_block, EXT_MAX_BLOCKS - 1); 4424 err = ext4_ext_remove_space(inode, last_block, EXT_MAX_BLOCKS - 1);
4425 ext4_std_error(inode->i_sb, err);
4411} 4426}
4412 4427
4413static void ext4_falloc_update_inode(struct inode *inode, 4428static void ext4_falloc_update_inode(struct inode *inode,
diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c
index ee018d5f397e..91cb110da1b4 100644
--- a/fs/ext4/extents_status.c
+++ b/fs/ext4/extents_status.c
@@ -148,6 +148,8 @@ static int __es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
148 ext4_lblk_t end); 148 ext4_lblk_t end);
149static int __es_try_to_reclaim_extents(struct ext4_inode_info *ei, 149static int __es_try_to_reclaim_extents(struct ext4_inode_info *ei,
150 int nr_to_scan); 150 int nr_to_scan);
151static int __ext4_es_shrink(struct ext4_sb_info *sbi, int nr_to_scan,
152 struct ext4_inode_info *locked_ei);
151 153
152int __init ext4_init_es(void) 154int __init ext4_init_es(void)
153{ 155{
@@ -439,7 +441,7 @@ static void ext4_es_insert_extent_ext_check(struct inode *inode,
439 */ 441 */
440 if (!ext4_es_is_written(es) && !ext4_es_is_unwritten(es)) { 442 if (!ext4_es_is_written(es) && !ext4_es_is_unwritten(es)) {
441 if (in_range(es->es_lblk, ee_block, ee_len)) { 443 if (in_range(es->es_lblk, ee_block, ee_len)) {
442 pr_warn("ES insert assertation failed for " 444 pr_warn("ES insert assertion failed for "
443 "inode: %lu we can find an extent " 445 "inode: %lu we can find an extent "
444 "at block [%d/%d/%llu/%c], but we " 446 "at block [%d/%d/%llu/%c], but we "
445 "want to add an delayed/hole extent " 447 "want to add an delayed/hole extent "
@@ -458,7 +460,7 @@ static void ext4_es_insert_extent_ext_check(struct inode *inode,
458 */ 460 */
459 if (es->es_lblk < ee_block || 461 if (es->es_lblk < ee_block ||
460 ext4_es_pblock(es) != ee_start + es->es_lblk - ee_block) { 462 ext4_es_pblock(es) != ee_start + es->es_lblk - ee_block) {
461 pr_warn("ES insert assertation failed for inode: %lu " 463 pr_warn("ES insert assertion failed for inode: %lu "
462 "ex_status [%d/%d/%llu/%c] != " 464 "ex_status [%d/%d/%llu/%c] != "
463 "es_status [%d/%d/%llu/%c]\n", inode->i_ino, 465 "es_status [%d/%d/%llu/%c]\n", inode->i_ino,
464 ee_block, ee_len, ee_start, 466 ee_block, ee_len, ee_start,
@@ -468,7 +470,7 @@ static void ext4_es_insert_extent_ext_check(struct inode *inode,
468 } 470 }
469 471
470 if (ee_status ^ es_status) { 472 if (ee_status ^ es_status) {
471 pr_warn("ES insert assertation failed for inode: %lu " 473 pr_warn("ES insert assertion failed for inode: %lu "
472 "ex_status [%d/%d/%llu/%c] != " 474 "ex_status [%d/%d/%llu/%c] != "
473 "es_status [%d/%d/%llu/%c]\n", inode->i_ino, 475 "es_status [%d/%d/%llu/%c]\n", inode->i_ino,
474 ee_block, ee_len, ee_start, 476 ee_block, ee_len, ee_start,
@@ -481,7 +483,7 @@ static void ext4_es_insert_extent_ext_check(struct inode *inode,
481 * that we don't want to add an written/unwritten extent. 483 * that we don't want to add an written/unwritten extent.
482 */ 484 */
483 if (!ext4_es_is_delayed(es) && !ext4_es_is_hole(es)) { 485 if (!ext4_es_is_delayed(es) && !ext4_es_is_hole(es)) {
484 pr_warn("ES insert assertation failed for inode: %lu " 486 pr_warn("ES insert assertion failed for inode: %lu "
485 "can't find an extent at block %d but we want " 487 "can't find an extent at block %d but we want "
486 "to add an written/unwritten extent " 488 "to add an written/unwritten extent "
487 "[%d/%d/%llu/%llx]\n", inode->i_ino, 489 "[%d/%d/%llu/%llx]\n", inode->i_ino,
@@ -519,7 +521,7 @@ static void ext4_es_insert_extent_ind_check(struct inode *inode,
519 * We want to add a delayed/hole extent but this 521 * We want to add a delayed/hole extent but this
520 * block has been allocated. 522 * block has been allocated.
521 */ 523 */
522 pr_warn("ES insert assertation failed for inode: %lu " 524 pr_warn("ES insert assertion failed for inode: %lu "
523 "We can find blocks but we want to add a " 525 "We can find blocks but we want to add a "
524 "delayed/hole extent [%d/%d/%llu/%llx]\n", 526 "delayed/hole extent [%d/%d/%llu/%llx]\n",
525 inode->i_ino, es->es_lblk, es->es_len, 527 inode->i_ino, es->es_lblk, es->es_len,
@@ -527,13 +529,13 @@ static void ext4_es_insert_extent_ind_check(struct inode *inode,
527 return; 529 return;
528 } else if (ext4_es_is_written(es)) { 530 } else if (ext4_es_is_written(es)) {
529 if (retval != es->es_len) { 531 if (retval != es->es_len) {
530 pr_warn("ES insert assertation failed for " 532 pr_warn("ES insert assertion failed for "
531 "inode: %lu retval %d != es_len %d\n", 533 "inode: %lu retval %d != es_len %d\n",
532 inode->i_ino, retval, es->es_len); 534 inode->i_ino, retval, es->es_len);
533 return; 535 return;
534 } 536 }
535 if (map.m_pblk != ext4_es_pblock(es)) { 537 if (map.m_pblk != ext4_es_pblock(es)) {
536 pr_warn("ES insert assertation failed for " 538 pr_warn("ES insert assertion failed for "
537 "inode: %lu m_pblk %llu != " 539 "inode: %lu m_pblk %llu != "
538 "es_pblk %llu\n", 540 "es_pblk %llu\n",
539 inode->i_ino, map.m_pblk, 541 inode->i_ino, map.m_pblk,
@@ -549,7 +551,7 @@ static void ext4_es_insert_extent_ind_check(struct inode *inode,
549 } 551 }
550 } else if (retval == 0) { 552 } else if (retval == 0) {
551 if (ext4_es_is_written(es)) { 553 if (ext4_es_is_written(es)) {
552 pr_warn("ES insert assertation failed for inode: %lu " 554 pr_warn("ES insert assertion failed for inode: %lu "
553 "We can't find the block but we want to add " 555 "We can't find the block but we want to add "
554 "an written extent [%d/%d/%llu/%llx]\n", 556 "an written extent [%d/%d/%llu/%llx]\n",
555 inode->i_ino, es->es_lblk, es->es_len, 557 inode->i_ino, es->es_lblk, es->es_len,
@@ -632,10 +634,8 @@ out:
632} 634}
633 635
634/* 636/*
635 * ext4_es_insert_extent() adds a space to a extent status tree. 637 * ext4_es_insert_extent() adds information to an inode's extent
636 * 638 * status tree.
637 * ext4_es_insert_extent is called by ext4_da_write_begin and
638 * ext4_es_remove_extent.
639 * 639 *
640 * Return 0 on success, error code on failure. 640 * Return 0 on success, error code on failure.
641 */ 641 */
@@ -667,7 +667,13 @@ int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk,
667 err = __es_remove_extent(inode, lblk, end); 667 err = __es_remove_extent(inode, lblk, end);
668 if (err != 0) 668 if (err != 0)
669 goto error; 669 goto error;
670retry:
670 err = __es_insert_extent(inode, &newes); 671 err = __es_insert_extent(inode, &newes);
672 if (err == -ENOMEM && __ext4_es_shrink(EXT4_SB(inode->i_sb), 1,
673 EXT4_I(inode)))
674 goto retry;
675 if (err == -ENOMEM && !ext4_es_is_delayed(&newes))
676 err = 0;
671 677
672error: 678error:
673 write_unlock(&EXT4_I(inode)->i_es_lock); 679 write_unlock(&EXT4_I(inode)->i_es_lock);
@@ -746,8 +752,10 @@ static int __es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
746 struct extent_status orig_es; 752 struct extent_status orig_es;
747 ext4_lblk_t len1, len2; 753 ext4_lblk_t len1, len2;
748 ext4_fsblk_t block; 754 ext4_fsblk_t block;
749 int err = 0; 755 int err;
750 756
757retry:
758 err = 0;
751 es = __es_tree_search(&tree->root, lblk); 759 es = __es_tree_search(&tree->root, lblk);
752 if (!es) 760 if (!es)
753 goto out; 761 goto out;
@@ -782,6 +790,10 @@ static int __es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
782 if (err) { 790 if (err) {
783 es->es_lblk = orig_es.es_lblk; 791 es->es_lblk = orig_es.es_lblk;
784 es->es_len = orig_es.es_len; 792 es->es_len = orig_es.es_len;
793 if ((err == -ENOMEM) &&
794 __ext4_es_shrink(EXT4_SB(inode->i_sb), 1,
795 EXT4_I(inode)))
796 goto retry;
785 goto out; 797 goto out;
786 } 798 }
787 } else { 799 } else {
@@ -891,22 +903,14 @@ static int ext4_inode_touch_time_cmp(void *priv, struct list_head *a,
891 return -1; 903 return -1;
892} 904}
893 905
894static int ext4_es_shrink(struct shrinker *shrink, struct shrink_control *sc) 906static int __ext4_es_shrink(struct ext4_sb_info *sbi, int nr_to_scan,
907 struct ext4_inode_info *locked_ei)
895{ 908{
896 struct ext4_sb_info *sbi = container_of(shrink,
897 struct ext4_sb_info, s_es_shrinker);
898 struct ext4_inode_info *ei; 909 struct ext4_inode_info *ei;
899 struct list_head *cur, *tmp; 910 struct list_head *cur, *tmp;
900 LIST_HEAD(skiped); 911 LIST_HEAD(skiped);
901 int nr_to_scan = sc->nr_to_scan;
902 int ret, nr_shrunk = 0; 912 int ret, nr_shrunk = 0;
903 913
904 ret = percpu_counter_read_positive(&sbi->s_extent_cache_cnt);
905 trace_ext4_es_shrink_enter(sbi->s_sb, nr_to_scan, ret);
906
907 if (!nr_to_scan)
908 return ret;
909
910 spin_lock(&sbi->s_es_lru_lock); 914 spin_lock(&sbi->s_es_lru_lock);
911 915
912 /* 916 /*
@@ -935,7 +939,7 @@ static int ext4_es_shrink(struct shrinker *shrink, struct shrink_control *sc)
935 continue; 939 continue;
936 } 940 }
937 941
938 if (ei->i_es_lru_nr == 0) 942 if (ei->i_es_lru_nr == 0 || ei == locked_ei)
939 continue; 943 continue;
940 944
941 write_lock(&ei->i_es_lock); 945 write_lock(&ei->i_es_lock);
@@ -954,6 +958,27 @@ static int ext4_es_shrink(struct shrinker *shrink, struct shrink_control *sc)
954 list_splice_tail(&skiped, &sbi->s_es_lru); 958 list_splice_tail(&skiped, &sbi->s_es_lru);
955 spin_unlock(&sbi->s_es_lru_lock); 959 spin_unlock(&sbi->s_es_lru_lock);
956 960
961 if (locked_ei && nr_shrunk == 0)
962 nr_shrunk = __es_try_to_reclaim_extents(ei, nr_to_scan);
963
964 return nr_shrunk;
965}
966
967static int ext4_es_shrink(struct shrinker *shrink, struct shrink_control *sc)
968{
969 struct ext4_sb_info *sbi = container_of(shrink,
970 struct ext4_sb_info, s_es_shrinker);
971 int nr_to_scan = sc->nr_to_scan;
972 int ret, nr_shrunk;
973
974 ret = percpu_counter_read_positive(&sbi->s_extent_cache_cnt);
975 trace_ext4_es_shrink_enter(sbi->s_sb, nr_to_scan, ret);
976
977 if (!nr_to_scan)
978 return ret;
979
980 nr_shrunk = __ext4_es_shrink(sbi, nr_to_scan, NULL);
981
957 ret = percpu_counter_read_positive(&sbi->s_extent_cache_cnt); 982 ret = percpu_counter_read_positive(&sbi->s_extent_cache_cnt);
958 trace_ext4_es_shrink_exit(sbi->s_sb, nr_shrunk, ret); 983 trace_ext4_es_shrink_exit(sbi->s_sb, nr_shrunk, ret);
959 return ret; 984 return ret;
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 6f4cc567c382..319c9d26279a 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -219,7 +219,6 @@ static int ext4_file_open(struct inode * inode, struct file * filp)
219{ 219{
220 struct super_block *sb = inode->i_sb; 220 struct super_block *sb = inode->i_sb;
221 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 221 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
222 struct ext4_inode_info *ei = EXT4_I(inode);
223 struct vfsmount *mnt = filp->f_path.mnt; 222 struct vfsmount *mnt = filp->f_path.mnt;
224 struct path path; 223 struct path path;
225 char buf[64], *cp; 224 char buf[64], *cp;
@@ -259,22 +258,10 @@ static int ext4_file_open(struct inode * inode, struct file * filp)
259 * Set up the jbd2_inode if we are opening the inode for 258 * Set up the jbd2_inode if we are opening the inode for
260 * writing and the journal is present 259 * writing and the journal is present
261 */ 260 */
262 if (sbi->s_journal && !ei->jinode && (filp->f_mode & FMODE_WRITE)) { 261 if (filp->f_mode & FMODE_WRITE) {
263 struct jbd2_inode *jinode = jbd2_alloc_inode(GFP_KERNEL); 262 int ret = ext4_inode_attach_jinode(inode);
264 263 if (ret < 0)
265 spin_lock(&inode->i_lock); 264 return ret;
266 if (!ei->jinode) {
267 if (!jinode) {
268 spin_unlock(&inode->i_lock);
269 return -ENOMEM;
270 }
271 ei->jinode = jinode;
272 jbd2_journal_init_jbd_inode(ei->jinode, inode);
273 jinode = NULL;
274 }
275 spin_unlock(&inode->i_lock);
276 if (unlikely(jinode != NULL))
277 jbd2_free_inode(jinode);
278 } 265 }
279 return dquot_file_open(inode, filp); 266 return dquot_file_open(inode, filp);
280} 267}
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index f03598c6ffd3..8bf5999875ee 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -734,11 +734,8 @@ repeat_in_this_group:
734 ino = ext4_find_next_zero_bit((unsigned long *) 734 ino = ext4_find_next_zero_bit((unsigned long *)
735 inode_bitmap_bh->b_data, 735 inode_bitmap_bh->b_data,
736 EXT4_INODES_PER_GROUP(sb), ino); 736 EXT4_INODES_PER_GROUP(sb), ino);
737 if (ino >= EXT4_INODES_PER_GROUP(sb)) { 737 if (ino >= EXT4_INODES_PER_GROUP(sb))
738 if (++group == ngroups) 738 goto next_group;
739 group = 0;
740 continue;
741 }
742 if (group == 0 && (ino+1) < EXT4_FIRST_INO(sb)) { 739 if (group == 0 && (ino+1) < EXT4_FIRST_INO(sb)) {
743 ext4_error(sb, "reserved inode found cleared - " 740 ext4_error(sb, "reserved inode found cleared - "
744 "inode=%lu", ino + 1); 741 "inode=%lu", ino + 1);
@@ -769,6 +766,9 @@ repeat_in_this_group:
769 goto got; /* we grabbed the inode! */ 766 goto got; /* we grabbed the inode! */
770 if (ino < EXT4_INODES_PER_GROUP(sb)) 767 if (ino < EXT4_INODES_PER_GROUP(sb))
771 goto repeat_in_this_group; 768 goto repeat_in_this_group;
769next_group:
770 if (++group == ngroups)
771 group = 0;
772 } 772 }
773 err = -ENOSPC; 773 err = -ENOSPC;
774 goto out; 774 goto out;
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 0188e65e1f58..c2ca04e67a4f 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -465,7 +465,7 @@ static void ext4_map_blocks_es_recheck(handle_t *handle,
465 if (es_map->m_lblk != map->m_lblk || 465 if (es_map->m_lblk != map->m_lblk ||
466 es_map->m_flags != map->m_flags || 466 es_map->m_flags != map->m_flags ||
467 es_map->m_pblk != map->m_pblk) { 467 es_map->m_pblk != map->m_pblk) {
468 printk("ES cache assertation failed for inode: %lu " 468 printk("ES cache assertion failed for inode: %lu "
469 "es_cached ex [%d/%d/%llu/%x] != " 469 "es_cached ex [%d/%d/%llu/%x] != "
470 "found ex [%d/%d/%llu/%x] retval %d flags %x\n", 470 "found ex [%d/%d/%llu/%x] retval %d flags %x\n",
471 inode->i_ino, es_map->m_lblk, es_map->m_len, 471 inode->i_ino, es_map->m_lblk, es_map->m_len,
@@ -514,10 +514,9 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
514 "logical block %lu\n", inode->i_ino, flags, map->m_len, 514 "logical block %lu\n", inode->i_ino, flags, map->m_len,
515 (unsigned long) map->m_lblk); 515 (unsigned long) map->m_lblk);
516 516
517 ext4_es_lru_add(inode);
518
519 /* Lookup extent status tree firstly */ 517 /* Lookup extent status tree firstly */
520 if (ext4_es_lookup_extent(inode, map->m_lblk, &es)) { 518 if (ext4_es_lookup_extent(inode, map->m_lblk, &es)) {
519 ext4_es_lru_add(inode);
521 if (ext4_es_is_written(&es) || ext4_es_is_unwritten(&es)) { 520 if (ext4_es_is_written(&es) || ext4_es_is_unwritten(&es)) {
522 map->m_pblk = ext4_es_pblock(&es) + 521 map->m_pblk = ext4_es_pblock(&es) +
523 map->m_lblk - es.es_lblk; 522 map->m_lblk - es.es_lblk;
@@ -556,14 +555,13 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
556 int ret; 555 int ret;
557 unsigned long long status; 556 unsigned long long status;
558 557
559#ifdef ES_AGGRESSIVE_TEST 558 if (unlikely(retval != map->m_len)) {
560 if (retval != map->m_len) { 559 ext4_warning(inode->i_sb,
561 printk("ES len assertation failed for inode: %lu " 560 "ES len assertion failed for inode "
562 "retval %d != map->m_len %d " 561 "%lu: retval %d != map->m_len %d",
563 "in %s (lookup)\n", inode->i_ino, retval, 562 inode->i_ino, retval, map->m_len);
564 map->m_len, __func__); 563 WARN_ON(1);
565 } 564 }
566#endif
567 565
568 status = map->m_flags & EXT4_MAP_UNWRITTEN ? 566 status = map->m_flags & EXT4_MAP_UNWRITTEN ?
569 EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN; 567 EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN;
@@ -657,14 +655,13 @@ found:
657 int ret; 655 int ret;
658 unsigned long long status; 656 unsigned long long status;
659 657
660#ifdef ES_AGGRESSIVE_TEST 658 if (unlikely(retval != map->m_len)) {
661 if (retval != map->m_len) { 659 ext4_warning(inode->i_sb,
662 printk("ES len assertation failed for inode: %lu " 660 "ES len assertion failed for inode "
663 "retval %d != map->m_len %d " 661 "%lu: retval %d != map->m_len %d",
664 "in %s (allocation)\n", inode->i_ino, retval, 662 inode->i_ino, retval, map->m_len);
665 map->m_len, __func__); 663 WARN_ON(1);
666 } 664 }
667#endif
668 665
669 /* 666 /*
670 * If the extent has been zeroed out, we don't need to update 667 * If the extent has been zeroed out, we don't need to update
@@ -1529,11 +1526,9 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
1529 "logical block %lu\n", inode->i_ino, map->m_len, 1526 "logical block %lu\n", inode->i_ino, map->m_len,
1530 (unsigned long) map->m_lblk); 1527 (unsigned long) map->m_lblk);
1531 1528
1532 ext4_es_lru_add(inode);
1533
1534 /* Lookup extent status tree firstly */ 1529 /* Lookup extent status tree firstly */
1535 if (ext4_es_lookup_extent(inode, iblock, &es)) { 1530 if (ext4_es_lookup_extent(inode, iblock, &es)) {
1536 1531 ext4_es_lru_add(inode);
1537 if (ext4_es_is_hole(&es)) { 1532 if (ext4_es_is_hole(&es)) {
1538 retval = 0; 1533 retval = 0;
1539 down_read((&EXT4_I(inode)->i_data_sem)); 1534 down_read((&EXT4_I(inode)->i_data_sem));
@@ -1640,14 +1635,13 @@ add_delayed:
1640 int ret; 1635 int ret;
1641 unsigned long long status; 1636 unsigned long long status;
1642 1637
1643#ifdef ES_AGGRESSIVE_TEST 1638 if (unlikely(retval != map->m_len)) {
1644 if (retval != map->m_len) { 1639 ext4_warning(inode->i_sb,
1645 printk("ES len assertation failed for inode: %lu " 1640 "ES len assertion failed for inode "
1646 "retval %d != map->m_len %d " 1641 "%lu: retval %d != map->m_len %d",
1647 "in %s (lookup)\n", inode->i_ino, retval, 1642 inode->i_ino, retval, map->m_len);
1648 map->m_len, __func__); 1643 WARN_ON(1);
1649 } 1644 }
1650#endif
1651 1645
1652 status = map->m_flags & EXT4_MAP_UNWRITTEN ? 1646 status = map->m_flags & EXT4_MAP_UNWRITTEN ?
1653 EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN; 1647 EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN;
@@ -2163,7 +2157,7 @@ static int mpage_map_and_submit_extent(handle_t *handle,
2163 2157
2164 mpd->io_submit.io_end->offset = 2158 mpd->io_submit.io_end->offset =
2165 ((loff_t)map->m_lblk) << inode->i_blkbits; 2159 ((loff_t)map->m_lblk) << inode->i_blkbits;
2166 while (map->m_len) { 2160 do {
2167 err = mpage_map_one_extent(handle, mpd); 2161 err = mpage_map_one_extent(handle, mpd);
2168 if (err < 0) { 2162 if (err < 0) {
2169 struct super_block *sb = inode->i_sb; 2163 struct super_block *sb = inode->i_sb;
@@ -2201,7 +2195,7 @@ static int mpage_map_and_submit_extent(handle_t *handle,
2201 err = mpage_map_and_submit_buffers(mpd); 2195 err = mpage_map_and_submit_buffers(mpd);
2202 if (err < 0) 2196 if (err < 0)
2203 return err; 2197 return err;
2204 } 2198 } while (map->m_len);
2205 2199
2206 /* Update on-disk size after IO is submitted */ 2200 /* Update on-disk size after IO is submitted */
2207 disksize = ((loff_t)mpd->first_page) << PAGE_CACHE_SHIFT; 2201 disksize = ((loff_t)mpd->first_page) << PAGE_CACHE_SHIFT;
@@ -3539,6 +3533,18 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length)
3539 offset; 3533 offset;
3540 } 3534 }
3541 3535
3536 if (offset & (sb->s_blocksize - 1) ||
3537 (offset + length) & (sb->s_blocksize - 1)) {
3538 /*
3539 * Attach jinode to inode for jbd2 if we do any zeroing of
3540 * partial block
3541 */
3542 ret = ext4_inode_attach_jinode(inode);
3543 if (ret < 0)
3544 goto out_mutex;
3545
3546 }
3547
3542 first_block_offset = round_up(offset, sb->s_blocksize); 3548 first_block_offset = round_up(offset, sb->s_blocksize);
3543 last_block_offset = round_down((offset + length), sb->s_blocksize) - 1; 3549 last_block_offset = round_down((offset + length), sb->s_blocksize) - 1;
3544 3550
@@ -3607,6 +3613,31 @@ out_mutex:
3607 return ret; 3613 return ret;
3608} 3614}
3609 3615
3616int ext4_inode_attach_jinode(struct inode *inode)
3617{
3618 struct ext4_inode_info *ei = EXT4_I(inode);
3619 struct jbd2_inode *jinode;
3620
3621 if (ei->jinode || !EXT4_SB(inode->i_sb)->s_journal)
3622 return 0;
3623
3624 jinode = jbd2_alloc_inode(GFP_KERNEL);
3625 spin_lock(&inode->i_lock);
3626 if (!ei->jinode) {
3627 if (!jinode) {
3628 spin_unlock(&inode->i_lock);
3629 return -ENOMEM;
3630 }
3631 ei->jinode = jinode;
3632 jbd2_journal_init_jbd_inode(ei->jinode, inode);
3633 jinode = NULL;
3634 }
3635 spin_unlock(&inode->i_lock);
3636 if (unlikely(jinode != NULL))
3637 jbd2_free_inode(jinode);
3638 return 0;
3639}
3640
3610/* 3641/*
3611 * ext4_truncate() 3642 * ext4_truncate()
3612 * 3643 *
@@ -3667,6 +3698,12 @@ void ext4_truncate(struct inode *inode)
3667 return; 3698 return;
3668 } 3699 }
3669 3700
3701 /* If we zero-out tail of the page, we have to create jinode for jbd2 */
3702 if (inode->i_size & (inode->i_sb->s_blocksize - 1)) {
3703 if (ext4_inode_attach_jinode(inode) < 0)
3704 return;
3705 }
3706
3670 if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) 3707 if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
3671 credits = ext4_writepage_trans_blocks(inode); 3708 credits = ext4_writepage_trans_blocks(inode);
3672 else 3709 else
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index 9491ac0590f7..c0427e2f6648 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -77,8 +77,10 @@ static void swap_inode_data(struct inode *inode1, struct inode *inode2)
77 memswap(ei1->i_data, ei2->i_data, sizeof(ei1->i_data)); 77 memswap(ei1->i_data, ei2->i_data, sizeof(ei1->i_data));
78 memswap(&ei1->i_flags, &ei2->i_flags, sizeof(ei1->i_flags)); 78 memswap(&ei1->i_flags, &ei2->i_flags, sizeof(ei1->i_flags));
79 memswap(&ei1->i_disksize, &ei2->i_disksize, sizeof(ei1->i_disksize)); 79 memswap(&ei1->i_disksize, &ei2->i_disksize, sizeof(ei1->i_disksize));
80 memswap(&ei1->i_es_tree, &ei2->i_es_tree, sizeof(ei1->i_es_tree)); 80 ext4_es_remove_extent(inode1, 0, EXT_MAX_BLOCKS);
81 memswap(&ei1->i_es_lru_nr, &ei2->i_es_lru_nr, sizeof(ei1->i_es_lru_nr)); 81 ext4_es_remove_extent(inode2, 0, EXT_MAX_BLOCKS);
82 ext4_es_lru_del(inode1);
83 ext4_es_lru_del(inode2);
82 84
83 isize = i_size_read(inode1); 85 isize = i_size_read(inode1);
84 i_size_write(inode1, i_size_read(inode2)); 86 i_size_write(inode1, i_size_read(inode2));
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index a9ff5e5137ca..4bbbf13bd743 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -4740,11 +4740,16 @@ do_more:
4740 * blocks being freed are metadata. these blocks shouldn't 4740 * blocks being freed are metadata. these blocks shouldn't
4741 * be used until this transaction is committed 4741 * be used until this transaction is committed
4742 */ 4742 */
4743 retry:
4743 new_entry = kmem_cache_alloc(ext4_free_data_cachep, GFP_NOFS); 4744 new_entry = kmem_cache_alloc(ext4_free_data_cachep, GFP_NOFS);
4744 if (!new_entry) { 4745 if (!new_entry) {
4745 ext4_mb_unload_buddy(&e4b); 4746 /*
4746 err = -ENOMEM; 4747 * We use a retry loop because
4747 goto error_return; 4748 * ext4_free_blocks() is not allowed to fail.
4749 */
4750 cond_resched();
4751 congestion_wait(BLK_RW_ASYNC, HZ/50);
4752 goto retry;
4748 } 4753 }
4749 new_entry->efd_start_cluster = bit; 4754 new_entry->efd_start_cluster = bit;
4750 new_entry->efd_group = block_group; 4755 new_entry->efd_group = block_group;
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 234b834d5a97..35f55a0dbc4b 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -2316,11 +2316,11 @@ retry:
2316 inode->i_op = &ext4_file_inode_operations; 2316 inode->i_op = &ext4_file_inode_operations;
2317 inode->i_fop = &ext4_file_operations; 2317 inode->i_fop = &ext4_file_operations;
2318 ext4_set_aops(inode); 2318 ext4_set_aops(inode);
2319 d_tmpfile(dentry, inode);
2319 err = ext4_orphan_add(handle, inode); 2320 err = ext4_orphan_add(handle, inode);
2320 if (err) 2321 if (err)
2321 goto err_drop_inode; 2322 goto err_drop_inode;
2322 mark_inode_dirty(inode); 2323 mark_inode_dirty(inode);
2323 d_tmpfile(dentry, inode);
2324 unlock_new_inode(inode); 2324 unlock_new_inode(inode);
2325 } 2325 }
2326 if (handle) 2326 if (handle)
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index 48786cdb5e6c..6625d210fb45 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -25,6 +25,7 @@
25#include <linux/kernel.h> 25#include <linux/kernel.h>
26#include <linux/slab.h> 26#include <linux/slab.h>
27#include <linux/mm.h> 27#include <linux/mm.h>
28#include <linux/ratelimit.h>
28 29
29#include "ext4_jbd2.h" 30#include "ext4_jbd2.h"
30#include "xattr.h" 31#include "xattr.h"
@@ -55,7 +56,7 @@ void ext4_exit_pageio(void)
55static void buffer_io_error(struct buffer_head *bh) 56static void buffer_io_error(struct buffer_head *bh)
56{ 57{
57 char b[BDEVNAME_SIZE]; 58 char b[BDEVNAME_SIZE];
58 printk(KERN_ERR "Buffer I/O error on device %s, logical block %llu\n", 59 printk_ratelimited(KERN_ERR "Buffer I/O error on device %s, logical block %llu\n",
59 bdevname(bh->b_bdev, b), 60 bdevname(bh->b_bdev, b),
60 (unsigned long long)bh->b_blocknr); 61 (unsigned long long)bh->b_blocknr);
61} 62}
@@ -308,6 +309,7 @@ ext4_io_end_t *ext4_get_io_end(ext4_io_end_t *io_end)
308 return io_end; 309 return io_end;
309} 310}
310 311
312/* BIO completion function for page writeback */
311static void ext4_end_bio(struct bio *bio, int error) 313static void ext4_end_bio(struct bio *bio, int error)
312{ 314{
313 ext4_io_end_t *io_end = bio->bi_private; 315 ext4_io_end_t *io_end = bio->bi_private;
@@ -318,18 +320,6 @@ static void ext4_end_bio(struct bio *bio, int error)
318 if (test_bit(BIO_UPTODATE, &bio->bi_flags)) 320 if (test_bit(BIO_UPTODATE, &bio->bi_flags))
319 error = 0; 321 error = 0;
320 322
321 if (io_end->flag & EXT4_IO_END_UNWRITTEN) {
322 /*
323 * Link bio into list hanging from io_end. We have to do it
324 * atomically as bio completions can be racing against each
325 * other.
326 */
327 bio->bi_private = xchg(&io_end->bio, bio);
328 } else {
329 ext4_finish_bio(bio);
330 bio_put(bio);
331 }
332
333 if (error) { 323 if (error) {
334 struct inode *inode = io_end->inode; 324 struct inode *inode = io_end->inode;
335 325
@@ -341,7 +331,24 @@ static void ext4_end_bio(struct bio *bio, int error)
341 (unsigned long long) 331 (unsigned long long)
342 bi_sector >> (inode->i_blkbits - 9)); 332 bi_sector >> (inode->i_blkbits - 9));
343 } 333 }
344 ext4_put_io_end_defer(io_end); 334
335 if (io_end->flag & EXT4_IO_END_UNWRITTEN) {
336 /*
337 * Link bio into list hanging from io_end. We have to do it
338 * atomically as bio completions can be racing against each
339 * other.
340 */
341 bio->bi_private = xchg(&io_end->bio, bio);
342 ext4_put_io_end_defer(io_end);
343 } else {
344 /*
345 * Drop io_end reference early. Inode can get freed once
346 * we finish the bio.
347 */
348 ext4_put_io_end_defer(io_end);
349 ext4_finish_bio(bio);
350 bio_put(bio);
351 }
345} 352}
346 353
347void ext4_io_submit(struct ext4_io_submit *io) 354void ext4_io_submit(struct ext4_io_submit *io)
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 85b3dd60169b..b59373b625e9 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1359,7 +1359,7 @@ static const struct mount_opts {
1359 {Opt_delalloc, EXT4_MOUNT_DELALLOC, 1359 {Opt_delalloc, EXT4_MOUNT_DELALLOC,
1360 MOPT_EXT4_ONLY | MOPT_SET | MOPT_EXPLICIT}, 1360 MOPT_EXT4_ONLY | MOPT_SET | MOPT_EXPLICIT},
1361 {Opt_nodelalloc, EXT4_MOUNT_DELALLOC, 1361 {Opt_nodelalloc, EXT4_MOUNT_DELALLOC,
1362 MOPT_EXT4_ONLY | MOPT_CLEAR | MOPT_EXPLICIT}, 1362 MOPT_EXT4_ONLY | MOPT_CLEAR},
1363 {Opt_journal_checksum, EXT4_MOUNT_JOURNAL_CHECKSUM, 1363 {Opt_journal_checksum, EXT4_MOUNT_JOURNAL_CHECKSUM,
1364 MOPT_EXT4_ONLY | MOPT_SET}, 1364 MOPT_EXT4_ONLY | MOPT_SET},
1365 {Opt_journal_async_commit, (EXT4_MOUNT_JOURNAL_ASYNC_COMMIT | 1365 {Opt_journal_async_commit, (EXT4_MOUNT_JOURNAL_ASYNC_COMMIT |
@@ -1702,12 +1702,6 @@ static inline void ext4_show_quota_options(struct seq_file *seq,
1702 1702
1703 if (sbi->s_qf_names[GRPQUOTA]) 1703 if (sbi->s_qf_names[GRPQUOTA])
1704 seq_printf(seq, ",grpjquota=%s", sbi->s_qf_names[GRPQUOTA]); 1704 seq_printf(seq, ",grpjquota=%s", sbi->s_qf_names[GRPQUOTA]);
1705
1706 if (test_opt(sb, USRQUOTA))
1707 seq_puts(seq, ",usrquota");
1708
1709 if (test_opt(sb, GRPQUOTA))
1710 seq_puts(seq, ",grpquota");
1711#endif 1705#endif
1712} 1706}
1713 1707
@@ -3489,7 +3483,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
3489 } 3483 }
3490 if (test_opt(sb, DIOREAD_NOLOCK)) { 3484 if (test_opt(sb, DIOREAD_NOLOCK)) {
3491 ext4_msg(sb, KERN_ERR, "can't mount with " 3485 ext4_msg(sb, KERN_ERR, "can't mount with "
3492 "both data=journal and delalloc"); 3486 "both data=journal and dioread_nolock");
3493 goto failed_mount; 3487 goto failed_mount;
3494 } 3488 }
3495 if (test_opt(sb, DELALLOC)) 3489 if (test_opt(sb, DELALLOC))
@@ -3624,10 +3618,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
3624 sbi->s_addr_per_block_bits = ilog2(EXT4_ADDR_PER_BLOCK(sb)); 3618 sbi->s_addr_per_block_bits = ilog2(EXT4_ADDR_PER_BLOCK(sb));
3625 sbi->s_desc_per_block_bits = ilog2(EXT4_DESC_PER_BLOCK(sb)); 3619 sbi->s_desc_per_block_bits = ilog2(EXT4_DESC_PER_BLOCK(sb));
3626 3620
3627 /* Do we have standard group size of blocksize * 8 blocks ? */
3628 if (sbi->s_blocks_per_group == blocksize << 3)
3629 set_opt2(sb, STD_GROUP_SIZE);
3630
3631 for (i = 0; i < 4; i++) 3621 for (i = 0; i < 4; i++)
3632 sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]); 3622 sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]);
3633 sbi->s_def_hash_version = es->s_def_hash_version; 3623 sbi->s_def_hash_version = es->s_def_hash_version;
@@ -3697,6 +3687,10 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
3697 goto failed_mount; 3687 goto failed_mount;
3698 } 3688 }
3699 3689
3690 /* Do we have standard group size of clustersize * 8 blocks ? */
3691 if (sbi->s_blocks_per_group == clustersize << 3)
3692 set_opt2(sb, STD_GROUP_SIZE);
3693
3700 /* 3694 /*
3701 * Test whether we have more sectors than will fit in sector_t, 3695 * Test whether we have more sectors than will fit in sector_t,
3702 * and whether the max offset is addressable by the page cache. 3696 * and whether the max offset is addressable by the page cache.
@@ -4733,6 +4727,21 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
4733 goto restore_opts; 4727 goto restore_opts;
4734 } 4728 }
4735 4729
4730 if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) {
4731 if (test_opt2(sb, EXPLICIT_DELALLOC)) {
4732 ext4_msg(sb, KERN_ERR, "can't mount with "
4733 "both data=journal and delalloc");
4734 err = -EINVAL;
4735 goto restore_opts;
4736 }
4737 if (test_opt(sb, DIOREAD_NOLOCK)) {
4738 ext4_msg(sb, KERN_ERR, "can't mount with "
4739 "both data=journal and dioread_nolock");
4740 err = -EINVAL;
4741 goto restore_opts;
4742 }
4743 }
4744
4736 if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED) 4745 if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED)
4737 ext4_abort(sb, "Abort forced by user"); 4746 ext4_abort(sb, "Abort forced by user");
4738 4747
@@ -5487,6 +5496,7 @@ static void __exit ext4_exit_fs(void)
5487 kset_unregister(ext4_kset); 5496 kset_unregister(ext4_kset);
5488 ext4_exit_system_zone(); 5497 ext4_exit_system_zone();
5489 ext4_exit_pageio(); 5498 ext4_exit_pageio();
5499 ext4_exit_es();
5490} 5500}
5491 5501
5492MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others"); 5502MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others");
diff --git a/fs/fcntl.c b/fs/fcntl.c
index 6599222536eb..65343c3741ff 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -730,14 +730,14 @@ static int __init fcntl_init(void)
730 * Exceptions: O_NONBLOCK is a two bit define on parisc; O_NDELAY 730 * Exceptions: O_NONBLOCK is a two bit define on parisc; O_NDELAY
731 * is defined as O_NONBLOCK on some platforms and not on others. 731 * is defined as O_NONBLOCK on some platforms and not on others.
732 */ 732 */
733 BUILD_BUG_ON(19 - 1 /* for O_RDONLY being 0 */ != HWEIGHT32( 733 BUILD_BUG_ON(20 - 1 /* for O_RDONLY being 0 */ != HWEIGHT32(
734 O_RDONLY | O_WRONLY | O_RDWR | 734 O_RDONLY | O_WRONLY | O_RDWR |
735 O_CREAT | O_EXCL | O_NOCTTY | 735 O_CREAT | O_EXCL | O_NOCTTY |
736 O_TRUNC | O_APPEND | /* O_NONBLOCK | */ 736 O_TRUNC | O_APPEND | /* O_NONBLOCK | */
737 __O_SYNC | O_DSYNC | FASYNC | 737 __O_SYNC | O_DSYNC | FASYNC |
738 O_DIRECT | O_LARGEFILE | O_DIRECTORY | 738 O_DIRECT | O_LARGEFILE | O_DIRECTORY |
739 O_NOFOLLOW | O_NOATIME | O_CLOEXEC | 739 O_NOFOLLOW | O_NOATIME | O_CLOEXEC |
740 __FMODE_EXEC | O_PATH 740 __FMODE_EXEC | O_PATH | __O_TMPFILE
741 )); 741 ));
742 742
743 fasync_cache = kmem_cache_create("fasync_cache", 743 fasync_cache = kmem_cache_create("fasync_cache",
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 0eda52738ec4..72a5d5b04494 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -1223,30 +1223,46 @@ static int fuse_direntplus_link(struct file *file,
1223 if (name.name[1] == '.' && name.len == 2) 1223 if (name.name[1] == '.' && name.len == 2)
1224 return 0; 1224 return 0;
1225 } 1225 }
1226
1227 if (invalid_nodeid(o->nodeid))
1228 return -EIO;
1229 if (!fuse_valid_type(o->attr.mode))
1230 return -EIO;
1231
1226 fc = get_fuse_conn(dir); 1232 fc = get_fuse_conn(dir);
1227 1233
1228 name.hash = full_name_hash(name.name, name.len); 1234 name.hash = full_name_hash(name.name, name.len);
1229 dentry = d_lookup(parent, &name); 1235 dentry = d_lookup(parent, &name);
1230 if (dentry && dentry->d_inode) { 1236 if (dentry) {
1231 inode = dentry->d_inode; 1237 inode = dentry->d_inode;
1232 if (get_node_id(inode) == o->nodeid) { 1238 if (!inode) {
1239 d_drop(dentry);
1240 } else if (get_node_id(inode) != o->nodeid ||
1241 ((o->attr.mode ^ inode->i_mode) & S_IFMT)) {
1242 err = d_invalidate(dentry);
1243 if (err)
1244 goto out;
1245 } else if (is_bad_inode(inode)) {
1246 err = -EIO;
1247 goto out;
1248 } else {
1233 struct fuse_inode *fi; 1249 struct fuse_inode *fi;
1234 fi = get_fuse_inode(inode); 1250 fi = get_fuse_inode(inode);
1235 spin_lock(&fc->lock); 1251 spin_lock(&fc->lock);
1236 fi->nlookup++; 1252 fi->nlookup++;
1237 spin_unlock(&fc->lock); 1253 spin_unlock(&fc->lock);
1238 1254
1255 fuse_change_attributes(inode, &o->attr,
1256 entry_attr_timeout(o),
1257 attr_version);
1258
1239 /* 1259 /*
1240 * The other branch to 'found' comes via fuse_iget() 1260 * The other branch to 'found' comes via fuse_iget()
1241 * which bumps nlookup inside 1261 * which bumps nlookup inside
1242 */ 1262 */
1243 goto found; 1263 goto found;
1244 } 1264 }
1245 err = d_invalidate(dentry);
1246 if (err)
1247 goto out;
1248 dput(dentry); 1265 dput(dentry);
1249 dentry = NULL;
1250 } 1266 }
1251 1267
1252 dentry = d_alloc(parent, &name); 1268 dentry = d_alloc(parent, &name);
@@ -1259,25 +1275,30 @@ static int fuse_direntplus_link(struct file *file,
1259 if (!inode) 1275 if (!inode)
1260 goto out; 1276 goto out;
1261 1277
1262 alias = d_materialise_unique(dentry, inode); 1278 if (S_ISDIR(inode->i_mode)) {
1263 err = PTR_ERR(alias); 1279 mutex_lock(&fc->inst_mutex);
1264 if (IS_ERR(alias)) 1280 alias = fuse_d_add_directory(dentry, inode);
1265 goto out; 1281 mutex_unlock(&fc->inst_mutex);
1282 err = PTR_ERR(alias);
1283 if (IS_ERR(alias)) {
1284 iput(inode);
1285 goto out;
1286 }
1287 } else {
1288 alias = d_splice_alias(inode, dentry);
1289 }
1290
1266 if (alias) { 1291 if (alias) {
1267 dput(dentry); 1292 dput(dentry);
1268 dentry = alias; 1293 dentry = alias;
1269 } 1294 }
1270 1295
1271found: 1296found:
1272 fuse_change_attributes(inode, &o->attr, entry_attr_timeout(o),
1273 attr_version);
1274
1275 fuse_change_entry_timeout(dentry, o); 1297 fuse_change_entry_timeout(dentry, o);
1276 1298
1277 err = 0; 1299 err = 0;
1278out: 1300out:
1279 if (dentry) 1301 dput(dentry);
1280 dput(dentry);
1281 return err; 1302 return err;
1282} 1303}
1283 1304
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 9435384562a2..544a809819c3 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -1838,14 +1838,14 @@ int __init gfs2_glock_init(void)
1838 1838
1839 glock_workqueue = alloc_workqueue("glock_workqueue", WQ_MEM_RECLAIM | 1839 glock_workqueue = alloc_workqueue("glock_workqueue", WQ_MEM_RECLAIM |
1840 WQ_HIGHPRI | WQ_FREEZABLE, 0); 1840 WQ_HIGHPRI | WQ_FREEZABLE, 0);
1841 if (IS_ERR(glock_workqueue)) 1841 if (!glock_workqueue)
1842 return PTR_ERR(glock_workqueue); 1842 return -ENOMEM;
1843 gfs2_delete_workqueue = alloc_workqueue("delete_workqueue", 1843 gfs2_delete_workqueue = alloc_workqueue("delete_workqueue",
1844 WQ_MEM_RECLAIM | WQ_FREEZABLE, 1844 WQ_MEM_RECLAIM | WQ_FREEZABLE,
1845 0); 1845 0);
1846 if (IS_ERR(gfs2_delete_workqueue)) { 1846 if (!gfs2_delete_workqueue) {
1847 destroy_workqueue(glock_workqueue); 1847 destroy_workqueue(glock_workqueue);
1848 return PTR_ERR(gfs2_delete_workqueue); 1848 return -ENOMEM;
1849 } 1849 }
1850 1850
1851 register_shrinker(&glock_shrinker); 1851 register_shrinker(&glock_shrinker);
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index 5f2e5224c51c..e2e0a90396e7 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -47,7 +47,8 @@ static void gfs2_ail_error(struct gfs2_glock *gl, const struct buffer_head *bh)
47 * None of the buffers should be dirty, locked, or pinned. 47 * None of the buffers should be dirty, locked, or pinned.
48 */ 48 */
49 49
50static void __gfs2_ail_flush(struct gfs2_glock *gl, bool fsync) 50static void __gfs2_ail_flush(struct gfs2_glock *gl, bool fsync,
51 unsigned int nr_revokes)
51{ 52{
52 struct gfs2_sbd *sdp = gl->gl_sbd; 53 struct gfs2_sbd *sdp = gl->gl_sbd;
53 struct list_head *head = &gl->gl_ail_list; 54 struct list_head *head = &gl->gl_ail_list;
@@ -57,7 +58,9 @@ static void __gfs2_ail_flush(struct gfs2_glock *gl, bool fsync)
57 58
58 gfs2_log_lock(sdp); 59 gfs2_log_lock(sdp);
59 spin_lock(&sdp->sd_ail_lock); 60 spin_lock(&sdp->sd_ail_lock);
60 list_for_each_entry_safe(bd, tmp, head, bd_ail_gl_list) { 61 list_for_each_entry_safe_reverse(bd, tmp, head, bd_ail_gl_list) {
62 if (nr_revokes == 0)
63 break;
61 bh = bd->bd_bh; 64 bh = bd->bd_bh;
62 if (bh->b_state & b_state) { 65 if (bh->b_state & b_state) {
63 if (fsync) 66 if (fsync)
@@ -65,6 +68,7 @@ static void __gfs2_ail_flush(struct gfs2_glock *gl, bool fsync)
65 gfs2_ail_error(gl, bh); 68 gfs2_ail_error(gl, bh);
66 } 69 }
67 gfs2_trans_add_revoke(sdp, bd); 70 gfs2_trans_add_revoke(sdp, bd);
71 nr_revokes--;
68 } 72 }
69 GLOCK_BUG_ON(gl, !fsync && atomic_read(&gl->gl_ail_count)); 73 GLOCK_BUG_ON(gl, !fsync && atomic_read(&gl->gl_ail_count));
70 spin_unlock(&sdp->sd_ail_lock); 74 spin_unlock(&sdp->sd_ail_lock);
@@ -91,7 +95,7 @@ static void gfs2_ail_empty_gl(struct gfs2_glock *gl)
91 WARN_ON_ONCE(current->journal_info); 95 WARN_ON_ONCE(current->journal_info);
92 current->journal_info = &tr; 96 current->journal_info = &tr;
93 97
94 __gfs2_ail_flush(gl, 0); 98 __gfs2_ail_flush(gl, 0, tr.tr_revokes);
95 99
96 gfs2_trans_end(sdp); 100 gfs2_trans_end(sdp);
97 gfs2_log_flush(sdp, NULL); 101 gfs2_log_flush(sdp, NULL);
@@ -101,15 +105,19 @@ void gfs2_ail_flush(struct gfs2_glock *gl, bool fsync)
101{ 105{
102 struct gfs2_sbd *sdp = gl->gl_sbd; 106 struct gfs2_sbd *sdp = gl->gl_sbd;
103 unsigned int revokes = atomic_read(&gl->gl_ail_count); 107 unsigned int revokes = atomic_read(&gl->gl_ail_count);
108 unsigned int max_revokes = (sdp->sd_sb.sb_bsize - sizeof(struct gfs2_log_descriptor)) / sizeof(u64);
104 int ret; 109 int ret;
105 110
106 if (!revokes) 111 if (!revokes)
107 return; 112 return;
108 113
109 ret = gfs2_trans_begin(sdp, 0, revokes); 114 while (revokes > max_revokes)
115 max_revokes += (sdp->sd_sb.sb_bsize - sizeof(struct gfs2_meta_header)) / sizeof(u64);
116
117 ret = gfs2_trans_begin(sdp, 0, max_revokes);
110 if (ret) 118 if (ret)
111 return; 119 return;
112 __gfs2_ail_flush(gl, fsync); 120 __gfs2_ail_flush(gl, fsync, max_revokes);
113 gfs2_trans_end(sdp); 121 gfs2_trans_end(sdp);
114 gfs2_log_flush(sdp, NULL); 122 gfs2_log_flush(sdp, NULL);
115} 123}
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index bbb2715171cd..64915eeae5a7 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -594,7 +594,7 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
594 } 594 }
595 gfs2_glock_dq_uninit(ghs); 595 gfs2_glock_dq_uninit(ghs);
596 if (IS_ERR(d)) 596 if (IS_ERR(d))
597 return PTR_RET(d); 597 return PTR_ERR(d);
598 return error; 598 return error;
599 } else if (error != -ENOENT) { 599 } else if (error != -ENOENT) {
600 goto fail_gunlock; 600 goto fail_gunlock;
@@ -1750,6 +1750,10 @@ static ssize_t gfs2_getxattr(struct dentry *dentry, const char *name,
1750 struct gfs2_holder gh; 1750 struct gfs2_holder gh;
1751 int ret; 1751 int ret;
1752 1752
1753 /* For selinux during lookup */
1754 if (gfs2_glock_is_locked_by_me(ip->i_gl))
1755 return generic_getxattr(dentry, name, data, size);
1756
1753 gfs2_holder_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &gh); 1757 gfs2_holder_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &gh);
1754 ret = gfs2_glock_nq(&gh); 1758 ret = gfs2_glock_nq(&gh);
1755 if (ret == 0) { 1759 if (ret == 0) {
diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c
index e04d0e09ee7b..7b0f5043cf24 100644
--- a/fs/gfs2/main.c
+++ b/fs/gfs2/main.c
@@ -155,7 +155,7 @@ static int __init init_gfs2_fs(void)
155 goto fail_wq; 155 goto fail_wq;
156 156
157 gfs2_control_wq = alloc_workqueue("gfs2_control", 157 gfs2_control_wq = alloc_workqueue("gfs2_control",
158 WQ_NON_REENTRANT | WQ_UNBOUND | WQ_FREEZABLE, 0); 158 WQ_UNBOUND | WQ_FREEZABLE, 0);
159 if (!gfs2_control_wq) 159 if (!gfs2_control_wq)
160 goto fail_recovery; 160 goto fail_recovery;
161 161
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index a3f868ae3fd4..d19b30ababf1 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -463,6 +463,14 @@ static struct inode *hugetlbfs_get_root(struct super_block *sb,
463 return inode; 463 return inode;
464} 464}
465 465
466/*
467 * Hugetlbfs is not reclaimable; therefore its i_mmap_mutex will never
468 * be taken from reclaim -- unlike regular filesystems. This needs an
469 * annotation because huge_pmd_share() does an allocation under
470 * i_mmap_mutex.
471 */
472struct lock_class_key hugetlbfs_i_mmap_mutex_key;
473
466static struct inode *hugetlbfs_get_inode(struct super_block *sb, 474static struct inode *hugetlbfs_get_inode(struct super_block *sb,
467 struct inode *dir, 475 struct inode *dir,
468 umode_t mode, dev_t dev) 476 umode_t mode, dev_t dev)
@@ -474,6 +482,8 @@ static struct inode *hugetlbfs_get_inode(struct super_block *sb,
474 struct hugetlbfs_inode_info *info; 482 struct hugetlbfs_inode_info *info;
475 inode->i_ino = get_next_ino(); 483 inode->i_ino = get_next_ino();
476 inode_init_owner(inode, dir, mode); 484 inode_init_owner(inode, dir, mode);
485 lockdep_set_class(&inode->i_mapping->i_mmap_mutex,
486 &hugetlbfs_i_mmap_mutex_key);
477 inode->i_mapping->a_ops = &hugetlbfs_aops; 487 inode->i_mapping->a_ops = &hugetlbfs_aops;
478 inode->i_mapping->backing_dev_info =&hugetlbfs_backing_dev_info; 488 inode->i_mapping->backing_dev_info =&hugetlbfs_backing_dev_info;
479 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; 489 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
@@ -916,14 +926,8 @@ static int get_hstate_idx(int page_size_log)
916 return h - hstates; 926 return h - hstates;
917} 927}
918 928
919static char *hugetlb_dname(struct dentry *dentry, char *buffer, int buflen)
920{
921 return dynamic_dname(dentry, buffer, buflen, "/%s (deleted)",
922 dentry->d_name.name);
923}
924
925static struct dentry_operations anon_ops = { 929static struct dentry_operations anon_ops = {
926 .d_dname = hugetlb_dname 930 .d_dname = simple_dname
927}; 931};
928 932
929/* 933/*
diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c
index 01bfe7662751..41e491b8e5d7 100644
--- a/fs/lockd/clntlock.c
+++ b/fs/lockd/clntlock.c
@@ -64,12 +64,17 @@ struct nlm_host *nlmclnt_init(const struct nlmclnt_initdata *nlm_init)
64 nlm_init->protocol, nlm_version, 64 nlm_init->protocol, nlm_version,
65 nlm_init->hostname, nlm_init->noresvport, 65 nlm_init->hostname, nlm_init->noresvport,
66 nlm_init->net); 66 nlm_init->net);
67 if (host == NULL) { 67 if (host == NULL)
68 lockd_down(nlm_init->net); 68 goto out_nohost;
69 return ERR_PTR(-ENOLCK); 69 if (host->h_rpcclnt == NULL && nlm_bind_host(host) == NULL)
70 } 70 goto out_nobind;
71 71
72 return host; 72 return host;
73out_nobind:
74 nlmclnt_release_host(host);
75out_nohost:
76 lockd_down(nlm_init->net);
77 return ERR_PTR(-ENOLCK);
73} 78}
74EXPORT_SYMBOL_GPL(nlmclnt_init); 79EXPORT_SYMBOL_GPL(nlmclnt_init);
75 80
diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c
index 9760ecb9b60f..acd394716349 100644
--- a/fs/lockd/clntproc.c
+++ b/fs/lockd/clntproc.c
@@ -125,14 +125,15 @@ static void nlmclnt_setlockargs(struct nlm_rqst *req, struct file_lock *fl)
125{ 125{
126 struct nlm_args *argp = &req->a_args; 126 struct nlm_args *argp = &req->a_args;
127 struct nlm_lock *lock = &argp->lock; 127 struct nlm_lock *lock = &argp->lock;
128 char *nodename = req->a_host->h_rpcclnt->cl_nodename;
128 129
129 nlmclnt_next_cookie(&argp->cookie); 130 nlmclnt_next_cookie(&argp->cookie);
130 memcpy(&lock->fh, NFS_FH(file_inode(fl->fl_file)), sizeof(struct nfs_fh)); 131 memcpy(&lock->fh, NFS_FH(file_inode(fl->fl_file)), sizeof(struct nfs_fh));
131 lock->caller = utsname()->nodename; 132 lock->caller = nodename;
132 lock->oh.data = req->a_owner; 133 lock->oh.data = req->a_owner;
133 lock->oh.len = snprintf(req->a_owner, sizeof(req->a_owner), "%u@%s", 134 lock->oh.len = snprintf(req->a_owner, sizeof(req->a_owner), "%u@%s",
134 (unsigned int)fl->fl_u.nfs_fl.owner->pid, 135 (unsigned int)fl->fl_u.nfs_fl.owner->pid,
135 utsname()->nodename); 136 nodename);
136 lock->svid = fl->fl_u.nfs_fl.owner->pid; 137 lock->svid = fl->fl_u.nfs_fl.owner->pid;
137 lock->fl.fl_start = fl->fl_start; 138 lock->fl.fl_start = fl->fl_start;
138 lock->fl.fl_end = fl->fl_end; 139 lock->fl.fl_end = fl->fl_end;
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
index 067778b0ccc9..e066a3902973 100644
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -951,6 +951,7 @@ nlmsvc_retry_blocked(void)
951 unsigned long timeout = MAX_SCHEDULE_TIMEOUT; 951 unsigned long timeout = MAX_SCHEDULE_TIMEOUT;
952 struct nlm_block *block; 952 struct nlm_block *block;
953 953
954 spin_lock(&nlm_blocked_lock);
954 while (!list_empty(&nlm_blocked) && !kthread_should_stop()) { 955 while (!list_empty(&nlm_blocked) && !kthread_should_stop()) {
955 block = list_entry(nlm_blocked.next, struct nlm_block, b_list); 956 block = list_entry(nlm_blocked.next, struct nlm_block, b_list);
956 957
@@ -960,6 +961,7 @@ nlmsvc_retry_blocked(void)
960 timeout = block->b_when - jiffies; 961 timeout = block->b_when - jiffies;
961 break; 962 break;
962 } 963 }
964 spin_unlock(&nlm_blocked_lock);
963 965
964 dprintk("nlmsvc_retry_blocked(%p, when=%ld)\n", 966 dprintk("nlmsvc_retry_blocked(%p, when=%ld)\n",
965 block, block->b_when); 967 block, block->b_when);
@@ -969,7 +971,9 @@ nlmsvc_retry_blocked(void)
969 retry_deferred_block(block); 971 retry_deferred_block(block);
970 } else 972 } else
971 nlmsvc_grant_blocked(block); 973 nlmsvc_grant_blocked(block);
974 spin_lock(&nlm_blocked_lock);
972 } 975 }
976 spin_unlock(&nlm_blocked_lock);
973 977
974 return timeout; 978 return timeout;
975} 979}
diff --git a/fs/namei.c b/fs/namei.c
index 8b61d103a8a7..89a612e392eb 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -3671,15 +3671,11 @@ SYSCALL_DEFINE5(linkat, int, olddfd, const char __user *, oldname,
3671 if ((flags & ~(AT_SYMLINK_FOLLOW | AT_EMPTY_PATH)) != 0) 3671 if ((flags & ~(AT_SYMLINK_FOLLOW | AT_EMPTY_PATH)) != 0)
3672 return -EINVAL; 3672 return -EINVAL;
3673 /* 3673 /*
3674 * To use null names we require CAP_DAC_READ_SEARCH 3674 * Using empty names is equivalent to using AT_SYMLINK_FOLLOW
3675 * This ensures that not everyone will be able to create 3675 * on /proc/self/fd/<fd>.
3676 * handlink using the passed filedescriptor.
3677 */ 3676 */
3678 if (flags & AT_EMPTY_PATH) { 3677 if (flags & AT_EMPTY_PATH)
3679 if (!capable(CAP_DAC_READ_SEARCH))
3680 return -ENOENT;
3681 how = LOOKUP_EMPTY; 3678 how = LOOKUP_EMPTY;
3682 }
3683 3679
3684 if (flags & AT_SYMLINK_FOLLOW) 3680 if (flags & AT_SYMLINK_FOLLOW)
3685 how |= LOOKUP_FOLLOW; 3681 how |= LOOKUP_FOLLOW;
diff --git a/fs/namespace.c b/fs/namespace.c
index 7b1ca9ba0b0a..a45ba4f267fe 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1429,7 +1429,7 @@ struct vfsmount *collect_mounts(struct path *path)
1429 CL_COPY_ALL | CL_PRIVATE); 1429 CL_COPY_ALL | CL_PRIVATE);
1430 namespace_unlock(); 1430 namespace_unlock();
1431 if (IS_ERR(tree)) 1431 if (IS_ERR(tree))
1432 return NULL; 1432 return ERR_CAST(tree);
1433 return &tree->mnt; 1433 return &tree->mnt;
1434} 1434}
1435 1435
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index af6e806044d7..941246f2b43d 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -463,7 +463,6 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr, st
463 unlock_new_inode(inode); 463 unlock_new_inode(inode);
464 } else 464 } else
465 nfs_refresh_inode(inode, fattr); 465 nfs_refresh_inode(inode, fattr);
466 nfs_setsecurity(inode, fattr, label);
467 dprintk("NFS: nfs_fhget(%s/%Ld fh_crc=0x%08x ct=%d)\n", 466 dprintk("NFS: nfs_fhget(%s/%Ld fh_crc=0x%08x ct=%d)\n",
468 inode->i_sb->s_id, 467 inode->i_sb->s_id,
469 (long long)NFS_FILEID(inode), 468 (long long)NFS_FILEID(inode),
@@ -963,9 +962,15 @@ EXPORT_SYMBOL_GPL(nfs_revalidate_inode);
963static int nfs_invalidate_mapping(struct inode *inode, struct address_space *mapping) 962static int nfs_invalidate_mapping(struct inode *inode, struct address_space *mapping)
964{ 963{
965 struct nfs_inode *nfsi = NFS_I(inode); 964 struct nfs_inode *nfsi = NFS_I(inode);
966 965 int ret;
966
967 if (mapping->nrpages != 0) { 967 if (mapping->nrpages != 0) {
968 int ret = invalidate_inode_pages2(mapping); 968 if (S_ISREG(inode->i_mode)) {
969 ret = nfs_sync_mapping(mapping);
970 if (ret < 0)
971 return ret;
972 }
973 ret = invalidate_inode_pages2(mapping);
969 if (ret < 0) 974 if (ret < 0)
970 return ret; 975 return ret;
971 } 976 }
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index cf11799297c4..108a774095f7 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -3071,15 +3071,13 @@ struct rpc_clnt *
3071nfs4_proc_lookup_mountpoint(struct inode *dir, struct qstr *name, 3071nfs4_proc_lookup_mountpoint(struct inode *dir, struct qstr *name,
3072 struct nfs_fh *fhandle, struct nfs_fattr *fattr) 3072 struct nfs_fh *fhandle, struct nfs_fattr *fattr)
3073{ 3073{
3074 struct rpc_clnt *client = NFS_CLIENT(dir);
3074 int status; 3075 int status;
3075 struct rpc_clnt *client = rpc_clone_client(NFS_CLIENT(dir));
3076 3076
3077 status = nfs4_proc_lookup_common(&client, dir, name, fhandle, fattr, NULL); 3077 status = nfs4_proc_lookup_common(&client, dir, name, fhandle, fattr, NULL);
3078 if (status < 0) { 3078 if (status < 0)
3079 rpc_shutdown_client(client);
3080 return ERR_PTR(status); 3079 return ERR_PTR(status);
3081 } 3080 return (client == NFS_CLIENT(dir)) ? rpc_clone_client(client) : client;
3082 return client;
3083} 3081}
3084 3082
3085static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry) 3083static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry)
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 0abfb8466e79..3850b018815f 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -999,6 +999,7 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap,
999 __be32 *p; 999 __be32 *p;
1000 __be32 *q; 1000 __be32 *q;
1001 int len; 1001 int len;
1002 uint32_t bmval_len = 2;
1002 uint32_t bmval0 = 0; 1003 uint32_t bmval0 = 0;
1003 uint32_t bmval1 = 0; 1004 uint32_t bmval1 = 0;
1004 uint32_t bmval2 = 0; 1005 uint32_t bmval2 = 0;
@@ -1010,7 +1011,7 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap,
1010 * = 40 bytes, plus any contribution from variable-length fields 1011 * = 40 bytes, plus any contribution from variable-length fields
1011 * such as owner/group. 1012 * such as owner/group.
1012 */ 1013 */
1013 len = 20; 1014 len = 8;
1014 1015
1015 /* Sigh */ 1016 /* Sigh */
1016 if (iap->ia_valid & ATTR_SIZE) 1017 if (iap->ia_valid & ATTR_SIZE)
@@ -1040,8 +1041,6 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap,
1040 } 1041 }
1041 len += 4 + (XDR_QUADLEN(owner_grouplen) << 2); 1042 len += 4 + (XDR_QUADLEN(owner_grouplen) << 2);
1042 } 1043 }
1043 if (label)
1044 len += 4 + 4 + 4 + (XDR_QUADLEN(label->len) << 2);
1045 if (iap->ia_valid & ATTR_ATIME_SET) 1044 if (iap->ia_valid & ATTR_ATIME_SET)
1046 len += 16; 1045 len += 16;
1047 else if (iap->ia_valid & ATTR_ATIME) 1046 else if (iap->ia_valid & ATTR_ATIME)
@@ -1050,15 +1049,22 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap,
1050 len += 16; 1049 len += 16;
1051 else if (iap->ia_valid & ATTR_MTIME) 1050 else if (iap->ia_valid & ATTR_MTIME)
1052 len += 4; 1051 len += 4;
1052 if (label) {
1053 len += 4 + 4 + 4 + (XDR_QUADLEN(label->len) << 2);
1054 bmval_len = 3;
1055 }
1056
1057 len += bmval_len << 2;
1053 p = reserve_space(xdr, len); 1058 p = reserve_space(xdr, len);
1054 1059
1055 /* 1060 /*
1056 * We write the bitmap length now, but leave the bitmap and the attribute 1061 * We write the bitmap length now, but leave the bitmap and the attribute
1057 * buffer length to be backfilled at the end of this routine. 1062 * buffer length to be backfilled at the end of this routine.
1058 */ 1063 */
1059 *p++ = cpu_to_be32(3); 1064 *p++ = cpu_to_be32(bmval_len);
1060 q = p; 1065 q = p;
1061 p += 4; 1066 /* Skip bitmap entries + attrlen */
1067 p += bmval_len + 1;
1062 1068
1063 if (iap->ia_valid & ATTR_SIZE) { 1069 if (iap->ia_valid & ATTR_SIZE) {
1064 bmval0 |= FATTR4_WORD0_SIZE; 1070 bmval0 |= FATTR4_WORD0_SIZE;
@@ -1112,10 +1118,11 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap,
1112 len, ((char *)p - (char *)q) + 4); 1118 len, ((char *)p - (char *)q) + 4);
1113 BUG(); 1119 BUG();
1114 } 1120 }
1115 len = (char *)p - (char *)q - 16;
1116 *q++ = htonl(bmval0); 1121 *q++ = htonl(bmval0);
1117 *q++ = htonl(bmval1); 1122 *q++ = htonl(bmval1);
1118 *q++ = htonl(bmval2); 1123 if (bmval_len == 3)
1124 *q++ = htonl(bmval2);
1125 len = (char *)p - (char *)(q + 1);
1119 *q = htonl(len); 1126 *q = htonl(len);
1120 1127
1121/* out: */ 1128/* out: */
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 71fdc0dfa0d2..f6db66d8f647 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -2478,6 +2478,10 @@ struct dentry *nfs_fs_mount_common(struct nfs_server *server,
2478 if (server->flags & NFS_MOUNT_NOAC) 2478 if (server->flags & NFS_MOUNT_NOAC)
2479 sb_mntdata.mntflags |= MS_SYNCHRONOUS; 2479 sb_mntdata.mntflags |= MS_SYNCHRONOUS;
2480 2480
2481 if (mount_info->cloned != NULL && mount_info->cloned->sb != NULL)
2482 if (mount_info->cloned->sb->s_flags & MS_SYNCHRONOUS)
2483 sb_mntdata.mntflags |= MS_SYNCHRONOUS;
2484
2481 /* Get a superblock - note that we may end up sharing one that already exists */ 2485 /* Get a superblock - note that we may end up sharing one that already exists */
2482 s = sget(nfs_mod->nfs_fs, compare_super, nfs_set_super, flags, &sb_mntdata); 2486 s = sget(nfs_mod->nfs_fs, compare_super, nfs_set_super, flags, &sb_mntdata);
2483 if (IS_ERR(s)) { 2487 if (IS_ERR(s)) {
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index a7cee864e7b2..419572f33b72 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -1293,7 +1293,7 @@ nfsd4_proc_compound(struct svc_rqst *rqstp,
1293 * According to RFC3010, this takes precedence over all other errors. 1293 * According to RFC3010, this takes precedence over all other errors.
1294 */ 1294 */
1295 status = nfserr_minor_vers_mismatch; 1295 status = nfserr_minor_vers_mismatch;
1296 if (args->minorversion > nfsd_supported_minorversion) 1296 if (nfsd_minorversion(args->minorversion, NFSD_TEST) <= 0)
1297 goto out; 1297 goto out;
1298 1298
1299 status = nfs41_check_op_ordering(args); 1299 status = nfs41_check_op_ordering(args);
@@ -1524,7 +1524,7 @@ static inline u32 nfsd4_write_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
1524static inline u32 nfsd4_exchange_id_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) 1524static inline u32 nfsd4_exchange_id_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
1525{ 1525{
1526 return (op_encode_hdr_size + 2 + 1 + /* eir_clientid, eir_sequenceid */\ 1526 return (op_encode_hdr_size + 2 + 1 + /* eir_clientid, eir_sequenceid */\
1527 1 + 1 + 0 + /* eir_flags, spr_how, SP4_NONE (for now) */\ 1527 1 + 1 + 2 + /* eir_flags, spr_how, spo_must_enforce & _allow */\
1528 2 + /*eir_server_owner.so_minor_id */\ 1528 2 + /*eir_server_owner.so_minor_id */\
1529 /* eir_server_owner.so_major_id<> */\ 1529 /* eir_server_owner.so_major_id<> */\
1530 XDR_QUADLEN(NFS4_OPAQUE_LIMIT) + 1 +\ 1530 XDR_QUADLEN(NFS4_OPAQUE_LIMIT) + 1 +\
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 280acef6f0dc..43f42290e5df 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -1264,6 +1264,8 @@ static bool svc_rqst_integrity_protected(struct svc_rqst *rqstp)
1264 struct svc_cred *cr = &rqstp->rq_cred; 1264 struct svc_cred *cr = &rqstp->rq_cred;
1265 u32 service; 1265 u32 service;
1266 1266
1267 if (!cr->cr_gss_mech)
1268 return false;
1267 service = gss_pseudoflavor_to_service(cr->cr_gss_mech, cr->cr_flavor); 1269 service = gss_pseudoflavor_to_service(cr->cr_gss_mech, cr->cr_flavor);
1268 return service == RPC_GSS_SVC_INTEGRITY || 1270 return service == RPC_GSS_SVC_INTEGRITY ||
1269 service == RPC_GSS_SVC_PRIVACY; 1271 service == RPC_GSS_SVC_PRIVACY;
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 0c0f3ea90de5..c2a4701d7286 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -3360,7 +3360,8 @@ nfsd4_encode_exchange_id(struct nfsd4_compoundres *resp, __be32 nfserr,
3360 8 /* eir_clientid */ + 3360 8 /* eir_clientid */ +
3361 4 /* eir_sequenceid */ + 3361 4 /* eir_sequenceid */ +
3362 4 /* eir_flags */ + 3362 4 /* eir_flags */ +
3363 4 /* spr_how (SP4_NONE) */ + 3363 4 /* spr_how */ +
3364 8 /* spo_must_enforce, spo_must_allow */ +
3364 8 /* so_minor_id */ + 3365 8 /* so_minor_id */ +
3365 4 /* so_major_id.len */ + 3366 4 /* so_major_id.len */ +
3366 (XDR_QUADLEN(major_id_sz) * 4) + 3367 (XDR_QUADLEN(major_id_sz) * 4) +
@@ -3372,8 +3373,6 @@ nfsd4_encode_exchange_id(struct nfsd4_compoundres *resp, __be32 nfserr,
3372 WRITE32(exid->seqid); 3373 WRITE32(exid->seqid);
3373 WRITE32(exid->flags); 3374 WRITE32(exid->flags);
3374 3375
3375 /* state_protect4_r. Currently only support SP4_NONE */
3376 BUG_ON(exid->spa_how != SP4_NONE);
3377 WRITE32(exid->spa_how); 3376 WRITE32(exid->spa_how);
3378 switch (exid->spa_how) { 3377 switch (exid->spa_how) {
3379 case SP4_NONE: 3378 case SP4_NONE:
diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
index 2bbd94e51efc..30f34ab02137 100644
--- a/fs/nfsd/nfsd.h
+++ b/fs/nfsd/nfsd.h
@@ -53,7 +53,6 @@ struct readdir_cd {
53extern struct svc_program nfsd_program; 53extern struct svc_program nfsd_program;
54extern struct svc_version nfsd_version2, nfsd_version3, 54extern struct svc_version nfsd_version2, nfsd_version3,
55 nfsd_version4; 55 nfsd_version4;
56extern u32 nfsd_supported_minorversion;
57extern struct mutex nfsd_mutex; 56extern struct mutex nfsd_mutex;
58extern spinlock_t nfsd_drc_lock; 57extern spinlock_t nfsd_drc_lock;
59extern unsigned long nfsd_drc_max_mem; 58extern unsigned long nfsd_drc_max_mem;
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 6b9f48ca4c25..760c85a6f534 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -116,7 +116,10 @@ struct svc_program nfsd_program = {
116 116
117}; 117};
118 118
119u32 nfsd_supported_minorversion = 1; 119static bool nfsd_supported_minorversions[NFSD_SUPPORTED_MINOR_VERSION + 1] = {
120 [0] = 1,
121 [1] = 1,
122};
120 123
121int nfsd_vers(int vers, enum vers_op change) 124int nfsd_vers(int vers, enum vers_op change)
122{ 125{
@@ -151,15 +154,13 @@ int nfsd_minorversion(u32 minorversion, enum vers_op change)
151 return -1; 154 return -1;
152 switch(change) { 155 switch(change) {
153 case NFSD_SET: 156 case NFSD_SET:
154 nfsd_supported_minorversion = minorversion; 157 nfsd_supported_minorversions[minorversion] = true;
155 break; 158 break;
156 case NFSD_CLEAR: 159 case NFSD_CLEAR:
157 if (minorversion == 0) 160 nfsd_supported_minorversions[minorversion] = false;
158 return -1;
159 nfsd_supported_minorversion = minorversion - 1;
160 break; 161 break;
161 case NFSD_TEST: 162 case NFSD_TEST:
162 return minorversion <= nfsd_supported_minorversion; 163 return nfsd_supported_minorversions[minorversion];
163 case NFSD_AVAIL: 164 case NFSD_AVAIL:
164 return minorversion <= NFSD_SUPPORTED_MINOR_VERSION; 165 return minorversion <= NFSD_SUPPORTED_MINOR_VERSION;
165 } 166 }
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 8ff6a0019b0b..c827acb0e943 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -830,9 +830,10 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
830 flags = O_WRONLY|O_LARGEFILE; 830 flags = O_WRONLY|O_LARGEFILE;
831 } 831 }
832 *filp = dentry_open(&path, flags, current_cred()); 832 *filp = dentry_open(&path, flags, current_cred());
833 if (IS_ERR(*filp)) 833 if (IS_ERR(*filp)) {
834 host_err = PTR_ERR(*filp); 834 host_err = PTR_ERR(*filp);
835 else { 835 *filp = NULL;
836 } else {
836 host_err = ima_file_check(*filp, may_flags); 837 host_err = ima_file_check(*filp, may_flags);
837 838
838 if (may_flags & NFSD_MAY_64BIT_COOKIE) 839 if (may_flags & NFSD_MAY_64BIT_COOKIE)
diff --git a/fs/nilfs2/segbuf.c b/fs/nilfs2/segbuf.c
index dc9a913784ab..2d8be51f90dc 100644
--- a/fs/nilfs2/segbuf.c
+++ b/fs/nilfs2/segbuf.c
@@ -345,8 +345,7 @@ static void nilfs_end_bio_write(struct bio *bio, int err)
345 345
346 if (err == -EOPNOTSUPP) { 346 if (err == -EOPNOTSUPP) {
347 set_bit(BIO_EOPNOTSUPP, &bio->bi_flags); 347 set_bit(BIO_EOPNOTSUPP, &bio->bi_flags);
348 bio_put(bio); 348 /* to be detected by nilfs_segbuf_submit_bio() */
349 /* to be detected by submit_seg_bio() */
350 } 349 }
351 350
352 if (!uptodate) 351 if (!uptodate)
@@ -377,12 +376,12 @@ static int nilfs_segbuf_submit_bio(struct nilfs_segment_buffer *segbuf,
377 bio->bi_private = segbuf; 376 bio->bi_private = segbuf;
378 bio_get(bio); 377 bio_get(bio);
379 submit_bio(mode, bio); 378 submit_bio(mode, bio);
379 segbuf->sb_nbio++;
380 if (bio_flagged(bio, BIO_EOPNOTSUPP)) { 380 if (bio_flagged(bio, BIO_EOPNOTSUPP)) {
381 bio_put(bio); 381 bio_put(bio);
382 err = -EOPNOTSUPP; 382 err = -EOPNOTSUPP;
383 goto failed; 383 goto failed;
384 } 384 }
385 segbuf->sb_nbio++;
386 bio_put(bio); 385 bio_put(bio);
387 386
388 wi->bio = NULL; 387 wi->bio = NULL;
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 79736a28d84f..2abf97b2a592 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -1757,7 +1757,7 @@ try_again:
1757 goto out; 1757 goto out;
1758 } else if (ret == 1) { 1758 } else if (ret == 1) {
1759 clusters_need = wc->w_clen; 1759 clusters_need = wc->w_clen;
1760 ret = ocfs2_refcount_cow(inode, filp, di_bh, 1760 ret = ocfs2_refcount_cow(inode, di_bh,
1761 wc->w_cpos, wc->w_clen, UINT_MAX); 1761 wc->w_cpos, wc->w_clen, UINT_MAX);
1762 if (ret) { 1762 if (ret) {
1763 mlog_errno(ret); 1763 mlog_errno(ret);
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index eb760d8acd50..30544ce8e9f7 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -2153,11 +2153,9 @@ int ocfs2_empty_dir(struct inode *inode)
2153{ 2153{
2154 int ret; 2154 int ret;
2155 struct ocfs2_empty_dir_priv priv = { 2155 struct ocfs2_empty_dir_priv priv = {
2156 .ctx.actor = ocfs2_empty_dir_filldir 2156 .ctx.actor = ocfs2_empty_dir_filldir,
2157 }; 2157 };
2158 2158
2159 memset(&priv, 0, sizeof(priv));
2160
2161 if (ocfs2_dir_indexed(inode)) { 2159 if (ocfs2_dir_indexed(inode)) {
2162 ret = ocfs2_empty_dir_dx(inode, &priv); 2160 ret = ocfs2_empty_dir_dx(inode, &priv);
2163 if (ret) 2161 if (ret)
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 41000f223ca4..3261d71319ee 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -370,7 +370,7 @@ static int ocfs2_cow_file_pos(struct inode *inode,
370 if (!(ext_flags & OCFS2_EXT_REFCOUNTED)) 370 if (!(ext_flags & OCFS2_EXT_REFCOUNTED))
371 goto out; 371 goto out;
372 372
373 return ocfs2_refcount_cow(inode, NULL, fe_bh, cpos, 1, cpos+1); 373 return ocfs2_refcount_cow(inode, fe_bh, cpos, 1, cpos+1);
374 374
375out: 375out:
376 return status; 376 return status;
@@ -899,7 +899,7 @@ static int ocfs2_zero_extend_get_range(struct inode *inode,
899 zero_clusters = last_cpos - zero_cpos; 899 zero_clusters = last_cpos - zero_cpos;
900 900
901 if (needs_cow) { 901 if (needs_cow) {
902 rc = ocfs2_refcount_cow(inode, NULL, di_bh, zero_cpos, 902 rc = ocfs2_refcount_cow(inode, di_bh, zero_cpos,
903 zero_clusters, UINT_MAX); 903 zero_clusters, UINT_MAX);
904 if (rc) { 904 if (rc) {
905 mlog_errno(rc); 905 mlog_errno(rc);
@@ -2078,7 +2078,7 @@ static int ocfs2_prepare_inode_for_refcount(struct inode *inode,
2078 2078
2079 *meta_level = 1; 2079 *meta_level = 1;
2080 2080
2081 ret = ocfs2_refcount_cow(inode, file, di_bh, cpos, clusters, UINT_MAX); 2081 ret = ocfs2_refcount_cow(inode, di_bh, cpos, clusters, UINT_MAX);
2082 if (ret) 2082 if (ret)
2083 mlog_errno(ret); 2083 mlog_errno(ret);
2084out: 2084out:
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h
index 96f9ac237e86..0a992737dcaf 100644
--- a/fs/ocfs2/journal.h
+++ b/fs/ocfs2/journal.h
@@ -537,7 +537,7 @@ static inline int ocfs2_calc_extend_credits(struct super_block *sb,
537 extent_blocks = 1 + 1 + le16_to_cpu(root_el->l_tree_depth); 537 extent_blocks = 1 + 1 + le16_to_cpu(root_el->l_tree_depth);
538 538
539 return bitmap_blocks + sysfile_bitmap_blocks + extent_blocks + 539 return bitmap_blocks + sysfile_bitmap_blocks + extent_blocks +
540 ocfs2_quota_trans_credits(sb) + bits_wanted; 540 ocfs2_quota_trans_credits(sb);
541} 541}
542 542
543static inline int ocfs2_calc_symlink_credits(struct super_block *sb) 543static inline int ocfs2_calc_symlink_credits(struct super_block *sb)
diff --git a/fs/ocfs2/move_extents.c b/fs/ocfs2/move_extents.c
index f1fc172175b6..452068b45749 100644
--- a/fs/ocfs2/move_extents.c
+++ b/fs/ocfs2/move_extents.c
@@ -69,7 +69,7 @@ static int __ocfs2_move_extent(handle_t *handle,
69 u64 ino = ocfs2_metadata_cache_owner(context->et.et_ci); 69 u64 ino = ocfs2_metadata_cache_owner(context->et.et_ci);
70 u64 old_blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cpos); 70 u64 old_blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cpos);
71 71
72 ret = ocfs2_duplicate_clusters_by_page(handle, context->file, cpos, 72 ret = ocfs2_duplicate_clusters_by_page(handle, inode, cpos,
73 p_cpos, new_p_cpos, len); 73 p_cpos, new_p_cpos, len);
74 if (ret) { 74 if (ret) {
75 mlog_errno(ret); 75 mlog_errno(ret);
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index 998b17eda09d..a70d604593b6 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -49,7 +49,6 @@
49 49
50struct ocfs2_cow_context { 50struct ocfs2_cow_context {
51 struct inode *inode; 51 struct inode *inode;
52 struct file *file;
53 u32 cow_start; 52 u32 cow_start;
54 u32 cow_len; 53 u32 cow_len;
55 struct ocfs2_extent_tree data_et; 54 struct ocfs2_extent_tree data_et;
@@ -66,7 +65,7 @@ struct ocfs2_cow_context {
66 u32 *num_clusters, 65 u32 *num_clusters,
67 unsigned int *extent_flags); 66 unsigned int *extent_flags);
68 int (*cow_duplicate_clusters)(handle_t *handle, 67 int (*cow_duplicate_clusters)(handle_t *handle,
69 struct file *file, 68 struct inode *inode,
70 u32 cpos, u32 old_cluster, 69 u32 cpos, u32 old_cluster,
71 u32 new_cluster, u32 new_len); 70 u32 new_cluster, u32 new_len);
72}; 71};
@@ -2922,14 +2921,12 @@ static int ocfs2_clear_cow_buffer(handle_t *handle, struct buffer_head *bh)
2922} 2921}
2923 2922
2924int ocfs2_duplicate_clusters_by_page(handle_t *handle, 2923int ocfs2_duplicate_clusters_by_page(handle_t *handle,
2925 struct file *file, 2924 struct inode *inode,
2926 u32 cpos, u32 old_cluster, 2925 u32 cpos, u32 old_cluster,
2927 u32 new_cluster, u32 new_len) 2926 u32 new_cluster, u32 new_len)
2928{ 2927{
2929 int ret = 0, partial; 2928 int ret = 0, partial;
2930 struct inode *inode = file_inode(file); 2929 struct super_block *sb = inode->i_sb;
2931 struct ocfs2_caching_info *ci = INODE_CACHE(inode);
2932 struct super_block *sb = ocfs2_metadata_cache_get_super(ci);
2933 u64 new_block = ocfs2_clusters_to_blocks(sb, new_cluster); 2930 u64 new_block = ocfs2_clusters_to_blocks(sb, new_cluster);
2934 struct page *page; 2931 struct page *page;
2935 pgoff_t page_index; 2932 pgoff_t page_index;
@@ -2965,6 +2962,11 @@ int ocfs2_duplicate_clusters_by_page(handle_t *handle,
2965 to = map_end & (PAGE_CACHE_SIZE - 1); 2962 to = map_end & (PAGE_CACHE_SIZE - 1);
2966 2963
2967 page = find_or_create_page(mapping, page_index, GFP_NOFS); 2964 page = find_or_create_page(mapping, page_index, GFP_NOFS);
2965 if (!page) {
2966 ret = -ENOMEM;
2967 mlog_errno(ret);
2968 break;
2969 }
2968 2970
2969 /* 2971 /*
2970 * In case PAGE_CACHE_SIZE <= CLUSTER_SIZE, This page 2972 * In case PAGE_CACHE_SIZE <= CLUSTER_SIZE, This page
@@ -2973,13 +2975,6 @@ int ocfs2_duplicate_clusters_by_page(handle_t *handle,
2973 if (PAGE_CACHE_SIZE <= OCFS2_SB(sb)->s_clustersize) 2975 if (PAGE_CACHE_SIZE <= OCFS2_SB(sb)->s_clustersize)
2974 BUG_ON(PageDirty(page)); 2976 BUG_ON(PageDirty(page));
2975 2977
2976 if (PageReadahead(page)) {
2977 page_cache_async_readahead(mapping,
2978 &file->f_ra, file,
2979 page, page_index,
2980 readahead_pages);
2981 }
2982
2983 if (!PageUptodate(page)) { 2978 if (!PageUptodate(page)) {
2984 ret = block_read_full_page(page, ocfs2_get_block); 2979 ret = block_read_full_page(page, ocfs2_get_block);
2985 if (ret) { 2980 if (ret) {
@@ -2999,7 +2994,8 @@ int ocfs2_duplicate_clusters_by_page(handle_t *handle,
2999 } 2994 }
3000 } 2995 }
3001 2996
3002 ocfs2_map_and_dirty_page(inode, handle, from, to, 2997 ocfs2_map_and_dirty_page(inode,
2998 handle, from, to,
3003 page, 0, &new_block); 2999 page, 0, &new_block);
3004 mark_page_accessed(page); 3000 mark_page_accessed(page);
3005unlock: 3001unlock:
@@ -3015,12 +3011,11 @@ unlock:
3015} 3011}
3016 3012
3017int ocfs2_duplicate_clusters_by_jbd(handle_t *handle, 3013int ocfs2_duplicate_clusters_by_jbd(handle_t *handle,
3018 struct file *file, 3014 struct inode *inode,
3019 u32 cpos, u32 old_cluster, 3015 u32 cpos, u32 old_cluster,
3020 u32 new_cluster, u32 new_len) 3016 u32 new_cluster, u32 new_len)
3021{ 3017{
3022 int ret = 0; 3018 int ret = 0;
3023 struct inode *inode = file_inode(file);
3024 struct super_block *sb = inode->i_sb; 3019 struct super_block *sb = inode->i_sb;
3025 struct ocfs2_caching_info *ci = INODE_CACHE(inode); 3020 struct ocfs2_caching_info *ci = INODE_CACHE(inode);
3026 int i, blocks = ocfs2_clusters_to_blocks(sb, new_len); 3021 int i, blocks = ocfs2_clusters_to_blocks(sb, new_len);
@@ -3145,7 +3140,7 @@ static int ocfs2_replace_clusters(handle_t *handle,
3145 3140
3146 /*If the old clusters is unwritten, no need to duplicate. */ 3141 /*If the old clusters is unwritten, no need to duplicate. */
3147 if (!(ext_flags & OCFS2_EXT_UNWRITTEN)) { 3142 if (!(ext_flags & OCFS2_EXT_UNWRITTEN)) {
3148 ret = context->cow_duplicate_clusters(handle, context->file, 3143 ret = context->cow_duplicate_clusters(handle, context->inode,
3149 cpos, old, new, len); 3144 cpos, old, new, len);
3150 if (ret) { 3145 if (ret) {
3151 mlog_errno(ret); 3146 mlog_errno(ret);
@@ -3423,35 +3418,12 @@ static int ocfs2_replace_cow(struct ocfs2_cow_context *context)
3423 return ret; 3418 return ret;
3424} 3419}
3425 3420
3426static void ocfs2_readahead_for_cow(struct inode *inode,
3427 struct file *file,
3428 u32 start, u32 len)
3429{
3430 struct address_space *mapping;
3431 pgoff_t index;
3432 unsigned long num_pages;
3433 int cs_bits = OCFS2_SB(inode->i_sb)->s_clustersize_bits;
3434
3435 if (!file)
3436 return;
3437
3438 mapping = file->f_mapping;
3439 num_pages = (len << cs_bits) >> PAGE_CACHE_SHIFT;
3440 if (!num_pages)
3441 num_pages = 1;
3442
3443 index = ((loff_t)start << cs_bits) >> PAGE_CACHE_SHIFT;
3444 page_cache_sync_readahead(mapping, &file->f_ra, file,
3445 index, num_pages);
3446}
3447
3448/* 3421/*
3449 * Starting at cpos, try to CoW write_len clusters. Don't CoW 3422 * Starting at cpos, try to CoW write_len clusters. Don't CoW
3450 * past max_cpos. This will stop when it runs into a hole or an 3423 * past max_cpos. This will stop when it runs into a hole or an
3451 * unrefcounted extent. 3424 * unrefcounted extent.
3452 */ 3425 */
3453static int ocfs2_refcount_cow_hunk(struct inode *inode, 3426static int ocfs2_refcount_cow_hunk(struct inode *inode,
3454 struct file *file,
3455 struct buffer_head *di_bh, 3427 struct buffer_head *di_bh,
3456 u32 cpos, u32 write_len, u32 max_cpos) 3428 u32 cpos, u32 write_len, u32 max_cpos)
3457{ 3429{
@@ -3480,8 +3452,6 @@ static int ocfs2_refcount_cow_hunk(struct inode *inode,
3480 3452
3481 BUG_ON(cow_len == 0); 3453 BUG_ON(cow_len == 0);
3482 3454
3483 ocfs2_readahead_for_cow(inode, file, cow_start, cow_len);
3484
3485 context = kzalloc(sizeof(struct ocfs2_cow_context), GFP_NOFS); 3455 context = kzalloc(sizeof(struct ocfs2_cow_context), GFP_NOFS);
3486 if (!context) { 3456 if (!context) {
3487 ret = -ENOMEM; 3457 ret = -ENOMEM;
@@ -3503,7 +3473,6 @@ static int ocfs2_refcount_cow_hunk(struct inode *inode,
3503 context->ref_root_bh = ref_root_bh; 3473 context->ref_root_bh = ref_root_bh;
3504 context->cow_duplicate_clusters = ocfs2_duplicate_clusters_by_page; 3474 context->cow_duplicate_clusters = ocfs2_duplicate_clusters_by_page;
3505 context->get_clusters = ocfs2_di_get_clusters; 3475 context->get_clusters = ocfs2_di_get_clusters;
3506 context->file = file;
3507 3476
3508 ocfs2_init_dinode_extent_tree(&context->data_et, 3477 ocfs2_init_dinode_extent_tree(&context->data_et,
3509 INODE_CACHE(inode), di_bh); 3478 INODE_CACHE(inode), di_bh);
@@ -3532,7 +3501,6 @@ out:
3532 * clusters between cpos and cpos+write_len are safe to modify. 3501 * clusters between cpos and cpos+write_len are safe to modify.
3533 */ 3502 */
3534int ocfs2_refcount_cow(struct inode *inode, 3503int ocfs2_refcount_cow(struct inode *inode,
3535 struct file *file,
3536 struct buffer_head *di_bh, 3504 struct buffer_head *di_bh,
3537 u32 cpos, u32 write_len, u32 max_cpos) 3505 u32 cpos, u32 write_len, u32 max_cpos)
3538{ 3506{
@@ -3552,7 +3520,7 @@ int ocfs2_refcount_cow(struct inode *inode,
3552 num_clusters = write_len; 3520 num_clusters = write_len;
3553 3521
3554 if (ext_flags & OCFS2_EXT_REFCOUNTED) { 3522 if (ext_flags & OCFS2_EXT_REFCOUNTED) {
3555 ret = ocfs2_refcount_cow_hunk(inode, file, di_bh, cpos, 3523 ret = ocfs2_refcount_cow_hunk(inode, di_bh, cpos,
3556 num_clusters, max_cpos); 3524 num_clusters, max_cpos);
3557 if (ret) { 3525 if (ret) {
3558 mlog_errno(ret); 3526 mlog_errno(ret);
diff --git a/fs/ocfs2/refcounttree.h b/fs/ocfs2/refcounttree.h
index 7754608c83a4..6422bbcdb525 100644
--- a/fs/ocfs2/refcounttree.h
+++ b/fs/ocfs2/refcounttree.h
@@ -53,7 +53,7 @@ int ocfs2_prepare_refcount_change_for_del(struct inode *inode,
53 int *credits, 53 int *credits,
54 int *ref_blocks); 54 int *ref_blocks);
55int ocfs2_refcount_cow(struct inode *inode, 55int ocfs2_refcount_cow(struct inode *inode,
56 struct file *filep, struct buffer_head *di_bh, 56 struct buffer_head *di_bh,
57 u32 cpos, u32 write_len, u32 max_cpos); 57 u32 cpos, u32 write_len, u32 max_cpos);
58 58
59typedef int (ocfs2_post_refcount_func)(struct inode *inode, 59typedef int (ocfs2_post_refcount_func)(struct inode *inode,
@@ -85,11 +85,11 @@ int ocfs2_refcount_cow_xattr(struct inode *inode,
85 u32 cpos, u32 write_len, 85 u32 cpos, u32 write_len,
86 struct ocfs2_post_refcount *post); 86 struct ocfs2_post_refcount *post);
87int ocfs2_duplicate_clusters_by_page(handle_t *handle, 87int ocfs2_duplicate_clusters_by_page(handle_t *handle,
88 struct file *file, 88 struct inode *inode,
89 u32 cpos, u32 old_cluster, 89 u32 cpos, u32 old_cluster,
90 u32 new_cluster, u32 new_len); 90 u32 new_cluster, u32 new_len);
91int ocfs2_duplicate_clusters_by_jbd(handle_t *handle, 91int ocfs2_duplicate_clusters_by_jbd(handle_t *handle,
92 struct file *file, 92 struct inode *inode,
93 u32 cpos, u32 old_cluster, 93 u32 cpos, u32 old_cluster,
94 u32 new_cluster, u32 new_len); 94 u32 new_cluster, u32 new_len);
95int ocfs2_cow_sync_writeback(struct super_block *sb, 95int ocfs2_cow_sync_writeback(struct super_block *sb,
diff --git a/fs/open.c b/fs/open.c
index 9156cb050d08..7931f76acc2b 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -823,7 +823,7 @@ static inline int build_open_flags(int flags, umode_t mode, struct open_flags *o
823 int lookup_flags = 0; 823 int lookup_flags = 0;
824 int acc_mode; 824 int acc_mode;
825 825
826 if (flags & O_CREAT) 826 if (flags & (O_CREAT | __O_TMPFILE))
827 op->mode = (mode & S_IALLUGO) | S_IFREG; 827 op->mode = (mode & S_IALLUGO) | S_IFREG;
828 else 828 else
829 op->mode = 0; 829 op->mode = 0;
@@ -844,6 +844,8 @@ static inline int build_open_flags(int flags, umode_t mode, struct open_flags *o
844 if ((flags & O_TMPFILE_MASK) != O_TMPFILE) 844 if ((flags & O_TMPFILE_MASK) != O_TMPFILE)
845 return -EINVAL; 845 return -EINVAL;
846 acc_mode = MAY_OPEN | ACC_MODE(flags); 846 acc_mode = MAY_OPEN | ACC_MODE(flags);
847 if (!(acc_mode & MAY_WRITE))
848 return -EINVAL;
847 } else if (flags & O_PATH) { 849 } else if (flags & O_PATH) {
848 /* 850 /*
849 * If we have O_PATH in the open flag. Then we 851 * If we have O_PATH in the open flag. Then we
diff --git a/fs/proc/fd.c b/fs/proc/fd.c
index 75f2890abbd8..0ff80f9b930f 100644
--- a/fs/proc/fd.c
+++ b/fs/proc/fd.c
@@ -230,8 +230,6 @@ static int proc_readfd_common(struct file *file, struct dir_context *ctx,
230 230
231 if (!dir_emit_dots(file, ctx)) 231 if (!dir_emit_dots(file, ctx))
232 goto out; 232 goto out;
233 if (!dir_emit_dots(file, ctx))
234 goto out;
235 files = get_files_struct(p); 233 files = get_files_struct(p);
236 if (!files) 234 if (!files)
237 goto out; 235 goto out;
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index 94441a407337..737e15615b04 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -271,7 +271,7 @@ int proc_readdir_de(struct proc_dir_entry *de, struct file *file,
271 de = next; 271 de = next;
272 } while (de); 272 } while (de);
273 spin_unlock(&proc_subdir_lock); 273 spin_unlock(&proc_subdir_lock);
274 return 0; 274 return 1;
275} 275}
276 276
277int proc_readdir(struct file *file, struct dir_context *ctx) 277int proc_readdir(struct file *file, struct dir_context *ctx)
diff --git a/fs/proc/root.c b/fs/proc/root.c
index 229e366598da..e0a790da726d 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -205,7 +205,9 @@ static struct dentry *proc_root_lookup(struct inode * dir, struct dentry * dentr
205static int proc_root_readdir(struct file *file, struct dir_context *ctx) 205static int proc_root_readdir(struct file *file, struct dir_context *ctx)
206{ 206{
207 if (ctx->pos < FIRST_PROCESS_ENTRY) { 207 if (ctx->pos < FIRST_PROCESS_ENTRY) {
208 proc_readdir(file, ctx); 208 int error = proc_readdir(file, ctx);
209 if (unlikely(error <= 0))
210 return error;
209 ctx->pos = FIRST_PROCESS_ENTRY; 211 ctx->pos = FIRST_PROCESS_ENTRY;
210 } 212 }
211 213
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index dbf61f6174f0..107d026f5d6e 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -730,8 +730,16 @@ static inline void clear_soft_dirty(struct vm_area_struct *vma,
730 * of how soft-dirty works. 730 * of how soft-dirty works.
731 */ 731 */
732 pte_t ptent = *pte; 732 pte_t ptent = *pte;
733 ptent = pte_wrprotect(ptent); 733
734 ptent = pte_clear_flags(ptent, _PAGE_SOFT_DIRTY); 734 if (pte_present(ptent)) {
735 ptent = pte_wrprotect(ptent);
736 ptent = pte_clear_flags(ptent, _PAGE_SOFT_DIRTY);
737 } else if (is_swap_pte(ptent)) {
738 ptent = pte_swp_clear_soft_dirty(ptent);
739 } else if (pte_file(ptent)) {
740 ptent = pte_file_clear_soft_dirty(ptent);
741 }
742
735 set_pte_at(vma->vm_mm, addr, pte, ptent); 743 set_pte_at(vma->vm_mm, addr, pte, ptent);
736#endif 744#endif
737} 745}
@@ -752,14 +760,15 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr,
752 pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); 760 pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
753 for (; addr != end; pte++, addr += PAGE_SIZE) { 761 for (; addr != end; pte++, addr += PAGE_SIZE) {
754 ptent = *pte; 762 ptent = *pte;
755 if (!pte_present(ptent))
756 continue;
757 763
758 if (cp->type == CLEAR_REFS_SOFT_DIRTY) { 764 if (cp->type == CLEAR_REFS_SOFT_DIRTY) {
759 clear_soft_dirty(vma, addr, pte); 765 clear_soft_dirty(vma, addr, pte);
760 continue; 766 continue;
761 } 767 }
762 768
769 if (!pte_present(ptent))
770 continue;
771
763 page = vm_normal_page(vma, addr, ptent); 772 page = vm_normal_page(vma, addr, ptent);
764 if (!page) 773 if (!page)
765 continue; 774 continue;
@@ -859,7 +868,7 @@ typedef struct {
859} pagemap_entry_t; 868} pagemap_entry_t;
860 869
861struct pagemapread { 870struct pagemapread {
862 int pos, len; 871 int pos, len; /* units: PM_ENTRY_BYTES, not bytes */
863 pagemap_entry_t *buffer; 872 pagemap_entry_t *buffer;
864 bool v2; 873 bool v2;
865}; 874};
@@ -867,7 +876,7 @@ struct pagemapread {
867#define PAGEMAP_WALK_SIZE (PMD_SIZE) 876#define PAGEMAP_WALK_SIZE (PMD_SIZE)
868#define PAGEMAP_WALK_MASK (PMD_MASK) 877#define PAGEMAP_WALK_MASK (PMD_MASK)
869 878
870#define PM_ENTRY_BYTES sizeof(u64) 879#define PM_ENTRY_BYTES sizeof(pagemap_entry_t)
871#define PM_STATUS_BITS 3 880#define PM_STATUS_BITS 3
872#define PM_STATUS_OFFSET (64 - PM_STATUS_BITS) 881#define PM_STATUS_OFFSET (64 - PM_STATUS_BITS)
873#define PM_STATUS_MASK (((1LL << PM_STATUS_BITS) - 1) << PM_STATUS_OFFSET) 882#define PM_STATUS_MASK (((1LL << PM_STATUS_BITS) - 1) << PM_STATUS_OFFSET)
@@ -930,8 +939,10 @@ static void pte_to_pagemap_entry(pagemap_entry_t *pme, struct pagemapread *pm,
930 flags = PM_PRESENT; 939 flags = PM_PRESENT;
931 page = vm_normal_page(vma, addr, pte); 940 page = vm_normal_page(vma, addr, pte);
932 } else if (is_swap_pte(pte)) { 941 } else if (is_swap_pte(pte)) {
933 swp_entry_t entry = pte_to_swp_entry(pte); 942 swp_entry_t entry;
934 943 if (pte_swp_soft_dirty(pte))
944 flags2 |= __PM_SOFT_DIRTY;
945 entry = pte_to_swp_entry(pte);
935 frame = swp_type(entry) | 946 frame = swp_type(entry) |
936 (swp_offset(entry) << MAX_SWAPFILES_SHIFT); 947 (swp_offset(entry) << MAX_SWAPFILES_SHIFT);
937 flags = PM_SWAP; 948 flags = PM_SWAP;
@@ -1116,8 +1127,8 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
1116 goto out_task; 1127 goto out_task;
1117 1128
1118 pm.v2 = soft_dirty_cleared; 1129 pm.v2 = soft_dirty_cleared;
1119 pm.len = PM_ENTRY_BYTES * (PAGEMAP_WALK_SIZE >> PAGE_SHIFT); 1130 pm.len = (PAGEMAP_WALK_SIZE >> PAGE_SHIFT);
1120 pm.buffer = kmalloc(pm.len, GFP_TEMPORARY); 1131 pm.buffer = kmalloc(pm.len * PM_ENTRY_BYTES, GFP_TEMPORARY);
1121 ret = -ENOMEM; 1132 ret = -ENOMEM;
1122 if (!pm.buffer) 1133 if (!pm.buffer)
1123 goto out_task; 1134 goto out_task;
diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c
index 28503172f2e4..a1a16eb97c7b 100644
--- a/fs/proc/vmcore.c
+++ b/fs/proc/vmcore.c
@@ -223,7 +223,7 @@ static inline char *alloc_elfnotes_buf(size_t notes_sz)
223 * regions in the 1st kernel pointed to by PT_LOAD entries) into 223 * regions in the 1st kernel pointed to by PT_LOAD entries) into
224 * virtually contiguous user-space in ELF layout. 224 * virtually contiguous user-space in ELF layout.
225 */ 225 */
226#ifdef CONFIG_MMU 226#if defined(CONFIG_MMU) && !defined(CONFIG_S390)
227static int mmap_vmcore(struct file *file, struct vm_area_struct *vma) 227static int mmap_vmcore(struct file *file, struct vm_area_struct *vma)
228{ 228{
229 size_t size = vma->vm_end - vma->vm_start; 229 size_t size = vma->vm_end - vma->vm_start;
diff --git a/fs/reiserfs/procfs.c b/fs/reiserfs/procfs.c
index 33532f79b4f7..a958444a75fc 100644
--- a/fs/reiserfs/procfs.c
+++ b/fs/reiserfs/procfs.c
@@ -19,12 +19,13 @@
19/* 19/*
20 * LOCKING: 20 * LOCKING:
21 * 21 *
22 * We rely on new Alexander Viro's super-block locking. 22 * These guys are evicted from procfs as the very first step in ->kill_sb().
23 * 23 *
24 */ 24 */
25 25
26static int show_version(struct seq_file *m, struct super_block *sb) 26static int show_version(struct seq_file *m, void *unused)
27{ 27{
28 struct super_block *sb = m->private;
28 char *format; 29 char *format;
29 30
30 if (REISERFS_SB(sb)->s_properties & (1 << REISERFS_3_6)) { 31 if (REISERFS_SB(sb)->s_properties & (1 << REISERFS_3_6)) {
@@ -66,8 +67,9 @@ static int show_version(struct seq_file *m, struct super_block *sb)
66#define DJP( x ) le32_to_cpu( jp -> x ) 67#define DJP( x ) le32_to_cpu( jp -> x )
67#define JF( x ) ( r -> s_journal -> x ) 68#define JF( x ) ( r -> s_journal -> x )
68 69
69static int show_super(struct seq_file *m, struct super_block *sb) 70static int show_super(struct seq_file *m, void *unused)
70{ 71{
72 struct super_block *sb = m->private;
71 struct reiserfs_sb_info *r = REISERFS_SB(sb); 73 struct reiserfs_sb_info *r = REISERFS_SB(sb);
72 74
73 seq_printf(m, "state: \t%s\n" 75 seq_printf(m, "state: \t%s\n"
@@ -128,8 +130,9 @@ static int show_super(struct seq_file *m, struct super_block *sb)
128 return 0; 130 return 0;
129} 131}
130 132
131static int show_per_level(struct seq_file *m, struct super_block *sb) 133static int show_per_level(struct seq_file *m, void *unused)
132{ 134{
135 struct super_block *sb = m->private;
133 struct reiserfs_sb_info *r = REISERFS_SB(sb); 136 struct reiserfs_sb_info *r = REISERFS_SB(sb);
134 int level; 137 int level;
135 138
@@ -186,8 +189,9 @@ static int show_per_level(struct seq_file *m, struct super_block *sb)
186 return 0; 189 return 0;
187} 190}
188 191
189static int show_bitmap(struct seq_file *m, struct super_block *sb) 192static int show_bitmap(struct seq_file *m, void *unused)
190{ 193{
194 struct super_block *sb = m->private;
191 struct reiserfs_sb_info *r = REISERFS_SB(sb); 195 struct reiserfs_sb_info *r = REISERFS_SB(sb);
192 196
193 seq_printf(m, "free_block: %lu\n" 197 seq_printf(m, "free_block: %lu\n"
@@ -218,8 +222,9 @@ static int show_bitmap(struct seq_file *m, struct super_block *sb)
218 return 0; 222 return 0;
219} 223}
220 224
221static int show_on_disk_super(struct seq_file *m, struct super_block *sb) 225static int show_on_disk_super(struct seq_file *m, void *unused)
222{ 226{
227 struct super_block *sb = m->private;
223 struct reiserfs_sb_info *sb_info = REISERFS_SB(sb); 228 struct reiserfs_sb_info *sb_info = REISERFS_SB(sb);
224 struct reiserfs_super_block *rs = sb_info->s_rs; 229 struct reiserfs_super_block *rs = sb_info->s_rs;
225 int hash_code = DFL(s_hash_function_code); 230 int hash_code = DFL(s_hash_function_code);
@@ -261,8 +266,9 @@ static int show_on_disk_super(struct seq_file *m, struct super_block *sb)
261 return 0; 266 return 0;
262} 267}
263 268
264static int show_oidmap(struct seq_file *m, struct super_block *sb) 269static int show_oidmap(struct seq_file *m, void *unused)
265{ 270{
271 struct super_block *sb = m->private;
266 struct reiserfs_sb_info *sb_info = REISERFS_SB(sb); 272 struct reiserfs_sb_info *sb_info = REISERFS_SB(sb);
267 struct reiserfs_super_block *rs = sb_info->s_rs; 273 struct reiserfs_super_block *rs = sb_info->s_rs;
268 unsigned int mapsize = le16_to_cpu(rs->s_v1.s_oid_cursize); 274 unsigned int mapsize = le16_to_cpu(rs->s_v1.s_oid_cursize);
@@ -291,8 +297,9 @@ static int show_oidmap(struct seq_file *m, struct super_block *sb)
291 return 0; 297 return 0;
292} 298}
293 299
294static int show_journal(struct seq_file *m, struct super_block *sb) 300static int show_journal(struct seq_file *m, void *unused)
295{ 301{
302 struct super_block *sb = m->private;
296 struct reiserfs_sb_info *r = REISERFS_SB(sb); 303 struct reiserfs_sb_info *r = REISERFS_SB(sb);
297 struct reiserfs_super_block *rs = r->s_rs; 304 struct reiserfs_super_block *rs = r->s_rs;
298 struct journal_params *jp = &rs->s_v1.s_journal; 305 struct journal_params *jp = &rs->s_v1.s_journal;
@@ -383,92 +390,24 @@ static int show_journal(struct seq_file *m, struct super_block *sb)
383 return 0; 390 return 0;
384} 391}
385 392
386/* iterator */
387static int test_sb(struct super_block *sb, void *data)
388{
389 return data == sb;
390}
391
392static int set_sb(struct super_block *sb, void *data)
393{
394 return -ENOENT;
395}
396
397struct reiserfs_seq_private {
398 struct super_block *sb;
399 int (*show) (struct seq_file *, struct super_block *);
400};
401
402static void *r_start(struct seq_file *m, loff_t * pos)
403{
404 struct reiserfs_seq_private *priv = m->private;
405 loff_t l = *pos;
406
407 if (l)
408 return NULL;
409
410 if (IS_ERR(sget(&reiserfs_fs_type, test_sb, set_sb, 0, priv->sb)))
411 return NULL;
412
413 up_write(&priv->sb->s_umount);
414 return priv->sb;
415}
416
417static void *r_next(struct seq_file *m, void *v, loff_t * pos)
418{
419 ++*pos;
420 if (v)
421 deactivate_super(v);
422 return NULL;
423}
424
425static void r_stop(struct seq_file *m, void *v)
426{
427 if (v)
428 deactivate_super(v);
429}
430
431static int r_show(struct seq_file *m, void *v)
432{
433 struct reiserfs_seq_private *priv = m->private;
434 return priv->show(m, v);
435}
436
437static const struct seq_operations r_ops = {
438 .start = r_start,
439 .next = r_next,
440 .stop = r_stop,
441 .show = r_show,
442};
443
444static int r_open(struct inode *inode, struct file *file) 393static int r_open(struct inode *inode, struct file *file)
445{ 394{
446 struct reiserfs_seq_private *priv; 395 return single_open(file, PDE_DATA(inode),
447 int ret = seq_open_private(file, &r_ops, 396 proc_get_parent_data(inode));
448 sizeof(struct reiserfs_seq_private));
449
450 if (!ret) {
451 struct seq_file *m = file->private_data;
452 priv = m->private;
453 priv->sb = proc_get_parent_data(inode);
454 priv->show = PDE_DATA(inode);
455 }
456 return ret;
457} 397}
458 398
459static const struct file_operations r_file_operations = { 399static const struct file_operations r_file_operations = {
460 .open = r_open, 400 .open = r_open,
461 .read = seq_read, 401 .read = seq_read,
462 .llseek = seq_lseek, 402 .llseek = seq_lseek,
463 .release = seq_release_private, 403 .release = single_release,
464 .owner = THIS_MODULE,
465}; 404};
466 405
467static struct proc_dir_entry *proc_info_root = NULL; 406static struct proc_dir_entry *proc_info_root = NULL;
468static const char proc_info_root_name[] = "fs/reiserfs"; 407static const char proc_info_root_name[] = "fs/reiserfs";
469 408
470static void add_file(struct super_block *sb, char *name, 409static void add_file(struct super_block *sb, char *name,
471 int (*func) (struct seq_file *, struct super_block *)) 410 int (*func) (struct seq_file *, void *))
472{ 411{
473 proc_create_data(name, 0, REISERFS_SB(sb)->procdir, 412 proc_create_data(name, 0, REISERFS_SB(sb)->procdir,
474 &r_file_operations, func); 413 &r_file_operations, func);
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index f8a23c3078f8..e2e202a07b31 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -499,6 +499,7 @@ int remove_save_link(struct inode *inode, int truncate)
499static void reiserfs_kill_sb(struct super_block *s) 499static void reiserfs_kill_sb(struct super_block *s)
500{ 500{
501 if (REISERFS_SB(s)) { 501 if (REISERFS_SB(s)) {
502 reiserfs_proc_info_done(s);
502 /* 503 /*
503 * Force any pending inode evictions to occur now. Any 504 * Force any pending inode evictions to occur now. Any
504 * inodes to be removed that have extended attributes 505 * inodes to be removed that have extended attributes
@@ -554,8 +555,6 @@ static void reiserfs_put_super(struct super_block *s)
554 REISERFS_SB(s)->reserved_blocks); 555 REISERFS_SB(s)->reserved_blocks);
555 } 556 }
556 557
557 reiserfs_proc_info_done(s);
558
559 reiserfs_write_unlock(s); 558 reiserfs_write_unlock(s);
560 mutex_destroy(&REISERFS_SB(s)->lock); 559 mutex_destroy(&REISERFS_SB(s)->lock);
561 kfree(s->s_fs_info); 560 kfree(s->s_fs_info);
diff --git a/fs/super.c b/fs/super.c
index 7465d4364208..68307c029228 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -336,19 +336,19 @@ EXPORT_SYMBOL(deactivate_super);
336 * and want to turn it into a full-blown active reference. grab_super() 336 * and want to turn it into a full-blown active reference. grab_super()
337 * is called with sb_lock held and drops it. Returns 1 in case of 337 * is called with sb_lock held and drops it. Returns 1 in case of
338 * success, 0 if we had failed (superblock contents was already dead or 338 * success, 0 if we had failed (superblock contents was already dead or
339 * dying when grab_super() had been called). 339 * dying when grab_super() had been called). Note that this is only
340 * called for superblocks not in rundown mode (== ones still on ->fs_supers
341 * of their type), so increment of ->s_count is OK here.
340 */ 342 */
341static int grab_super(struct super_block *s) __releases(sb_lock) 343static int grab_super(struct super_block *s) __releases(sb_lock)
342{ 344{
343 if (atomic_inc_not_zero(&s->s_active)) {
344 spin_unlock(&sb_lock);
345 return 1;
346 }
347 /* it's going away */
348 s->s_count++; 345 s->s_count++;
349 spin_unlock(&sb_lock); 346 spin_unlock(&sb_lock);
350 /* wait for it to die */
351 down_write(&s->s_umount); 347 down_write(&s->s_umount);
348 if ((s->s_flags & MS_BORN) && atomic_inc_not_zero(&s->s_active)) {
349 put_super(s);
350 return 1;
351 }
352 up_write(&s->s_umount); 352 up_write(&s->s_umount);
353 put_super(s); 353 put_super(s);
354 return 0; 354 return 0;
@@ -463,11 +463,6 @@ retry:
463 destroy_super(s); 463 destroy_super(s);
464 s = NULL; 464 s = NULL;
465 } 465 }
466 down_write(&old->s_umount);
467 if (unlikely(!(old->s_flags & MS_BORN))) {
468 deactivate_locked_super(old);
469 goto retry;
470 }
471 return old; 466 return old;
472 } 467 }
473 } 468 }
@@ -660,10 +655,10 @@ restart:
660 if (hlist_unhashed(&sb->s_instances)) 655 if (hlist_unhashed(&sb->s_instances))
661 continue; 656 continue;
662 if (sb->s_bdev == bdev) { 657 if (sb->s_bdev == bdev) {
663 if (grab_super(sb)) /* drops sb_lock */ 658 if (!grab_super(sb))
664 return sb;
665 else
666 goto restart; 659 goto restart;
660 up_write(&sb->s_umount);
661 return sb;
667 } 662 }
668 } 663 }
669 spin_unlock(&sb_lock); 664 spin_unlock(&sb_lock);
diff --git a/fs/sysfs/group.c b/fs/sysfs/group.c
index aec3d5c98c94..09a1a25cd145 100644
--- a/fs/sysfs/group.c
+++ b/fs/sysfs/group.c
@@ -20,38 +20,64 @@ static void remove_files(struct sysfs_dirent *dir_sd, struct kobject *kobj,
20 const struct attribute_group *grp) 20 const struct attribute_group *grp)
21{ 21{
22 struct attribute *const* attr; 22 struct attribute *const* attr;
23 int i; 23 struct bin_attribute *const* bin_attr;
24 24
25 for (i = 0, attr = grp->attrs; *attr; i++, attr++) 25 if (grp->attrs)
26 sysfs_hash_and_remove(dir_sd, NULL, (*attr)->name); 26 for (attr = grp->attrs; *attr; attr++)
27 sysfs_hash_and_remove(dir_sd, NULL, (*attr)->name);
28 if (grp->bin_attrs)
29 for (bin_attr = grp->bin_attrs; *bin_attr; bin_attr++)
30 sysfs_remove_bin_file(kobj, *bin_attr);
27} 31}
28 32
29static int create_files(struct sysfs_dirent *dir_sd, struct kobject *kobj, 33static int create_files(struct sysfs_dirent *dir_sd, struct kobject *kobj,
30 const struct attribute_group *grp, int update) 34 const struct attribute_group *grp, int update)
31{ 35{
32 struct attribute *const* attr; 36 struct attribute *const* attr;
37 struct bin_attribute *const* bin_attr;
33 int error = 0, i; 38 int error = 0, i;
34 39
35 for (i = 0, attr = grp->attrs; *attr && !error; i++, attr++) { 40 if (grp->attrs) {
36 umode_t mode = 0; 41 for (i = 0, attr = grp->attrs; *attr && !error; i++, attr++) {
42 umode_t mode = 0;
43
44 /*
45 * In update mode, we're changing the permissions or
46 * visibility. Do this by first removing then
47 * re-adding (if required) the file.
48 */
49 if (update)
50 sysfs_hash_and_remove(dir_sd, NULL,
51 (*attr)->name);
52 if (grp->is_visible) {
53 mode = grp->is_visible(kobj, *attr, i);
54 if (!mode)
55 continue;
56 }
57 error = sysfs_add_file_mode(dir_sd, *attr,
58 SYSFS_KOBJ_ATTR,
59 (*attr)->mode | mode);
60 if (unlikely(error))
61 break;
62 }
63 if (error) {
64 remove_files(dir_sd, kobj, grp);
65 goto exit;
66 }
67 }
37 68
38 /* in update mode, we're changing the permissions or 69 if (grp->bin_attrs) {
39 * visibility. Do this by first removing then 70 for (bin_attr = grp->bin_attrs; *bin_attr; bin_attr++) {
40 * re-adding (if required) the file */ 71 if (update)
41 if (update) 72 sysfs_remove_bin_file(kobj, *bin_attr);
42 sysfs_hash_and_remove(dir_sd, NULL, (*attr)->name); 73 error = sysfs_create_bin_file(kobj, *bin_attr);
43 if (grp->is_visible) { 74 if (error)
44 mode = grp->is_visible(kobj, *attr, i); 75 break;
45 if (!mode)
46 continue;
47 } 76 }
48 error = sysfs_add_file_mode(dir_sd, *attr, SYSFS_KOBJ_ATTR, 77 if (error)
49 (*attr)->mode | mode); 78 remove_files(dir_sd, kobj, grp);
50 if (unlikely(error))
51 break;
52 } 79 }
53 if (error) 80exit:
54 remove_files(dir_sd, kobj, grp);
55 return error; 81 return error;
56} 82}
57 83
@@ -67,8 +93,8 @@ static int internal_create_group(struct kobject *kobj, int update,
67 /* Updates may happen before the object has been instantiated */ 93 /* Updates may happen before the object has been instantiated */
68 if (unlikely(update && !kobj->sd)) 94 if (unlikely(update && !kobj->sd))
69 return -EINVAL; 95 return -EINVAL;
70 if (!grp->attrs) { 96 if (!grp->attrs && !grp->bin_attrs) {
71 WARN(1, "sysfs: attrs not set by subsystem for group: %s/%s\n", 97 WARN(1, "sysfs: (bin_)attrs not set by subsystem for group: %s/%s\n",
72 kobj->name, grp->name ? "" : grp->name); 98 kobj->name, grp->name ? "" : grp->name);
73 return -EINVAL; 99 return -EINVAL;
74 } 100 }
diff --git a/fs/xfs/xfs_dinode.h b/fs/xfs/xfs_dinode.h
index 07d735a80a0f..e5869b50dc41 100644
--- a/fs/xfs/xfs_dinode.h
+++ b/fs/xfs/xfs_dinode.h
@@ -39,6 +39,9 @@ typedef struct xfs_timestamp {
39 * There is a very similar struct icdinode in xfs_inode which matches the 39 * There is a very similar struct icdinode in xfs_inode which matches the
40 * layout of the first 96 bytes of this structure, but is kept in native 40 * layout of the first 96 bytes of this structure, but is kept in native
41 * format instead of big endian. 41 * format instead of big endian.
42 *
43 * Note: di_flushiter is only used by v1/2 inodes - it's effectively a zeroed
44 * padding field for v3 inodes.
42 */ 45 */
43typedef struct xfs_dinode { 46typedef struct xfs_dinode {
44 __be16 di_magic; /* inode magic # = XFS_DINODE_MAGIC */ 47 __be16 di_magic; /* inode magic # = XFS_DINODE_MAGIC */
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index b78481f99d9d..bb262c25c8de 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -896,7 +896,6 @@ xfs_dinode_to_disk(
896 to->di_projid_lo = cpu_to_be16(from->di_projid_lo); 896 to->di_projid_lo = cpu_to_be16(from->di_projid_lo);
897 to->di_projid_hi = cpu_to_be16(from->di_projid_hi); 897 to->di_projid_hi = cpu_to_be16(from->di_projid_hi);
898 memcpy(to->di_pad, from->di_pad, sizeof(to->di_pad)); 898 memcpy(to->di_pad, from->di_pad, sizeof(to->di_pad));
899 to->di_flushiter = cpu_to_be16(from->di_flushiter);
900 to->di_atime.t_sec = cpu_to_be32(from->di_atime.t_sec); 899 to->di_atime.t_sec = cpu_to_be32(from->di_atime.t_sec);
901 to->di_atime.t_nsec = cpu_to_be32(from->di_atime.t_nsec); 900 to->di_atime.t_nsec = cpu_to_be32(from->di_atime.t_nsec);
902 to->di_mtime.t_sec = cpu_to_be32(from->di_mtime.t_sec); 901 to->di_mtime.t_sec = cpu_to_be32(from->di_mtime.t_sec);
@@ -924,6 +923,9 @@ xfs_dinode_to_disk(
924 to->di_lsn = cpu_to_be64(from->di_lsn); 923 to->di_lsn = cpu_to_be64(from->di_lsn);
925 memcpy(to->di_pad2, from->di_pad2, sizeof(to->di_pad2)); 924 memcpy(to->di_pad2, from->di_pad2, sizeof(to->di_pad2));
926 uuid_copy(&to->di_uuid, &from->di_uuid); 925 uuid_copy(&to->di_uuid, &from->di_uuid);
926 to->di_flushiter = 0;
927 } else {
928 to->di_flushiter = cpu_to_be16(from->di_flushiter);
927 } 929 }
928} 930}
929 931
@@ -1029,10 +1031,14 @@ xfs_dinode_calc_crc(
1029/* 1031/*
1030 * Read the disk inode attributes into the in-core inode structure. 1032 * Read the disk inode attributes into the in-core inode structure.
1031 * 1033 *
1032 * If we are initialising a new inode and we are not utilising the 1034 * For version 5 superblocks, if we are initialising a new inode and we are not
1033 * XFS_MOUNT_IKEEP inode cluster mode, we can simple build the new inode core 1035 * utilising the XFS_MOUNT_IKEEP inode cluster mode, we can simple build the new
1034 * with a random generation number. If we are keeping inodes around, we need to 1036 * inode core with a random generation number. If we are keeping inodes around,
1035 * read the inode cluster to get the existing generation number off disk. 1037 * we need to read the inode cluster to get the existing generation number off
1038 * disk. Further, if we are using version 4 superblocks (i.e. v1/v2 inode
1039 * format) then log recovery is dependent on the di_flushiter field being
1040 * initialised from the current on-disk value and hence we must also read the
1041 * inode off disk.
1036 */ 1042 */
1037int 1043int
1038xfs_iread( 1044xfs_iread(
@@ -1054,6 +1060,7 @@ xfs_iread(
1054 1060
1055 /* shortcut IO on inode allocation if possible */ 1061 /* shortcut IO on inode allocation if possible */
1056 if ((iget_flags & XFS_IGET_CREATE) && 1062 if ((iget_flags & XFS_IGET_CREATE) &&
1063 xfs_sb_version_hascrc(&mp->m_sb) &&
1057 !(mp->m_flags & XFS_MOUNT_IKEEP)) { 1064 !(mp->m_flags & XFS_MOUNT_IKEEP)) {
1058 /* initialise the on-disk inode core */ 1065 /* initialise the on-disk inode core */
1059 memset(&ip->i_d, 0, sizeof(ip->i_d)); 1066 memset(&ip->i_d, 0, sizeof(ip->i_d));
@@ -2882,12 +2889,18 @@ xfs_iflush_int(
2882 __func__, ip->i_ino, ip->i_d.di_forkoff, ip); 2889 __func__, ip->i_ino, ip->i_d.di_forkoff, ip);
2883 goto corrupt_out; 2890 goto corrupt_out;
2884 } 2891 }
2892
2885 /* 2893 /*
2886 * bump the flush iteration count, used to detect flushes which 2894 * Inode item log recovery for v1/v2 inodes are dependent on the
2887 * postdate a log record during recovery. This is redundant as we now 2895 * di_flushiter count for correct sequencing. We bump the flush
2888 * log every change and hence this can't happen. Still, it doesn't hurt. 2896 * iteration count so we can detect flushes which postdate a log record
2897 * during recovery. This is redundant as we now log every change and
2898 * hence this can't happen but we need to still do it to ensure
2899 * backwards compatibility with old kernels that predate logging all
2900 * inode changes.
2889 */ 2901 */
2890 ip->i_d.di_flushiter++; 2902 if (ip->i_d.di_version < 3)
2903 ip->i_d.di_flushiter++;
2891 2904
2892 /* 2905 /*
2893 * Copy the dirty parts of the inode into the on-disk 2906 * Copy the dirty parts of the inode into the on-disk
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 6fcc910a50b9..7681b19aa5dc 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -2592,8 +2592,16 @@ xlog_recover_inode_pass2(
2592 goto error; 2592 goto error;
2593 } 2593 }
2594 2594
2595 /* Skip replay when the on disk inode is newer than the log one */ 2595 /*
2596 if (dicp->di_flushiter < be16_to_cpu(dip->di_flushiter)) { 2596 * di_flushiter is only valid for v1/2 inodes. All changes for v3 inodes
2597 * are transactional and if ordering is necessary we can determine that
2598 * more accurately by the LSN field in the V3 inode core. Don't trust
2599 * the inode versions we might be changing them here - use the
2600 * superblock flag to determine whether we need to look at di_flushiter
2601 * to skip replay when the on disk inode is newer than the log one
2602 */
2603 if (!xfs_sb_version_hascrc(&mp->m_sb) &&
2604 dicp->di_flushiter < be16_to_cpu(dip->di_flushiter)) {
2597 /* 2605 /*
2598 * Deal with the wrap case, DI_MAX_FLUSH is less 2606 * Deal with the wrap case, DI_MAX_FLUSH is less
2599 * than smaller numbers 2607 * than smaller numbers
@@ -2608,6 +2616,7 @@ xlog_recover_inode_pass2(
2608 goto error; 2616 goto error;
2609 } 2617 }
2610 } 2618 }
2619
2611 /* Take the opportunity to reset the flush iteration count */ 2620 /* Take the opportunity to reset the flush iteration count */
2612 dicp->di_flushiter = 0; 2621 dicp->di_flushiter = 0;
2613 2622