about | summary | refs | log | tree | commit | diff | stats
path: root/fs
diff options
context:
space:
mode:
author Ingo Molnar <mingo@kernel.org> 2016-03-07 03:27:30 -0500
committer Ingo Molnar <mingo@kernel.org> 2016-03-07 03:27:30 -0500
commit ec87e1cf7d8399d81d8965c6d852f8057a8dd687 (patch)
tree 472a168fa4861090edf110c8a9712a5c15ea259f /fs
parent 869ae76147ffdf21ad24f0e599303cd58a2bb39f (diff)
parent f6cede5b49e822ebc41a099fe41ab4989f64e2cb (diff)
Merge tag 'v4.5-rc7' into x86/asm, to pick up SMAP fix
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'fs')
-rw-r--r-- fs/affs/file.c 5
-rw-r--r-- fs/binfmt_elf.c 2
-rw-r--r-- fs/block_dev.c 19
-rw-r--r-- fs/btrfs/inode.c 2
-rw-r--r-- fs/btrfs/root-tree.c 10
-rw-r--r-- fs/ceph/addr.c 4
-rw-r--r-- fs/ceph/caps.c 27
-rw-r--r-- fs/ceph/inode.c 2
-rw-r--r-- fs/ceph/mds_client.c 16
-rw-r--r-- fs/ceph/mds_client.h 1
-rw-r--r-- fs/ceph/super.h 1
-rw-r--r-- fs/cifs/cifs_dfs_ref.c 2
-rw-r--r-- fs/cifs/cifsencrypt.c 2
-rw-r--r-- fs/cifs/cifsfs.c 1
-rw-r--r-- fs/cifs/cifsfs.h 12
-rw-r--r-- fs/cifs/cifssmb.c 21
-rw-r--r-- fs/cifs/connect.c 3
-rw-r--r-- fs/cifs/smb2pdu.c 24
-rw-r--r-- fs/dax.c 21
-rw-r--r-- fs/dcache.c 20
-rw-r--r-- fs/direct-io.c 2
-rw-r--r-- fs/ext2/file.c 19
-rw-r--r-- fs/ext2/inode.c 16
-rw-r--r-- fs/ext4/balloc.c 7
-rw-r--r-- fs/ext4/crypto.c 56
-rw-r--r-- fs/ext4/dir.c 13
-rw-r--r-- fs/ext4/ext4.h 1
-rw-r--r-- fs/ext4/extents.c 4
-rw-r--r-- fs/ext4/file.c 28
-rw-r--r-- fs/ext4/ialloc.c 6
-rw-r--r-- fs/ext4/inode.c 78
-rw-r--r-- fs/ext4/ioctl.c 7
-rw-r--r-- fs/ext4/mballoc.c 2
-rw-r--r-- fs/ext4/move_extent.c 15
-rw-r--r-- fs/ext4/namei.c 26
-rw-r--r-- fs/ext4/resize.c 2
-rw-r--r-- fs/fs-writeback.c 39
-rw-r--r-- fs/hpfs/namei.c 31
-rw-r--r-- fs/inode.c 6
-rw-r--r-- fs/jffs2/README.Locking 5
-rw-r--r-- fs/jffs2/build.c 75
-rw-r--r-- fs/jffs2/file.c 39
-rw-r--r-- fs/jffs2/gc.c 17
-rw-r--r-- fs/jffs2/nodelist.h 6
-rw-r--r-- fs/namei.c 22
-rw-r--r-- fs/nfs/blocklayout/extent_tree.c 10
-rw-r--r-- fs/nfs/nfs42proc.c 119
-rw-r--r-- fs/nfs/nfs4proc.c 4
-rw-r--r-- fs/nfs/pnfs.c 55
-rw-r--r-- fs/notify/mark.c 53
-rw-r--r-- fs/ocfs2/aops.c 1
-rw-r--r-- fs/pnode.c 9
-rw-r--r-- fs/read_write.c 9
-rw-r--r-- fs/super.c 1
-rw-r--r-- fs/userfaultfd.c 6
-rw-r--r-- fs/xattr.c 6
-rw-r--r-- fs/xfs/xfs_aops.c 6
-rw-r--r-- fs/xfs/xfs_aops.h 1
-rw-r--r-- fs/xfs/xfs_bmap_util.c 3
59 files changed, 686 insertions, 314 deletions
diff --git a/fs/affs/file.c b/fs/affs/file.c
index 0548c53f41d5..22fc7c802d69 100644
--- a/fs/affs/file.c
+++ b/fs/affs/file.c
@@ -511,8 +511,6 @@ affs_do_readpage_ofs(struct page *page, unsigned to)
511 pr_debug("%s(%lu, %ld, 0, %d)\n", __func__, inode->i_ino, 511 pr_debug("%s(%lu, %ld, 0, %d)\n", __func__, inode->i_ino,
512 page->index, to); 512 page->index, to);
513 BUG_ON(to > PAGE_CACHE_SIZE); 513 BUG_ON(to > PAGE_CACHE_SIZE);
514 kmap(page);
515 data = page_address(page);
516 bsize = AFFS_SB(sb)->s_data_blksize; 514 bsize = AFFS_SB(sb)->s_data_blksize;
517 tmp = page->index << PAGE_CACHE_SHIFT; 515 tmp = page->index << PAGE_CACHE_SHIFT;
518 bidx = tmp / bsize; 516 bidx = tmp / bsize;
@@ -524,14 +522,15 @@ affs_do_readpage_ofs(struct page *page, unsigned to)
524 return PTR_ERR(bh); 522 return PTR_ERR(bh);
525 tmp = min(bsize - boff, to - pos); 523 tmp = min(bsize - boff, to - pos);
526 BUG_ON(pos + tmp > to || tmp > bsize); 524 BUG_ON(pos + tmp > to || tmp > bsize);
525 data = kmap_atomic(page);
527 memcpy(data + pos, AFFS_DATA(bh) + boff, tmp); 526 memcpy(data + pos, AFFS_DATA(bh) + boff, tmp);
527 kunmap_atomic(data);
528 affs_brelse(bh); 528 affs_brelse(bh);
529 bidx++; 529 bidx++;
530 pos += tmp; 530 pos += tmp;
531 boff = 0; 531 boff = 0;
532 } 532 }
533 flush_dcache_page(page); 533 flush_dcache_page(page);
534 kunmap(page);
535 return 0; 534 return 0;
536} 535}
537 536
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 051ea4809c14..7d914c67a9d0 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -653,7 +653,7 @@ static unsigned long randomize_stack_top(unsigned long stack_top)
653 653
654 if ((current->flags & PF_RANDOMIZE) && 654 if ((current->flags & PF_RANDOMIZE) &&
655 !(current->personality & ADDR_NO_RANDOMIZE)) { 655 !(current->personality & ADDR_NO_RANDOMIZE)) {
656 random_variable = (unsigned long) get_random_int(); 656 random_variable = get_random_long();
657 random_variable &= STACK_RND_MASK; 657 random_variable &= STACK_RND_MASK;
658 random_variable <<= PAGE_SHIFT; 658 random_variable <<= PAGE_SHIFT;
659 } 659 }
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 39b3a174a425..826b164a4b5b 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1201,7 +1201,11 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
1201 bdev->bd_disk = disk; 1201 bdev->bd_disk = disk;
1202 bdev->bd_queue = disk->queue; 1202 bdev->bd_queue = disk->queue;
1203 bdev->bd_contains = bdev; 1203 bdev->bd_contains = bdev;
1204 bdev->bd_inode->i_flags = disk->fops->direct_access ? S_DAX : 0; 1204 if (IS_ENABLED(CONFIG_BLK_DEV_DAX) && disk->fops->direct_access)
1205 bdev->bd_inode->i_flags = S_DAX;
1206 else
1207 bdev->bd_inode->i_flags = 0;
1208
1205 if (!partno) { 1209 if (!partno) {
1206 ret = -ENXIO; 1210 ret = -ENXIO;
1207 bdev->bd_part = disk_get_part(disk, partno); 1211 bdev->bd_part = disk_get_part(disk, partno);
@@ -1693,13 +1697,24 @@ static int blkdev_releasepage(struct page *page, gfp_t wait)
1693 return try_to_free_buffers(page); 1697 return try_to_free_buffers(page);
1694} 1698}
1695 1699
1700static int blkdev_writepages(struct address_space *mapping,
1701 struct writeback_control *wbc)
1702{
1703 if (dax_mapping(mapping)) {
1704 struct block_device *bdev = I_BDEV(mapping->host);
1705
1706 return dax_writeback_mapping_range(mapping, bdev, wbc);
1707 }
1708 return generic_writepages(mapping, wbc);
1709}
1710
1696static const struct address_space_operations def_blk_aops = { 1711static const struct address_space_operations def_blk_aops = {
1697 .readpage = blkdev_readpage, 1712 .readpage = blkdev_readpage,
1698 .readpages = blkdev_readpages, 1713 .readpages = blkdev_readpages,
1699 .writepage = blkdev_writepage, 1714 .writepage = blkdev_writepage,
1700 .write_begin = blkdev_write_begin, 1715 .write_begin = blkdev_write_begin,
1701 .write_end = blkdev_write_end, 1716 .write_end = blkdev_write_end,
1702 .writepages = generic_writepages, 1717 .writepages = blkdev_writepages,
1703 .releasepage = blkdev_releasepage, 1718 .releasepage = blkdev_releasepage,
1704 .direct_IO = blkdev_direct_IO, 1719 .direct_IO = blkdev_direct_IO,
1705 .is_dirty_writeback = buffer_check_dirty_writeback, 1720 .is_dirty_writeback = buffer_check_dirty_writeback,
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 151b7c71b868..d96f5cf38a2d 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -7986,6 +7986,7 @@ static void btrfs_endio_direct_read(struct bio *bio)
7986 7986
7987 kfree(dip); 7987 kfree(dip);
7988 7988
7989 dio_bio->bi_error = bio->bi_error;
7989 dio_end_io(dio_bio, bio->bi_error); 7990 dio_end_io(dio_bio, bio->bi_error);
7990 7991
7991 if (io_bio->end_io) 7992 if (io_bio->end_io)
@@ -8040,6 +8041,7 @@ static void btrfs_endio_direct_write(struct bio *bio)
8040 8041
8041 kfree(dip); 8042 kfree(dip);
8042 8043
8044 dio_bio->bi_error = bio->bi_error;
8043 dio_end_io(dio_bio, bio->bi_error); 8045 dio_end_io(dio_bio, bio->bi_error);
8044 bio_put(bio); 8046 bio_put(bio);
8045} 8047}
diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c
index 7cf8509deda7..2c849b08a91b 100644
--- a/fs/btrfs/root-tree.c
+++ b/fs/btrfs/root-tree.c
@@ -310,8 +310,16 @@ int btrfs_find_orphan_roots(struct btrfs_root *tree_root)
310 set_bit(BTRFS_ROOT_ORPHAN_ITEM_INSERTED, &root->state); 310 set_bit(BTRFS_ROOT_ORPHAN_ITEM_INSERTED, &root->state);
311 311
312 err = btrfs_insert_fs_root(root->fs_info, root); 312 err = btrfs_insert_fs_root(root->fs_info, root);
313 /*
314 * The root might have been inserted already, as before we look
315 * for orphan roots, log replay might have happened, which
316 * triggers a transaction commit and qgroup accounting, which
317 * in turn reads and inserts fs roots while doing backref
318 * walking.
319 */
320 if (err == -EEXIST)
321 err = 0;
313 if (err) { 322 if (err) {
314 BUG_ON(err == -EEXIST);
315 btrfs_free_fs_root(root); 323 btrfs_free_fs_root(root);
316 break; 324 break;
317 } 325 }
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index c22213789090..19adeb0ef82a 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -1756,6 +1756,10 @@ int ceph_pool_perm_check(struct ceph_inode_info *ci, int need)
1756 u32 pool; 1756 u32 pool;
1757 int ret, flags; 1757 int ret, flags;
1758 1758
1759 /* does not support pool namespace yet */
1760 if (ci->i_pool_ns_len)
1761 return -EIO;
1762
1759 if (ceph_test_mount_opt(ceph_inode_to_client(&ci->vfs_inode), 1763 if (ceph_test_mount_opt(ceph_inode_to_client(&ci->vfs_inode),
1760 NOPOOLPERM)) 1764 NOPOOLPERM))
1761 return 0; 1765 return 0;
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index cdbf8cf3d52c..6fe0ad26a7df 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -2753,7 +2753,8 @@ static void handle_cap_grant(struct ceph_mds_client *mdsc,
2753 void *inline_data, int inline_len, 2753 void *inline_data, int inline_len,
2754 struct ceph_buffer *xattr_buf, 2754 struct ceph_buffer *xattr_buf,
2755 struct ceph_mds_session *session, 2755 struct ceph_mds_session *session,
2756 struct ceph_cap *cap, int issued) 2756 struct ceph_cap *cap, int issued,
2757 u32 pool_ns_len)
2757 __releases(ci->i_ceph_lock) 2758 __releases(ci->i_ceph_lock)
2758 __releases(mdsc->snap_rwsem) 2759 __releases(mdsc->snap_rwsem)
2759{ 2760{
@@ -2873,6 +2874,8 @@ static void handle_cap_grant(struct ceph_mds_client *mdsc,
2873 if (newcaps & (CEPH_CAP_ANY_FILE_RD | CEPH_CAP_ANY_FILE_WR)) { 2874 if (newcaps & (CEPH_CAP_ANY_FILE_RD | CEPH_CAP_ANY_FILE_WR)) {
2874 /* file layout may have changed */ 2875 /* file layout may have changed */
2875 ci->i_layout = grant->layout; 2876 ci->i_layout = grant->layout;
2877 ci->i_pool_ns_len = pool_ns_len;
2878
2876 /* size/truncate_seq? */ 2879 /* size/truncate_seq? */
2877 queue_trunc = ceph_fill_file_size(inode, issued, 2880 queue_trunc = ceph_fill_file_size(inode, issued,
2878 le32_to_cpu(grant->truncate_seq), 2881 le32_to_cpu(grant->truncate_seq),
@@ -3411,6 +3414,7 @@ void ceph_handle_caps(struct ceph_mds_session *session,
3411 u32 inline_len = 0; 3414 u32 inline_len = 0;
3412 void *snaptrace; 3415 void *snaptrace;
3413 size_t snaptrace_len; 3416 size_t snaptrace_len;
3417 u32 pool_ns_len = 0;
3414 void *p, *end; 3418 void *p, *end;
3415 3419
3416 dout("handle_caps from mds%d\n", mds); 3420 dout("handle_caps from mds%d\n", mds);
@@ -3463,6 +3467,21 @@ void ceph_handle_caps(struct ceph_mds_session *session,
3463 p += inline_len; 3467 p += inline_len;
3464 } 3468 }
3465 3469
3470 if (le16_to_cpu(msg->hdr.version) >= 8) {
3471 u64 flush_tid;
3472 u32 caller_uid, caller_gid;
3473 u32 osd_epoch_barrier;
3474 /* version >= 5 */
3475 ceph_decode_32_safe(&p, end, osd_epoch_barrier, bad);
3476 /* version >= 6 */
3477 ceph_decode_64_safe(&p, end, flush_tid, bad);
3478 /* version >= 7 */
3479 ceph_decode_32_safe(&p, end, caller_uid, bad);
3480 ceph_decode_32_safe(&p, end, caller_gid, bad);
3481 /* version >= 8 */
3482 ceph_decode_32_safe(&p, end, pool_ns_len, bad);
3483 }
3484
3466 /* lookup ino */ 3485 /* lookup ino */
3467 inode = ceph_find_inode(sb, vino); 3486 inode = ceph_find_inode(sb, vino);
3468 ci = ceph_inode(inode); 3487 ci = ceph_inode(inode);
@@ -3518,7 +3537,8 @@ void ceph_handle_caps(struct ceph_mds_session *session,
3518 &cap, &issued); 3537 &cap, &issued);
3519 handle_cap_grant(mdsc, inode, h, 3538 handle_cap_grant(mdsc, inode, h,
3520 inline_version, inline_data, inline_len, 3539 inline_version, inline_data, inline_len,
3521 msg->middle, session, cap, issued); 3540 msg->middle, session, cap, issued,
3541 pool_ns_len);
3522 if (realm) 3542 if (realm)
3523 ceph_put_snap_realm(mdsc, realm); 3543 ceph_put_snap_realm(mdsc, realm);
3524 goto done_unlocked; 3544 goto done_unlocked;
@@ -3542,7 +3562,8 @@ void ceph_handle_caps(struct ceph_mds_session *session,
3542 issued |= __ceph_caps_dirty(ci); 3562 issued |= __ceph_caps_dirty(ci);
3543 handle_cap_grant(mdsc, inode, h, 3563 handle_cap_grant(mdsc, inode, h,
3544 inline_version, inline_data, inline_len, 3564 inline_version, inline_data, inline_len,
3545 msg->middle, session, cap, issued); 3565 msg->middle, session, cap, issued,
3566 pool_ns_len);
3546 goto done_unlocked; 3567 goto done_unlocked;
3547 3568
3548 case CEPH_CAP_OP_FLUSH_ACK: 3569 case CEPH_CAP_OP_FLUSH_ACK:
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index fb4ba2e4e2a5..5849b88bbed3 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -396,6 +396,7 @@ struct inode *ceph_alloc_inode(struct super_block *sb)
396 ci->i_symlink = NULL; 396 ci->i_symlink = NULL;
397 397
398 memset(&ci->i_dir_layout, 0, sizeof(ci->i_dir_layout)); 398 memset(&ci->i_dir_layout, 0, sizeof(ci->i_dir_layout));
399 ci->i_pool_ns_len = 0;
399 400
400 ci->i_fragtree = RB_ROOT; 401 ci->i_fragtree = RB_ROOT;
401 mutex_init(&ci->i_fragtree_mutex); 402 mutex_init(&ci->i_fragtree_mutex);
@@ -756,6 +757,7 @@ static int fill_inode(struct inode *inode, struct page *locked_page,
756 if (ci->i_layout.fl_pg_pool != info->layout.fl_pg_pool) 757 if (ci->i_layout.fl_pg_pool != info->layout.fl_pg_pool)
757 ci->i_ceph_flags &= ~CEPH_I_POOL_PERM; 758 ci->i_ceph_flags &= ~CEPH_I_POOL_PERM;
758 ci->i_layout = info->layout; 759 ci->i_layout = info->layout;
760 ci->i_pool_ns_len = iinfo->pool_ns_len;
759 761
760 queue_trunc = ceph_fill_file_size(inode, issued, 762 queue_trunc = ceph_fill_file_size(inode, issued,
761 le32_to_cpu(info->truncate_seq), 763 le32_to_cpu(info->truncate_seq),
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index e7b130a637f9..911d64d865f1 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -100,6 +100,14 @@ static int parse_reply_info_in(void **p, void *end,
100 } else 100 } else
101 info->inline_version = CEPH_INLINE_NONE; 101 info->inline_version = CEPH_INLINE_NONE;
102 102
103 if (features & CEPH_FEATURE_FS_FILE_LAYOUT_V2) {
104 ceph_decode_32_safe(p, end, info->pool_ns_len, bad);
105 ceph_decode_need(p, end, info->pool_ns_len, bad);
106 *p += info->pool_ns_len;
107 } else {
108 info->pool_ns_len = 0;
109 }
110
103 return 0; 111 return 0;
104bad: 112bad:
105 return err; 113 return err;
@@ -2298,6 +2306,14 @@ int ceph_mdsc_do_request(struct ceph_mds_client *mdsc,
2298 ceph_get_cap_refs(ceph_inode(req->r_old_dentry_dir), 2306 ceph_get_cap_refs(ceph_inode(req->r_old_dentry_dir),
2299 CEPH_CAP_PIN); 2307 CEPH_CAP_PIN);
2300 2308
2309 /* deny access to directories with pool_ns layouts */
2310 if (req->r_inode && S_ISDIR(req->r_inode->i_mode) &&
2311 ceph_inode(req->r_inode)->i_pool_ns_len)
2312 return -EIO;
2313 if (req->r_locked_dir &&
2314 ceph_inode(req->r_locked_dir)->i_pool_ns_len)
2315 return -EIO;
2316
2301 /* issue */ 2317 /* issue */
2302 mutex_lock(&mdsc->mutex); 2318 mutex_lock(&mdsc->mutex);
2303 __register_request(mdsc, req, dir); 2319 __register_request(mdsc, req, dir);
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index ccf11ef0ca87..37712ccffcc6 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -44,6 +44,7 @@ struct ceph_mds_reply_info_in {
44 u64 inline_version; 44 u64 inline_version;
45 u32 inline_len; 45 u32 inline_len;
46 char *inline_data; 46 char *inline_data;
47 u32 pool_ns_len;
47}; 48};
48 49
49/* 50/*
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 75b7d125ce66..9c458eb52245 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -287,6 +287,7 @@ struct ceph_inode_info {
287 287
288 struct ceph_dir_layout i_dir_layout; 288 struct ceph_dir_layout i_dir_layout;
289 struct ceph_file_layout i_layout; 289 struct ceph_file_layout i_layout;
290 size_t i_pool_ns_len;
290 char *i_symlink; 291 char *i_symlink;
291 292
292 /* for dirs */ 293 /* for dirs */
diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c
index 7dc886c9a78f..e956cba94338 100644
--- a/fs/cifs/cifs_dfs_ref.c
+++ b/fs/cifs/cifs_dfs_ref.c
@@ -175,7 +175,7 @@ char *cifs_compose_mount_options(const char *sb_mountdata,
175 * string to the length of the original string to allow for worst case. 175 * string to the length of the original string to allow for worst case.
176 */ 176 */
177 md_len = strlen(sb_mountdata) + INET6_ADDRSTRLEN; 177 md_len = strlen(sb_mountdata) + INET6_ADDRSTRLEN;
178 mountdata = kzalloc(md_len + 1, GFP_KERNEL); 178 mountdata = kzalloc(md_len + sizeof("ip=") + 1, GFP_KERNEL);
179 if (mountdata == NULL) { 179 if (mountdata == NULL) {
180 rc = -ENOMEM; 180 rc = -ENOMEM;
181 goto compose_mount_options_err; 181 goto compose_mount_options_err;
diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c
index afa09fce8151..e682b36a210f 100644
--- a/fs/cifs/cifsencrypt.c
+++ b/fs/cifs/cifsencrypt.c
@@ -714,7 +714,7 @@ setup_ntlmv2_rsp(struct cifs_ses *ses, const struct nls_table *nls_cp)
714 714
715 ses->auth_key.response = kmalloc(baselen + tilen, GFP_KERNEL); 715 ses->auth_key.response = kmalloc(baselen + tilen, GFP_KERNEL);
716 if (!ses->auth_key.response) { 716 if (!ses->auth_key.response) {
717 rc = ENOMEM; 717 rc = -ENOMEM;
718 ses->auth_key.len = 0; 718 ses->auth_key.len = 0;
719 goto setup_ntlmv2_rsp_ret; 719 goto setup_ntlmv2_rsp_ret;
720 } 720 }
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index c48ca13673e3..2eea40353e60 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -1013,7 +1013,6 @@ const struct file_operations cifs_file_strict_ops = {
1013 .llseek = cifs_llseek, 1013 .llseek = cifs_llseek,
1014 .unlocked_ioctl = cifs_ioctl, 1014 .unlocked_ioctl = cifs_ioctl,
1015 .clone_file_range = cifs_clone_file_range, 1015 .clone_file_range = cifs_clone_file_range,
1016 .clone_file_range = cifs_clone_file_range,
1017 .setlease = cifs_setlease, 1016 .setlease = cifs_setlease,
1018 .fallocate = cifs_fallocate, 1017 .fallocate = cifs_fallocate,
1019}; 1018};
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index 68c4547528c4..83aac8ba50b0 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -31,19 +31,15 @@
31 * so that it will fit. We use hash_64 to convert the value to 31 bits, and 31 * so that it will fit. We use hash_64 to convert the value to 31 bits, and
32 * then add 1, to ensure that we don't end up with a 0 as the value. 32 * then add 1, to ensure that we don't end up with a 0 as the value.
33 */ 33 */
34#if BITS_PER_LONG == 64
35static inline ino_t 34static inline ino_t
36cifs_uniqueid_to_ino_t(u64 fileid) 35cifs_uniqueid_to_ino_t(u64 fileid)
37{ 36{
37 if ((sizeof(ino_t)) < (sizeof(u64)))
38 return (ino_t)hash_64(fileid, (sizeof(ino_t) * 8) - 1) + 1;
39
38 return (ino_t)fileid; 40 return (ino_t)fileid;
41
39} 42}
40#else
41static inline ino_t
42cifs_uniqueid_to_ino_t(u64 fileid)
43{
44 return (ino_t)hash_64(fileid, (sizeof(ino_t) * 8) - 1) + 1;
45}
46#endif
47 43
48extern struct file_system_type cifs_fs_type; 44extern struct file_system_type cifs_fs_type;
49extern const struct address_space_operations cifs_addr_ops; 45extern const struct address_space_operations cifs_addr_ops;
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 90b4f9f7de66..76fcb50295a3 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -1396,11 +1396,10 @@ openRetry:
1396 * current bigbuf. 1396 * current bigbuf.
1397 */ 1397 */
1398static int 1398static int
1399cifs_readv_discard(struct TCP_Server_Info *server, struct mid_q_entry *mid) 1399discard_remaining_data(struct TCP_Server_Info *server)
1400{ 1400{
1401 unsigned int rfclen = get_rfc1002_length(server->smallbuf); 1401 unsigned int rfclen = get_rfc1002_length(server->smallbuf);
1402 int remaining = rfclen + 4 - server->total_read; 1402 int remaining = rfclen + 4 - server->total_read;
1403 struct cifs_readdata *rdata = mid->callback_data;
1404 1403
1405 while (remaining > 0) { 1404 while (remaining > 0) {
1406 int length; 1405 int length;
@@ -1414,10 +1413,20 @@ cifs_readv_discard(struct TCP_Server_Info *server, struct mid_q_entry *mid)
1414 remaining -= length; 1413 remaining -= length;
1415 } 1414 }
1416 1415
1417 dequeue_mid(mid, rdata->result);
1418 return 0; 1416 return 0;
1419} 1417}
1420 1418
1419static int
1420cifs_readv_discard(struct TCP_Server_Info *server, struct mid_q_entry *mid)
1421{
1422 int length;
1423 struct cifs_readdata *rdata = mid->callback_data;
1424
1425 length = discard_remaining_data(server);
1426 dequeue_mid(mid, rdata->result);
1427 return length;
1428}
1429
1421int 1430int
1422cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid) 1431cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid)
1423{ 1432{
@@ -1446,6 +1455,12 @@ cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid)
1446 return length; 1455 return length;
1447 server->total_read += length; 1456 server->total_read += length;
1448 1457
1458 if (server->ops->is_status_pending &&
1459 server->ops->is_status_pending(buf, server, 0)) {
1460 discard_remaining_data(server);
1461 return -1;
1462 }
1463
1449 /* Was the SMB read successful? */ 1464 /* Was the SMB read successful? */
1450 rdata->result = server->ops->map_error(buf, false); 1465 rdata->result = server->ops->map_error(buf, false);
1451 if (rdata->result != 0) { 1466 if (rdata->result != 0) {
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 4fbd92d2e113..a763cd3d9e7c 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -2999,8 +2999,7 @@ ip_rfc1001_connect(struct TCP_Server_Info *server)
2999 if (ses_init_buf) { 2999 if (ses_init_buf) {
3000 ses_init_buf->trailer.session_req.called_len = 32; 3000 ses_init_buf->trailer.session_req.called_len = 32;
3001 3001
3002 if (server->server_RFC1001_name && 3002 if (server->server_RFC1001_name[0] != 0)
3003 server->server_RFC1001_name[0] != 0)
3004 rfc1002mangle(ses_init_buf->trailer. 3003 rfc1002mangle(ses_init_buf->trailer.
3005 session_req.called_name, 3004 session_req.called_name,
3006 server->server_RFC1001_name, 3005 server->server_RFC1001_name,
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index 10f8d5cf5681..42e1f440eb1e 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -1106,21 +1106,25 @@ parse_lease_state(struct TCP_Server_Info *server, struct smb2_create_rsp *rsp,
1106{ 1106{
1107 char *data_offset; 1107 char *data_offset;
1108 struct create_context *cc; 1108 struct create_context *cc;
1109 unsigned int next = 0; 1109 unsigned int next;
1110 unsigned int remaining;
1110 char *name; 1111 char *name;
1111 1112
1112 data_offset = (char *)rsp + 4 + le32_to_cpu(rsp->CreateContextsOffset); 1113 data_offset = (char *)rsp + 4 + le32_to_cpu(rsp->CreateContextsOffset);
1114 remaining = le32_to_cpu(rsp->CreateContextsLength);
1113 cc = (struct create_context *)data_offset; 1115 cc = (struct create_context *)data_offset;
1114 do { 1116 while (remaining >= sizeof(struct create_context)) {
1115 cc = (struct create_context *)((char *)cc + next);
1116 name = le16_to_cpu(cc->NameOffset) + (char *)cc; 1117 name = le16_to_cpu(cc->NameOffset) + (char *)cc;
1117 if (le16_to_cpu(cc->NameLength) != 4 || 1118 if (le16_to_cpu(cc->NameLength) == 4 &&
1118 strncmp(name, "RqLs", 4)) { 1119 strncmp(name, "RqLs", 4) == 0)
1119 next = le32_to_cpu(cc->Next); 1120 return server->ops->parse_lease_buf(cc, epoch);
1120 continue; 1121
1121 } 1122 next = le32_to_cpu(cc->Next);
1122 return server->ops->parse_lease_buf(cc, epoch); 1123 if (!next)
1123 } while (next != 0); 1124 break;
1125 remaining -= next;
1126 cc = (struct create_context *)((char *)cc + next);
1127 }
1124 1128
1125 return 0; 1129 return 0;
1126} 1130}
diff --git a/fs/dax.c b/fs/dax.c
index fc2e3141138b..711172450da6 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -79,15 +79,14 @@ struct page *read_dax_sector(struct block_device *bdev, sector_t n)
79} 79}
80 80
81/* 81/*
82 * dax_clear_blocks() is called from within transaction context from XFS, 82 * dax_clear_sectors() is called from within transaction context from XFS,
83 * and hence this means the stack from this point must follow GFP_NOFS 83 * and hence this means the stack from this point must follow GFP_NOFS
84 * semantics for all operations. 84 * semantics for all operations.
85 */ 85 */
86int dax_clear_blocks(struct inode *inode, sector_t block, long _size) 86int dax_clear_sectors(struct block_device *bdev, sector_t _sector, long _size)
87{ 87{
88 struct block_device *bdev = inode->i_sb->s_bdev;
89 struct blk_dax_ctl dax = { 88 struct blk_dax_ctl dax = {
90 .sector = block << (inode->i_blkbits - 9), 89 .sector = _sector,
91 .size = _size, 90 .size = _size,
92 }; 91 };
93 92
@@ -109,7 +108,7 @@ int dax_clear_blocks(struct inode *inode, sector_t block, long _size)
109 wmb_pmem(); 108 wmb_pmem();
110 return 0; 109 return 0;
111} 110}
112EXPORT_SYMBOL_GPL(dax_clear_blocks); 111EXPORT_SYMBOL_GPL(dax_clear_sectors);
113 112
114/* the clear_pmem() calls are ordered by a wmb_pmem() in the caller */ 113/* the clear_pmem() calls are ordered by a wmb_pmem() in the caller */
115static void dax_new_buf(void __pmem *addr, unsigned size, unsigned first, 114static void dax_new_buf(void __pmem *addr, unsigned size, unsigned first,
@@ -485,11 +484,10 @@ static int dax_writeback_one(struct block_device *bdev,
485 * end]. This is required by data integrity operations to ensure file data is 484 * end]. This is required by data integrity operations to ensure file data is
486 * on persistent storage prior to completion of the operation. 485 * on persistent storage prior to completion of the operation.
487 */ 486 */
488int dax_writeback_mapping_range(struct address_space *mapping, loff_t start, 487int dax_writeback_mapping_range(struct address_space *mapping,
489 loff_t end) 488 struct block_device *bdev, struct writeback_control *wbc)
490{ 489{
491 struct inode *inode = mapping->host; 490 struct inode *inode = mapping->host;
492 struct block_device *bdev = inode->i_sb->s_bdev;
493 pgoff_t start_index, end_index, pmd_index; 491 pgoff_t start_index, end_index, pmd_index;
494 pgoff_t indices[PAGEVEC_SIZE]; 492 pgoff_t indices[PAGEVEC_SIZE];
495 struct pagevec pvec; 493 struct pagevec pvec;
@@ -500,8 +498,11 @@ int dax_writeback_mapping_range(struct address_space *mapping, loff_t start,
500 if (WARN_ON_ONCE(inode->i_blkbits != PAGE_SHIFT)) 498 if (WARN_ON_ONCE(inode->i_blkbits != PAGE_SHIFT))
501 return -EIO; 499 return -EIO;
502 500
503 start_index = start >> PAGE_CACHE_SHIFT; 501 if (!mapping->nrexceptional || wbc->sync_mode != WB_SYNC_ALL)
504 end_index = end >> PAGE_CACHE_SHIFT; 502 return 0;
503
504 start_index = wbc->range_start >> PAGE_CACHE_SHIFT;
505 end_index = wbc->range_end >> PAGE_CACHE_SHIFT;
505 pmd_index = DAX_PMD_INDEX(start_index); 506 pmd_index = DAX_PMD_INDEX(start_index);
506 507
507 rcu_read_lock(); 508 rcu_read_lock();
diff --git a/fs/dcache.c b/fs/dcache.c
index 92d5140de851..2398f9f94337 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -269,9 +269,6 @@ static inline int dname_external(const struct dentry *dentry)
269 return dentry->d_name.name != dentry->d_iname; 269 return dentry->d_name.name != dentry->d_iname;
270} 270}
271 271
272/*
273 * Make sure other CPUs see the inode attached before the type is set.
274 */
275static inline void __d_set_inode_and_type(struct dentry *dentry, 272static inline void __d_set_inode_and_type(struct dentry *dentry,
276 struct inode *inode, 273 struct inode *inode,
277 unsigned type_flags) 274 unsigned type_flags)
@@ -279,28 +276,18 @@ static inline void __d_set_inode_and_type(struct dentry *dentry,
279 unsigned flags; 276 unsigned flags;
280 277
281 dentry->d_inode = inode; 278 dentry->d_inode = inode;
282 smp_wmb();
283 flags = READ_ONCE(dentry->d_flags); 279 flags = READ_ONCE(dentry->d_flags);
284 flags &= ~(DCACHE_ENTRY_TYPE | DCACHE_FALLTHRU); 280 flags &= ~(DCACHE_ENTRY_TYPE | DCACHE_FALLTHRU);
285 flags |= type_flags; 281 flags |= type_flags;
286 WRITE_ONCE(dentry->d_flags, flags); 282 WRITE_ONCE(dentry->d_flags, flags);
287} 283}
288 284
289/*
290 * Ideally, we want to make sure that other CPUs see the flags cleared before
291 * the inode is detached, but this is really a violation of RCU principles
292 * since the ordering suggests we should always set inode before flags.
293 *
294 * We should instead replace or discard the entire dentry - but that sucks
295 * performancewise on mass deletion/rename.
296 */
297static inline void __d_clear_type_and_inode(struct dentry *dentry) 285static inline void __d_clear_type_and_inode(struct dentry *dentry)
298{ 286{
299 unsigned flags = READ_ONCE(dentry->d_flags); 287 unsigned flags = READ_ONCE(dentry->d_flags);
300 288
301 flags &= ~(DCACHE_ENTRY_TYPE | DCACHE_FALLTHRU); 289 flags &= ~(DCACHE_ENTRY_TYPE | DCACHE_FALLTHRU);
302 WRITE_ONCE(dentry->d_flags, flags); 290 WRITE_ONCE(dentry->d_flags, flags);
303 smp_wmb();
304 dentry->d_inode = NULL; 291 dentry->d_inode = NULL;
305} 292}
306 293
@@ -370,9 +357,11 @@ static void dentry_unlink_inode(struct dentry * dentry)
370 __releases(dentry->d_inode->i_lock) 357 __releases(dentry->d_inode->i_lock)
371{ 358{
372 struct inode *inode = dentry->d_inode; 359 struct inode *inode = dentry->d_inode;
360
361 raw_write_seqcount_begin(&dentry->d_seq);
373 __d_clear_type_and_inode(dentry); 362 __d_clear_type_and_inode(dentry);
374 hlist_del_init(&dentry->d_u.d_alias); 363 hlist_del_init(&dentry->d_u.d_alias);
375 dentry_rcuwalk_invalidate(dentry); 364 raw_write_seqcount_end(&dentry->d_seq);
376 spin_unlock(&dentry->d_lock); 365 spin_unlock(&dentry->d_lock);
377 spin_unlock(&inode->i_lock); 366 spin_unlock(&inode->i_lock);
378 if (!inode->i_nlink) 367 if (!inode->i_nlink)
@@ -1758,8 +1747,9 @@ static void __d_instantiate(struct dentry *dentry, struct inode *inode)
1758 spin_lock(&dentry->d_lock); 1747 spin_lock(&dentry->d_lock);
1759 if (inode) 1748 if (inode)
1760 hlist_add_head(&dentry->d_u.d_alias, &inode->i_dentry); 1749 hlist_add_head(&dentry->d_u.d_alias, &inode->i_dentry);
1750 raw_write_seqcount_begin(&dentry->d_seq);
1761 __d_set_inode_and_type(dentry, inode, add_flags); 1751 __d_set_inode_and_type(dentry, inode, add_flags);
1762 dentry_rcuwalk_invalidate(dentry); 1752 raw_write_seqcount_end(&dentry->d_seq);
1763 spin_unlock(&dentry->d_lock); 1753 spin_unlock(&dentry->d_lock);
1764 fsnotify_d_instantiate(dentry, inode); 1754 fsnotify_d_instantiate(dentry, inode);
1765} 1755}
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 1b2f7ffc8b84..d6a9012d42ad 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -472,8 +472,8 @@ static int dio_bio_complete(struct dio *dio, struct bio *bio)
472 dio->io_error = -EIO; 472 dio->io_error = -EIO;
473 473
474 if (dio->is_async && dio->rw == READ && dio->should_dirty) { 474 if (dio->is_async && dio->rw == READ && dio->should_dirty) {
475 bio_check_pages_dirty(bio); /* transfers ownership */
476 err = bio->bi_error; 475 err = bio->bi_error;
476 bio_check_pages_dirty(bio); /* transfers ownership */
477 } else { 477 } else {
478 bio_for_each_segment_all(bvec, bio, i) { 478 bio_for_each_segment_all(bvec, bio, i) {
479 struct page *page = bvec->bv_page; 479 struct page *page = bvec->bv_page;
diff --git a/fs/ext2/file.c b/fs/ext2/file.c
index 2c88d683cd91..c1400b109805 100644
--- a/fs/ext2/file.c
+++ b/fs/ext2/file.c
@@ -80,23 +80,6 @@ static int ext2_dax_pmd_fault(struct vm_area_struct *vma, unsigned long addr,
80 return ret; 80 return ret;
81} 81}
82 82
83static int ext2_dax_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
84{
85 struct inode *inode = file_inode(vma->vm_file);
86 struct ext2_inode_info *ei = EXT2_I(inode);
87 int ret;
88
89 sb_start_pagefault(inode->i_sb);
90 file_update_time(vma->vm_file);
91 down_read(&ei->dax_sem);
92
93 ret = __dax_mkwrite(vma, vmf, ext2_get_block, NULL);
94
95 up_read(&ei->dax_sem);
96 sb_end_pagefault(inode->i_sb);
97 return ret;
98}
99
100static int ext2_dax_pfn_mkwrite(struct vm_area_struct *vma, 83static int ext2_dax_pfn_mkwrite(struct vm_area_struct *vma,
101 struct vm_fault *vmf) 84 struct vm_fault *vmf)
102{ 85{
@@ -124,7 +107,7 @@ static int ext2_dax_pfn_mkwrite(struct vm_area_struct *vma,
124static const struct vm_operations_struct ext2_dax_vm_ops = { 107static const struct vm_operations_struct ext2_dax_vm_ops = {
125 .fault = ext2_dax_fault, 108 .fault = ext2_dax_fault,
126 .pmd_fault = ext2_dax_pmd_fault, 109 .pmd_fault = ext2_dax_pmd_fault,
127 .page_mkwrite = ext2_dax_mkwrite, 110 .page_mkwrite = ext2_dax_fault,
128 .pfn_mkwrite = ext2_dax_pfn_mkwrite, 111 .pfn_mkwrite = ext2_dax_pfn_mkwrite,
129}; 112};
130 113
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index 338eefda70c6..6bd58e6ff038 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -737,8 +737,10 @@ static int ext2_get_blocks(struct inode *inode,
737 * so that it's not found by another thread before it's 737 * so that it's not found by another thread before it's
738 * initialised 738 * initialised
739 */ 739 */
740 err = dax_clear_blocks(inode, le32_to_cpu(chain[depth-1].key), 740 err = dax_clear_sectors(inode->i_sb->s_bdev,
741 1 << inode->i_blkbits); 741 le32_to_cpu(chain[depth-1].key) <<
742 (inode->i_blkbits - 9),
743 1 << inode->i_blkbits);
742 if (err) { 744 if (err) {
743 mutex_unlock(&ei->truncate_mutex); 745 mutex_unlock(&ei->truncate_mutex);
744 goto cleanup; 746 goto cleanup;
@@ -874,6 +876,14 @@ ext2_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t offset)
874static int 876static int
875ext2_writepages(struct address_space *mapping, struct writeback_control *wbc) 877ext2_writepages(struct address_space *mapping, struct writeback_control *wbc)
876{ 878{
879#ifdef CONFIG_FS_DAX
880 if (dax_mapping(mapping)) {
881 return dax_writeback_mapping_range(mapping,
882 mapping->host->i_sb->s_bdev,
883 wbc);
884 }
885#endif
886
877 return mpage_writepages(mapping, wbc, ext2_get_block); 887 return mpage_writepages(mapping, wbc, ext2_get_block);
878} 888}
879 889
@@ -1296,7 +1306,7 @@ void ext2_set_inode_flags(struct inode *inode)
1296 inode->i_flags |= S_NOATIME; 1306 inode->i_flags |= S_NOATIME;
1297 if (flags & EXT2_DIRSYNC_FL) 1307 if (flags & EXT2_DIRSYNC_FL)
1298 inode->i_flags |= S_DIRSYNC; 1308 inode->i_flags |= S_DIRSYNC;
1299 if (test_opt(inode->i_sb, DAX)) 1309 if (test_opt(inode->i_sb, DAX) && S_ISREG(inode->i_mode))
1300 inode->i_flags |= S_DAX; 1310 inode->i_flags |= S_DAX;
1301} 1311}
1302 1312
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index ec0668a60678..fe1f50fe764f 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -191,7 +191,6 @@ static int ext4_init_block_bitmap(struct super_block *sb,
191 /* If checksum is bad mark all blocks used to prevent allocation 191 /* If checksum is bad mark all blocks used to prevent allocation
192 * essentially implementing a per-group read-only flag. */ 192 * essentially implementing a per-group read-only flag. */
193 if (!ext4_group_desc_csum_verify(sb, block_group, gdp)) { 193 if (!ext4_group_desc_csum_verify(sb, block_group, gdp)) {
194 ext4_error(sb, "Checksum bad for group %u", block_group);
195 grp = ext4_get_group_info(sb, block_group); 194 grp = ext4_get_group_info(sb, block_group);
196 if (!EXT4_MB_GRP_BBITMAP_CORRUPT(grp)) 195 if (!EXT4_MB_GRP_BBITMAP_CORRUPT(grp))
197 percpu_counter_sub(&sbi->s_freeclusters_counter, 196 percpu_counter_sub(&sbi->s_freeclusters_counter,
@@ -442,14 +441,16 @@ ext4_read_block_bitmap_nowait(struct super_block *sb, ext4_group_t block_group)
442 } 441 }
443 ext4_lock_group(sb, block_group); 442 ext4_lock_group(sb, block_group);
444 if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { 443 if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
445
446 err = ext4_init_block_bitmap(sb, bh, block_group, desc); 444 err = ext4_init_block_bitmap(sb, bh, block_group, desc);
447 set_bitmap_uptodate(bh); 445 set_bitmap_uptodate(bh);
448 set_buffer_uptodate(bh); 446 set_buffer_uptodate(bh);
449 ext4_unlock_group(sb, block_group); 447 ext4_unlock_group(sb, block_group);
450 unlock_buffer(bh); 448 unlock_buffer(bh);
451 if (err) 449 if (err) {
450 ext4_error(sb, "Failed to init block bitmap for group "
451 "%u: %d", block_group, err);
452 goto out; 452 goto out;
453 }
453 goto verify; 454 goto verify;
454 } 455 }
455 ext4_unlock_group(sb, block_group); 456 ext4_unlock_group(sb, block_group);
diff --git a/fs/ext4/crypto.c b/fs/ext4/crypto.c
index c8021208a7eb..38f7562489bb 100644
--- a/fs/ext4/crypto.c
+++ b/fs/ext4/crypto.c
@@ -467,3 +467,59 @@ uint32_t ext4_validate_encryption_key_size(uint32_t mode, uint32_t size)
467 return size; 467 return size;
468 return 0; 468 return 0;
469} 469}
470
471/*
472 * Validate dentries for encrypted directories to make sure we aren't
473 * potentially caching stale data after a key has been added or
474 * removed.
475 */
476static int ext4_d_revalidate(struct dentry *dentry, unsigned int flags)
477{
478 struct inode *dir = d_inode(dentry->d_parent);
479 struct ext4_crypt_info *ci = EXT4_I(dir)->i_crypt_info;
480 int dir_has_key, cached_with_key;
481
482 if (!ext4_encrypted_inode(dir))
483 return 0;
484
485 if (ci && ci->ci_keyring_key &&
486 (ci->ci_keyring_key->flags & ((1 << KEY_FLAG_INVALIDATED) |
487 (1 << KEY_FLAG_REVOKED) |
488 (1 << KEY_FLAG_DEAD))))
489 ci = NULL;
490
491 /* this should eventually be a flag in d_flags */
492 cached_with_key = dentry->d_fsdata != NULL;
493 dir_has_key = (ci != NULL);
494
495 /*
496 * If the dentry was cached without the key, and it is a
497 * negative dentry, it might be a valid name. We can't check
498 * if the key has since been made available due to locking
499 * reasons, so we fail the validation so ext4_lookup() can do
500 * this check.
501 *
502 * We also fail the validation if the dentry was created with
503 * the key present, but we no longer have the key, or vice versa.
504 */
505 if ((!cached_with_key && d_is_negative(dentry)) ||
506 (!cached_with_key && dir_has_key) ||
507 (cached_with_key && !dir_has_key)) {
508#if 0 /* Revalidation debug */
509 char buf[80];
510 char *cp = simple_dname(dentry, buf, sizeof(buf));
511
512 if (IS_ERR(cp))
513 cp = (char *) "???";
514 pr_err("revalidate: %s %p %d %d %d\n", cp, dentry->d_fsdata,
515 cached_with_key, d_is_negative(dentry),
516 dir_has_key);
517#endif
518 return 0;
519 }
520 return 1;
521}
522
523const struct dentry_operations ext4_encrypted_d_ops = {
524 .d_revalidate = ext4_d_revalidate,
525};
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index 1d1bca74f844..33f5e2a50cf8 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -111,6 +111,12 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx)
111 int dir_has_error = 0; 111 int dir_has_error = 0;
112 struct ext4_str fname_crypto_str = {.name = NULL, .len = 0}; 112 struct ext4_str fname_crypto_str = {.name = NULL, .len = 0};
113 113
114 if (ext4_encrypted_inode(inode)) {
115 err = ext4_get_encryption_info(inode);
116 if (err && err != -ENOKEY)
117 return err;
118 }
119
114 if (is_dx_dir(inode)) { 120 if (is_dx_dir(inode)) {
115 err = ext4_dx_readdir(file, ctx); 121 err = ext4_dx_readdir(file, ctx);
116 if (err != ERR_BAD_DX_DIR) { 122 if (err != ERR_BAD_DX_DIR) {
@@ -157,8 +163,11 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx)
157 index, 1); 163 index, 1);
158 file->f_ra.prev_pos = (loff_t)index << PAGE_CACHE_SHIFT; 164 file->f_ra.prev_pos = (loff_t)index << PAGE_CACHE_SHIFT;
159 bh = ext4_bread(NULL, inode, map.m_lblk, 0); 165 bh = ext4_bread(NULL, inode, map.m_lblk, 0);
160 if (IS_ERR(bh)) 166 if (IS_ERR(bh)) {
161 return PTR_ERR(bh); 167 err = PTR_ERR(bh);
168 bh = NULL;
169 goto errout;
170 }
162 } 171 }
163 172
164 if (!bh) { 173 if (!bh) {
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 0662b285dc8a..157b458a69d4 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -2302,6 +2302,7 @@ struct page *ext4_encrypt(struct inode *inode,
2302int ext4_decrypt(struct page *page); 2302int ext4_decrypt(struct page *page);
2303int ext4_encrypted_zeroout(struct inode *inode, ext4_lblk_t lblk, 2303int ext4_encrypted_zeroout(struct inode *inode, ext4_lblk_t lblk,
2304 ext4_fsblk_t pblk, ext4_lblk_t len); 2304 ext4_fsblk_t pblk, ext4_lblk_t len);
2305extern const struct dentry_operations ext4_encrypted_d_ops;
2305 2306
2306#ifdef CONFIG_EXT4_FS_ENCRYPTION 2307#ifdef CONFIG_EXT4_FS_ENCRYPTION
2307int ext4_init_crypto(void); 2308int ext4_init_crypto(void);
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 0ffabaf90aa5..3753ceb0b0dd 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -3928,7 +3928,7 @@ static int
3928convert_initialized_extent(handle_t *handle, struct inode *inode, 3928convert_initialized_extent(handle_t *handle, struct inode *inode,
3929 struct ext4_map_blocks *map, 3929 struct ext4_map_blocks *map,
3930 struct ext4_ext_path **ppath, int flags, 3930 struct ext4_ext_path **ppath, int flags,
3931 unsigned int allocated, ext4_fsblk_t newblock) 3931 unsigned int allocated)
3932{ 3932{
3933 struct ext4_ext_path *path = *ppath; 3933 struct ext4_ext_path *path = *ppath;
3934 struct ext4_extent *ex; 3934 struct ext4_extent *ex;
@@ -4347,7 +4347,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
4347 (flags & EXT4_GET_BLOCKS_CONVERT_UNWRITTEN)) { 4347 (flags & EXT4_GET_BLOCKS_CONVERT_UNWRITTEN)) {
4348 allocated = convert_initialized_extent( 4348 allocated = convert_initialized_extent(
4349 handle, inode, map, &path, 4349 handle, inode, map, &path,
4350 flags, allocated, newblock); 4350 flags, allocated);
4351 goto out2; 4351 goto out2;
4352 } else if (!ext4_ext_is_unwritten(ex)) 4352 } else if (!ext4_ext_is_unwritten(ex))
4353 goto out; 4353 goto out;
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 1126436dada1..4cd318f31cbe 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -262,23 +262,8 @@ static int ext4_dax_pmd_fault(struct vm_area_struct *vma, unsigned long addr,
262 return result; 262 return result;
263} 263}
264 264
265static int ext4_dax_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
266{
267 int err;
268 struct inode *inode = file_inode(vma->vm_file);
269
270 sb_start_pagefault(inode->i_sb);
271 file_update_time(vma->vm_file);
272 down_read(&EXT4_I(inode)->i_mmap_sem);
273 err = __dax_mkwrite(vma, vmf, ext4_dax_mmap_get_block, NULL);
274 up_read(&EXT4_I(inode)->i_mmap_sem);
275 sb_end_pagefault(inode->i_sb);
276
277 return err;
278}
279
280/* 265/*
281 * Handle write fault for VM_MIXEDMAP mappings. Similarly to ext4_dax_mkwrite() 266 * Handle write fault for VM_MIXEDMAP mappings. Similarly to ext4_dax_fault()
282 * handler we check for races against truncate. Note that since we cycle through 267 * handler we check for races against truncate. Note that since we cycle through
283 * i_mmap_sem, we are sure that also any hole punching that began before we 268 * i_mmap_sem, we are sure that also any hole punching that began before we
284 * were called is finished by now and so if it included part of the file we 269 * were called is finished by now and so if it included part of the file we
@@ -311,7 +296,7 @@ static int ext4_dax_pfn_mkwrite(struct vm_area_struct *vma,
311static const struct vm_operations_struct ext4_dax_vm_ops = { 296static const struct vm_operations_struct ext4_dax_vm_ops = {
312 .fault = ext4_dax_fault, 297 .fault = ext4_dax_fault,
313 .pmd_fault = ext4_dax_pmd_fault, 298 .pmd_fault = ext4_dax_pmd_fault,
314 .page_mkwrite = ext4_dax_mkwrite, 299 .page_mkwrite = ext4_dax_fault,
315 .pfn_mkwrite = ext4_dax_pfn_mkwrite, 300 .pfn_mkwrite = ext4_dax_pfn_mkwrite,
316}; 301};
317#else 302#else
@@ -350,6 +335,7 @@ static int ext4_file_open(struct inode * inode, struct file * filp)
350 struct super_block *sb = inode->i_sb; 335 struct super_block *sb = inode->i_sb;
351 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 336 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
352 struct vfsmount *mnt = filp->f_path.mnt; 337 struct vfsmount *mnt = filp->f_path.mnt;
338 struct inode *dir = filp->f_path.dentry->d_parent->d_inode;
353 struct path path; 339 struct path path;
354 char buf[64], *cp; 340 char buf[64], *cp;
355 int ret; 341 int ret;
@@ -393,6 +379,14 @@ static int ext4_file_open(struct inode * inode, struct file * filp)
393 if (ext4_encryption_info(inode) == NULL) 379 if (ext4_encryption_info(inode) == NULL)
394 return -ENOKEY; 380 return -ENOKEY;
395 } 381 }
382 if (ext4_encrypted_inode(dir) &&
383 !ext4_is_child_context_consistent_with_parent(dir, inode)) {
384 ext4_warning(inode->i_sb,
385 "Inconsistent encryption contexts: %lu/%lu\n",
386 (unsigned long) dir->i_ino,
387 (unsigned long) inode->i_ino);
388 return -EPERM;
389 }
396 /* 390 /*
397 * Set up the jbd2_inode if we are opening the inode for 391 * Set up the jbd2_inode if we are opening the inode for
398 * writing and the journal is present 392 * writing and the journal is present
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 3fcfd50a2e8a..acc0ad56bf2f 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -76,7 +76,6 @@ static int ext4_init_inode_bitmap(struct super_block *sb,
76 /* If checksum is bad mark all blocks and inodes used to prevent 76 /* If checksum is bad mark all blocks and inodes used to prevent
77 * allocation, essentially implementing a per-group read-only flag. */ 77 * allocation, essentially implementing a per-group read-only flag. */
78 if (!ext4_group_desc_csum_verify(sb, block_group, gdp)) { 78 if (!ext4_group_desc_csum_verify(sb, block_group, gdp)) {
79 ext4_error(sb, "Checksum bad for group %u", block_group);
80 grp = ext4_get_group_info(sb, block_group); 79 grp = ext4_get_group_info(sb, block_group);
81 if (!EXT4_MB_GRP_BBITMAP_CORRUPT(grp)) 80 if (!EXT4_MB_GRP_BBITMAP_CORRUPT(grp))
82 percpu_counter_sub(&sbi->s_freeclusters_counter, 81 percpu_counter_sub(&sbi->s_freeclusters_counter,
@@ -191,8 +190,11 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group)
191 set_buffer_verified(bh); 190 set_buffer_verified(bh);
192 ext4_unlock_group(sb, block_group); 191 ext4_unlock_group(sb, block_group);
193 unlock_buffer(bh); 192 unlock_buffer(bh);
194 if (err) 193 if (err) {
194 ext4_error(sb, "Failed to init inode bitmap for group "
195 "%u: %d", block_group, err);
195 goto out; 196 goto out;
197 }
196 return bh; 198 return bh;
197 } 199 }
198 ext4_unlock_group(sb, block_group); 200 ext4_unlock_group(sb, block_group);
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 83bc8bfb3bea..aee960b1af34 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -686,6 +686,34 @@ out_sem:
686 return retval; 686 return retval;
687} 687}
688 688
689/*
690 * Update EXT4_MAP_FLAGS in bh->b_state. For buffer heads attached to pages
691 * we have to be careful as someone else may be manipulating b_state as well.
692 */
693static void ext4_update_bh_state(struct buffer_head *bh, unsigned long flags)
694{
695 unsigned long old_state;
696 unsigned long new_state;
697
698 flags &= EXT4_MAP_FLAGS;
699
700 /* Dummy buffer_head? Set non-atomically. */
701 if (!bh->b_page) {
702 bh->b_state = (bh->b_state & ~EXT4_MAP_FLAGS) | flags;
703 return;
704 }
705 /*
706 * Someone else may be modifying b_state. Be careful! This is ugly but
707 * once we get rid of using bh as a container for mapping information
708 * to pass to / from get_block functions, this can go away.
709 */
710 do {
711 old_state = READ_ONCE(bh->b_state);
712 new_state = (old_state & ~EXT4_MAP_FLAGS) | flags;
713 } while (unlikely(
714 cmpxchg(&bh->b_state, old_state, new_state) != old_state));
715}
716
689/* Maximum number of blocks we map for direct IO at once. */ 717/* Maximum number of blocks we map for direct IO at once. */
690#define DIO_MAX_BLOCKS 4096 718#define DIO_MAX_BLOCKS 4096
691 719
@@ -722,7 +750,7 @@ static int _ext4_get_block(struct inode *inode, sector_t iblock,
722 ext4_io_end_t *io_end = ext4_inode_aio(inode); 750 ext4_io_end_t *io_end = ext4_inode_aio(inode);
723 751
724 map_bh(bh, inode->i_sb, map.m_pblk); 752 map_bh(bh, inode->i_sb, map.m_pblk);
725 bh->b_state = (bh->b_state & ~EXT4_MAP_FLAGS) | map.m_flags; 753 ext4_update_bh_state(bh, map.m_flags);
726 if (io_end && io_end->flag & EXT4_IO_END_UNWRITTEN) 754 if (io_end && io_end->flag & EXT4_IO_END_UNWRITTEN)
727 set_buffer_defer_completion(bh); 755 set_buffer_defer_completion(bh);
728 bh->b_size = inode->i_sb->s_blocksize * map.m_len; 756 bh->b_size = inode->i_sb->s_blocksize * map.m_len;
@@ -1685,7 +1713,7 @@ int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
1685 return ret; 1713 return ret;
1686 1714
1687 map_bh(bh, inode->i_sb, map.m_pblk); 1715 map_bh(bh, inode->i_sb, map.m_pblk);
1688 bh->b_state = (bh->b_state & ~EXT4_MAP_FLAGS) | map.m_flags; 1716 ext4_update_bh_state(bh, map.m_flags);
1689 1717
1690 if (buffer_unwritten(bh)) { 1718 if (buffer_unwritten(bh)) {
1691 /* A delayed write to unwritten bh should be marked 1719 /* A delayed write to unwritten bh should be marked
@@ -2450,6 +2478,10 @@ static int ext4_writepages(struct address_space *mapping,
2450 2478
2451 trace_ext4_writepages(inode, wbc); 2479 trace_ext4_writepages(inode, wbc);
2452 2480
2481 if (dax_mapping(mapping))
2482 return dax_writeback_mapping_range(mapping, inode->i_sb->s_bdev,
2483 wbc);
2484
2453 /* 2485 /*
2454 * No pages to write? This is mainly a kludge to avoid starting 2486 * No pages to write? This is mainly a kludge to avoid starting
2455 * a transaction for special inodes like journal inode on last iput() 2487 * a transaction for special inodes like journal inode on last iput()
@@ -3253,29 +3285,29 @@ static ssize_t ext4_ext_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
3253 * case, we allocate an io_end structure to hook to the iocb. 3285 * case, we allocate an io_end structure to hook to the iocb.
3254 */ 3286 */
3255 iocb->private = NULL; 3287 iocb->private = NULL;
3256 ext4_inode_aio_set(inode, NULL);
3257 if (!is_sync_kiocb(iocb)) {
3258 io_end = ext4_init_io_end(inode, GFP_NOFS);
3259 if (!io_end) {
3260 ret = -ENOMEM;
3261 goto retake_lock;
3262 }
3263 /*
3264 * Grab reference for DIO. Will be dropped in ext4_end_io_dio()
3265 */
3266 iocb->private = ext4_get_io_end(io_end);
3267 /*
3268 * we save the io structure for current async direct
3269 * IO, so that later ext4_map_blocks() could flag the
3270 * io structure whether there is a unwritten extents
3271 * needs to be converted when IO is completed.
3272 */
3273 ext4_inode_aio_set(inode, io_end);
3274 }
3275
3276 if (overwrite) { 3288 if (overwrite) {
3277 get_block_func = ext4_get_block_overwrite; 3289 get_block_func = ext4_get_block_overwrite;
3278 } else { 3290 } else {
3291 ext4_inode_aio_set(inode, NULL);
3292 if (!is_sync_kiocb(iocb)) {
3293 io_end = ext4_init_io_end(inode, GFP_NOFS);
3294 if (!io_end) {
3295 ret = -ENOMEM;
3296 goto retake_lock;
3297 }
3298 /*
3299 * Grab reference for DIO. Will be dropped in
3300 * ext4_end_io_dio()
3301 */
3302 iocb->private = ext4_get_io_end(io_end);
3303 /*
3304 * we save the io structure for current async direct
3305 * IO, so that later ext4_map_blocks() could flag the
3306 * io structure whether there is a unwritten extents
3307 * needs to be converted when IO is completed.
3308 */
3309 ext4_inode_aio_set(inode, io_end);
3310 }
3279 get_block_func = ext4_get_block_write; 3311 get_block_func = ext4_get_block_write;
3280 dio_flags = DIO_LOCKING; 3312 dio_flags = DIO_LOCKING;
3281 } 3313 }
@@ -4127,7 +4159,7 @@ void ext4_set_inode_flags(struct inode *inode)
4127 new_fl |= S_NOATIME; 4159 new_fl |= S_NOATIME;
4128 if (flags & EXT4_DIRSYNC_FL) 4160 if (flags & EXT4_DIRSYNC_FL)
4129 new_fl |= S_DIRSYNC; 4161 new_fl |= S_DIRSYNC;
4130 if (test_opt(inode->i_sb, DAX)) 4162 if (test_opt(inode->i_sb, DAX) && S_ISREG(inode->i_mode))
4131 new_fl |= S_DAX; 4163 new_fl |= S_DAX;
4132 inode_set_flags(inode, new_fl, 4164 inode_set_flags(inode, new_fl,
4133 S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC|S_DAX); 4165 S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC|S_DAX);
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index 0f6c36922c24..eae5917c534e 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -208,7 +208,7 @@ static int ext4_ioctl_setflags(struct inode *inode,
208{ 208{
209 struct ext4_inode_info *ei = EXT4_I(inode); 209 struct ext4_inode_info *ei = EXT4_I(inode);
210 handle_t *handle = NULL; 210 handle_t *handle = NULL;
211 int err = EPERM, migrate = 0; 211 int err = -EPERM, migrate = 0;
212 struct ext4_iloc iloc; 212 struct ext4_iloc iloc;
213 unsigned int oldflags, mask, i; 213 unsigned int oldflags, mask, i;
214 unsigned int jflag; 214 unsigned int jflag;
@@ -583,6 +583,11 @@ group_extend_out:
583 "Online defrag not supported with bigalloc"); 583 "Online defrag not supported with bigalloc");
584 err = -EOPNOTSUPP; 584 err = -EOPNOTSUPP;
585 goto mext_out; 585 goto mext_out;
586 } else if (IS_DAX(inode)) {
587 ext4_msg(sb, KERN_ERR,
588 "Online defrag not supported with DAX");
589 err = -EOPNOTSUPP;
590 goto mext_out;
586 } 591 }
587 592
588 err = mnt_want_write_file(filp); 593 err = mnt_want_write_file(filp);
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 61eaf74dca37..4424b7bf8ac6 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -2285,7 +2285,7 @@ static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v)
2285 if (group == 0) 2285 if (group == 0)
2286 seq_puts(seq, "#group: free frags first [" 2286 seq_puts(seq, "#group: free frags first ["
2287 " 2^0 2^1 2^2 2^3 2^4 2^5 2^6 " 2287 " 2^0 2^1 2^2 2^3 2^4 2^5 2^6 "
2288 " 2^7 2^8 2^9 2^10 2^11 2^12 2^13 ]"); 2288 " 2^7 2^8 2^9 2^10 2^11 2^12 2^13 ]\n");
2289 2289
2290 i = (sb->s_blocksize_bits + 2) * sizeof(sg.info.bb_counters[0]) + 2290 i = (sb->s_blocksize_bits + 2) * sizeof(sg.info.bb_counters[0]) +
2291 sizeof(struct ext4_group_info); 2291 sizeof(struct ext4_group_info);
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c
index fb6f11709ae6..e032a0423e35 100644
--- a/fs/ext4/move_extent.c
+++ b/fs/ext4/move_extent.c
@@ -265,11 +265,12 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode,
265 ext4_lblk_t orig_blk_offset, donor_blk_offset; 265 ext4_lblk_t orig_blk_offset, donor_blk_offset;
266 unsigned long blocksize = orig_inode->i_sb->s_blocksize; 266 unsigned long blocksize = orig_inode->i_sb->s_blocksize;
267 unsigned int tmp_data_size, data_size, replaced_size; 267 unsigned int tmp_data_size, data_size, replaced_size;
268 int err2, jblocks, retries = 0; 268 int i, err2, jblocks, retries = 0;
269 int replaced_count = 0; 269 int replaced_count = 0;
270 int from = data_offset_in_page << orig_inode->i_blkbits; 270 int from = data_offset_in_page << orig_inode->i_blkbits;
271 int blocks_per_page = PAGE_CACHE_SIZE >> orig_inode->i_blkbits; 271 int blocks_per_page = PAGE_CACHE_SIZE >> orig_inode->i_blkbits;
272 struct super_block *sb = orig_inode->i_sb; 272 struct super_block *sb = orig_inode->i_sb;
273 struct buffer_head *bh = NULL;
273 274
274 /* 275 /*
275 * It needs twice the amount of ordinary journal buffers because 276 * It needs twice the amount of ordinary journal buffers because
@@ -380,8 +381,16 @@ data_copy:
380 } 381 }
381 /* Perform all necessary steps similar write_begin()/write_end() 382 /* Perform all necessary steps similar write_begin()/write_end()
382 * but keeping in mind that i_size will not change */ 383 * but keeping in mind that i_size will not change */
383 *err = __block_write_begin(pagep[0], from, replaced_size, 384 if (!page_has_buffers(pagep[0]))
384 ext4_get_block); 385 create_empty_buffers(pagep[0], 1 << orig_inode->i_blkbits, 0);
386 bh = page_buffers(pagep[0]);
387 for (i = 0; i < data_offset_in_page; i++)
388 bh = bh->b_this_page;
389 for (i = 0; i < block_len_in_page; i++) {
390 *err = ext4_get_block(orig_inode, orig_blk_offset + i, bh, 0);
391 if (*err < 0)
392 break;
393 }
385 if (!*err) 394 if (!*err)
386 *err = block_commit_write(pagep[0], from, from + replaced_size); 395 *err = block_commit_write(pagep[0], from, from + replaced_size);
387 396
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 06574dd77614..48e4b8907826 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -1558,6 +1558,24 @@ static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, unsi
1558 struct ext4_dir_entry_2 *de; 1558 struct ext4_dir_entry_2 *de;
1559 struct buffer_head *bh; 1559 struct buffer_head *bh;
1560 1560
1561 if (ext4_encrypted_inode(dir)) {
1562 int res = ext4_get_encryption_info(dir);
1563
1564 /*
1565 * This should be a properly defined flag for
1566 * dentry->d_flags when we uplift this to the VFS.
1567 * d_fsdata is set to (void *) 1 if the dentry is
1568 * created while the directory was encrypted and we
1569 * don't have access to the key.
1570 */
1571 dentry->d_fsdata = NULL;
1572 if (ext4_encryption_info(dir))
1573 dentry->d_fsdata = (void *) 1;
1574 d_set_d_op(dentry, &ext4_encrypted_d_ops);
1575 if (res && res != -ENOKEY)
1576 return ERR_PTR(res);
1577 }
1578
1561 if (dentry->d_name.len > EXT4_NAME_LEN) 1579 if (dentry->d_name.len > EXT4_NAME_LEN)
1562 return ERR_PTR(-ENAMETOOLONG); 1580 return ERR_PTR(-ENAMETOOLONG);
1563 1581
@@ -1585,11 +1603,15 @@ static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, unsi
1585 return ERR_PTR(-EFSCORRUPTED); 1603 return ERR_PTR(-EFSCORRUPTED);
1586 } 1604 }
1587 if (!IS_ERR(inode) && ext4_encrypted_inode(dir) && 1605 if (!IS_ERR(inode) && ext4_encrypted_inode(dir) &&
1588 (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || 1606 (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) &&
1589 S_ISLNK(inode->i_mode)) &&
1590 !ext4_is_child_context_consistent_with_parent(dir, 1607 !ext4_is_child_context_consistent_with_parent(dir,
1591 inode)) { 1608 inode)) {
1609 int nokey = ext4_encrypted_inode(inode) &&
1610 !ext4_encryption_info(inode);
1611
1592 iput(inode); 1612 iput(inode);
1613 if (nokey)
1614 return ERR_PTR(-ENOKEY);
1593 ext4_warning(inode->i_sb, 1615 ext4_warning(inode->i_sb,
1594 "Inconsistent encryption contexts: %lu/%lu\n", 1616 "Inconsistent encryption contexts: %lu/%lu\n",
1595 (unsigned long) dir->i_ino, 1617 (unsigned long) dir->i_ino,
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index ad62d7acc315..34038e3598d5 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -198,7 +198,7 @@ static struct ext4_new_flex_group_data *alloc_flex_gd(unsigned long flexbg_size)
198 if (flex_gd == NULL) 198 if (flex_gd == NULL)
199 goto out3; 199 goto out3;
200 200
201 if (flexbg_size >= UINT_MAX / sizeof(struct ext4_new_flex_group_data)) 201 if (flexbg_size >= UINT_MAX / sizeof(struct ext4_new_group_data))
202 goto out2; 202 goto out2;
203 flex_gd->count = flexbg_size; 203 flex_gd->count = flexbg_size;
204 204
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 6915c950e6e8..5c46ed9f3e14 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -223,6 +223,9 @@ static void wb_wait_for_completion(struct backing_dev_info *bdi,
223#define WB_FRN_HIST_MAX_SLOTS (WB_FRN_HIST_THR_SLOTS / 2 + 1) 223#define WB_FRN_HIST_MAX_SLOTS (WB_FRN_HIST_THR_SLOTS / 2 + 1)
224 /* one round can affect up to 5 slots */ 224 /* one round can affect up to 5 slots */
225 225
226static atomic_t isw_nr_in_flight = ATOMIC_INIT(0);
227static struct workqueue_struct *isw_wq;
228
226void __inode_attach_wb(struct inode *inode, struct page *page) 229void __inode_attach_wb(struct inode *inode, struct page *page)
227{ 230{
228 struct backing_dev_info *bdi = inode_to_bdi(inode); 231 struct backing_dev_info *bdi = inode_to_bdi(inode);
@@ -424,6 +427,8 @@ skip_switch:
424 427
425 iput(inode); 428 iput(inode);
426 kfree(isw); 429 kfree(isw);
430
431 atomic_dec(&isw_nr_in_flight);
427} 432}
428 433
429static void inode_switch_wbs_rcu_fn(struct rcu_head *rcu_head) 434static void inode_switch_wbs_rcu_fn(struct rcu_head *rcu_head)
@@ -433,7 +438,7 @@ static void inode_switch_wbs_rcu_fn(struct rcu_head *rcu_head)
433 438
434 /* needs to grab bh-unsafe locks, bounce to work item */ 439 /* needs to grab bh-unsafe locks, bounce to work item */
435 INIT_WORK(&isw->work, inode_switch_wbs_work_fn); 440 INIT_WORK(&isw->work, inode_switch_wbs_work_fn);
436 schedule_work(&isw->work); 441 queue_work(isw_wq, &isw->work);
437} 442}
438 443
439/** 444/**
@@ -469,7 +474,8 @@ static void inode_switch_wbs(struct inode *inode, int new_wb_id)
469 474
470 /* while holding I_WB_SWITCH, no one else can update the association */ 475 /* while holding I_WB_SWITCH, no one else can update the association */
471 spin_lock(&inode->i_lock); 476 spin_lock(&inode->i_lock);
472 if (inode->i_state & (I_WB_SWITCH | I_FREEING) || 477 if (!(inode->i_sb->s_flags & MS_ACTIVE) ||
478 inode->i_state & (I_WB_SWITCH | I_FREEING) ||
473 inode_to_wb(inode) == isw->new_wb) { 479 inode_to_wb(inode) == isw->new_wb) {
474 spin_unlock(&inode->i_lock); 480 spin_unlock(&inode->i_lock);
475 goto out_free; 481 goto out_free;
@@ -480,6 +486,8 @@ static void inode_switch_wbs(struct inode *inode, int new_wb_id)
480 ihold(inode); 486 ihold(inode);
481 isw->inode = inode; 487 isw->inode = inode;
482 488
489 atomic_inc(&isw_nr_in_flight);
490
483 /* 491 /*
484 * In addition to synchronizing among switchers, I_WB_SWITCH tells 492 * In addition to synchronizing among switchers, I_WB_SWITCH tells
485 * the RCU protected stat update paths to grab the mapping's 493 * the RCU protected stat update paths to grab the mapping's
@@ -840,6 +848,33 @@ restart:
840 wb_put(last_wb); 848 wb_put(last_wb);
841} 849}
842 850
851/**
852 * cgroup_writeback_umount - flush inode wb switches for umount
853 *
854 * This function is called when a super_block is about to be destroyed and
855 * flushes in-flight inode wb switches. An inode wb switch goes through
856 * RCU and then workqueue, so the two need to be flushed in order to ensure
857 * that all previously scheduled switches are finished. As wb switches are
858 * rare occurrences and synchronize_rcu() can take a while, perform
859 * flushing iff wb switches are in flight.
860 */
861void cgroup_writeback_umount(void)
862{
863 if (atomic_read(&isw_nr_in_flight)) {
864 synchronize_rcu();
865 flush_workqueue(isw_wq);
866 }
867}
868
869static int __init cgroup_writeback_init(void)
870{
871 isw_wq = alloc_workqueue("inode_switch_wbs", 0, 0);
872 if (!isw_wq)
873 return -ENOMEM;
874 return 0;
875}
876fs_initcall(cgroup_writeback_init);
877
843#else /* CONFIG_CGROUP_WRITEBACK */ 878#else /* CONFIG_CGROUP_WRITEBACK */
844 879
845static struct bdi_writeback * 880static struct bdi_writeback *
diff --git a/fs/hpfs/namei.c b/fs/hpfs/namei.c
index 506765afa1a3..bb8d67e2740a 100644
--- a/fs/hpfs/namei.c
+++ b/fs/hpfs/namei.c
@@ -376,12 +376,11 @@ static int hpfs_unlink(struct inode *dir, struct dentry *dentry)
376 struct inode *inode = d_inode(dentry); 376 struct inode *inode = d_inode(dentry);
377 dnode_secno dno; 377 dnode_secno dno;
378 int r; 378 int r;
379 int rep = 0;
380 int err; 379 int err;
381 380
382 hpfs_lock(dir->i_sb); 381 hpfs_lock(dir->i_sb);
383 hpfs_adjust_length(name, &len); 382 hpfs_adjust_length(name, &len);
384again: 383
385 err = -ENOENT; 384 err = -ENOENT;
386 de = map_dirent(dir, hpfs_i(dir)->i_dno, name, len, &dno, &qbh); 385 de = map_dirent(dir, hpfs_i(dir)->i_dno, name, len, &dno, &qbh);
387 if (!de) 386 if (!de)
@@ -401,33 +400,9 @@ again:
401 hpfs_error(dir->i_sb, "there was error when removing dirent"); 400 hpfs_error(dir->i_sb, "there was error when removing dirent");
402 err = -EFSERROR; 401 err = -EFSERROR;
403 break; 402 break;
404 case 2: /* no space for deleting, try to truncate file */ 403 case 2: /* no space for deleting */
405
406 err = -ENOSPC; 404 err = -ENOSPC;
407 if (rep++) 405 break;
408 break;
409
410 dentry_unhash(dentry);
411 if (!d_unhashed(dentry)) {
412 hpfs_unlock(dir->i_sb);
413 return -ENOSPC;
414 }
415 if (generic_permission(inode, MAY_WRITE) ||
416 !S_ISREG(inode->i_mode) ||
417 get_write_access(inode)) {
418 d_rehash(dentry);
419 } else {
420 struct iattr newattrs;
421 /*pr_info("truncating file before delete.\n");*/
422 newattrs.ia_size = 0;
423 newattrs.ia_valid = ATTR_SIZE | ATTR_CTIME;
424 err = notify_change(dentry, &newattrs, NULL);
425 put_write_access(inode);
426 if (!err)
427 goto again;
428 }
429 hpfs_unlock(dir->i_sb);
430 return -ENOSPC;
431 default: 406 default:
432 drop_nlink(inode); 407 drop_nlink(inode);
433 err = 0; 408 err = 0;
diff --git a/fs/inode.c b/fs/inode.c
index 9f62db3bcc3e..69b8b526c194 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -154,6 +154,12 @@ int inode_init_always(struct super_block *sb, struct inode *inode)
154 inode->i_rdev = 0; 154 inode->i_rdev = 0;
155 inode->dirtied_when = 0; 155 inode->dirtied_when = 0;
156 156
157#ifdef CONFIG_CGROUP_WRITEBACK
158 inode->i_wb_frn_winner = 0;
159 inode->i_wb_frn_avg_time = 0;
160 inode->i_wb_frn_history = 0;
161#endif
162
157 if (security_inode_alloc(inode)) 163 if (security_inode_alloc(inode))
158 goto out; 164 goto out;
159 spin_lock_init(&inode->i_lock); 165 spin_lock_init(&inode->i_lock);
diff --git a/fs/jffs2/README.Locking b/fs/jffs2/README.Locking
index 3ea36554107f..8918ac905a3b 100644
--- a/fs/jffs2/README.Locking
+++ b/fs/jffs2/README.Locking
@@ -2,10 +2,6 @@
2 JFFS2 LOCKING DOCUMENTATION 2 JFFS2 LOCKING DOCUMENTATION
3 --------------------------- 3 ---------------------------
4 4
5At least theoretically, JFFS2 does not require the Big Kernel Lock
6(BKL), which was always helpfully obtained for it by Linux 2.4 VFS
7code. It has its own locking, as described below.
8
9This document attempts to describe the existing locking rules for 5This document attempts to describe the existing locking rules for
10JFFS2. It is not expected to remain perfectly up to date, but ought to 6JFFS2. It is not expected to remain perfectly up to date, but ought to
11be fairly close. 7be fairly close.
@@ -69,6 +65,7 @@ Ordering constraints:
69 any f->sem held. 65 any f->sem held.
70 2. Never attempt to lock two file mutexes in one thread. 66 2. Never attempt to lock two file mutexes in one thread.
71 No ordering rules have been made for doing so. 67 No ordering rules have been made for doing so.
68 3. Never lock a page cache page with f->sem held.
72 69
73 70
74 erase_completion_lock spinlock 71 erase_completion_lock spinlock
diff --git a/fs/jffs2/build.c b/fs/jffs2/build.c
index 0ae91ad6df2d..b288c8ae1236 100644
--- a/fs/jffs2/build.c
+++ b/fs/jffs2/build.c
@@ -50,7 +50,8 @@ next_inode(int *i, struct jffs2_inode_cache *ic, struct jffs2_sb_info *c)
50 50
51 51
52static void jffs2_build_inode_pass1(struct jffs2_sb_info *c, 52static void jffs2_build_inode_pass1(struct jffs2_sb_info *c,
53 struct jffs2_inode_cache *ic) 53 struct jffs2_inode_cache *ic,
54 int *dir_hardlinks)
54{ 55{
55 struct jffs2_full_dirent *fd; 56 struct jffs2_full_dirent *fd;
56 57
@@ -69,19 +70,21 @@ static void jffs2_build_inode_pass1(struct jffs2_sb_info *c,
69 dbg_fsbuild("child \"%s\" (ino #%u) of dir ino #%u doesn't exist!\n", 70 dbg_fsbuild("child \"%s\" (ino #%u) of dir ino #%u doesn't exist!\n",
70 fd->name, fd->ino, ic->ino); 71 fd->name, fd->ino, ic->ino);
71 jffs2_mark_node_obsolete(c, fd->raw); 72 jffs2_mark_node_obsolete(c, fd->raw);
73 /* Clear the ic/raw union so it doesn't cause problems later. */
74 fd->ic = NULL;
72 continue; 75 continue;
73 } 76 }
74 77
78 /* From this point, fd->raw is no longer used so we can set fd->ic */
79 fd->ic = child_ic;
80 child_ic->pino_nlink++;
81 /* If we appear (at this stage) to have hard-linked directories,
82 * set a flag to trigger a scan later */
75 if (fd->type == DT_DIR) { 83 if (fd->type == DT_DIR) {
76 if (child_ic->pino_nlink) { 84 child_ic->flags |= INO_FLAGS_IS_DIR;
77 JFFS2_ERROR("child dir \"%s\" (ino #%u) of dir ino #%u appears to be a hard link\n", 85 if (child_ic->pino_nlink > 1)
78 fd->name, fd->ino, ic->ino); 86 *dir_hardlinks = 1;
79 /* TODO: What do we do about it? */ 87 }
80 } else {
81 child_ic->pino_nlink = ic->ino;
82 }
83 } else
84 child_ic->pino_nlink++;
85 88
86 dbg_fsbuild("increased nlink for child \"%s\" (ino #%u)\n", fd->name, fd->ino); 89 dbg_fsbuild("increased nlink for child \"%s\" (ino #%u)\n", fd->name, fd->ino);
87 /* Can't free scan_dents so far. We might need them in pass 2 */ 90 /* Can't free scan_dents so far. We might need them in pass 2 */
@@ -95,8 +98,7 @@ static void jffs2_build_inode_pass1(struct jffs2_sb_info *c,
95*/ 98*/
96static int jffs2_build_filesystem(struct jffs2_sb_info *c) 99static int jffs2_build_filesystem(struct jffs2_sb_info *c)
97{ 100{
98 int ret; 101 int ret, i, dir_hardlinks = 0;
99 int i;
100 struct jffs2_inode_cache *ic; 102 struct jffs2_inode_cache *ic;
101 struct jffs2_full_dirent *fd; 103 struct jffs2_full_dirent *fd;
102 struct jffs2_full_dirent *dead_fds = NULL; 104 struct jffs2_full_dirent *dead_fds = NULL;
@@ -120,7 +122,7 @@ static int jffs2_build_filesystem(struct jffs2_sb_info *c)
120 /* Now scan the directory tree, increasing nlink according to every dirent found. */ 122 /* Now scan the directory tree, increasing nlink according to every dirent found. */
121 for_each_inode(i, c, ic) { 123 for_each_inode(i, c, ic) {
122 if (ic->scan_dents) { 124 if (ic->scan_dents) {
123 jffs2_build_inode_pass1(c, ic); 125 jffs2_build_inode_pass1(c, ic, &dir_hardlinks);
124 cond_resched(); 126 cond_resched();
125 } 127 }
126 } 128 }
@@ -156,6 +158,20 @@ static int jffs2_build_filesystem(struct jffs2_sb_info *c)
156 } 158 }
157 159
158 dbg_fsbuild("pass 2a complete\n"); 160 dbg_fsbuild("pass 2a complete\n");
161
162 if (dir_hardlinks) {
163 /* If we detected directory hardlinks earlier, *hopefully*
164 * they are gone now because some of the links were from
165 * dead directories which still had some old dirents lying
166 * around and not yet garbage-collected, but which have
167 * been discarded above. So clear the pino_nlink field
168 * in each directory, so that the final scan below can
169 * print appropriate warnings. */
170 for_each_inode(i, c, ic) {
171 if (ic->flags & INO_FLAGS_IS_DIR)
172 ic->pino_nlink = 0;
173 }
174 }
159 dbg_fsbuild("freeing temporary data structures\n"); 175 dbg_fsbuild("freeing temporary data structures\n");
160 176
161 /* Finally, we can scan again and free the dirent structs */ 177 /* Finally, we can scan again and free the dirent structs */
@@ -163,6 +179,33 @@ static int jffs2_build_filesystem(struct jffs2_sb_info *c)
163 while(ic->scan_dents) { 179 while(ic->scan_dents) {
164 fd = ic->scan_dents; 180 fd = ic->scan_dents;
165 ic->scan_dents = fd->next; 181 ic->scan_dents = fd->next;
182 /* We do use the pino_nlink field to count nlink of
183 * directories during fs build, so set it to the
184 * parent ino# now. Now that there's hopefully only
185 * one. */
186 if (fd->type == DT_DIR) {
187 if (!fd->ic) {
188 /* We'll have complained about it and marked the coresponding
189 raw node obsolete already. Just skip it. */
190 continue;
191 }
192
193 /* We *have* to have set this in jffs2_build_inode_pass1() */
194 BUG_ON(!(fd->ic->flags & INO_FLAGS_IS_DIR));
195
196 /* We clear ic->pino_nlink ∀ directories' ic *only* if dir_hardlinks
197 * is set. Otherwise, we know this should never trigger anyway, so
198 * we don't do the check. And ic->pino_nlink still contains the nlink
199 * value (which is 1). */
200 if (dir_hardlinks && fd->ic->pino_nlink) {
201 JFFS2_ERROR("child dir \"%s\" (ino #%u) of dir ino #%u is also hard linked from dir ino #%u\n",
202 fd->name, fd->ino, ic->ino, fd->ic->pino_nlink);
203 /* Should we unlink it from its previous parent? */
204 }
205
206 /* For directories, ic->pino_nlink holds that parent inode # */
207 fd->ic->pino_nlink = ic->ino;
208 }
166 jffs2_free_full_dirent(fd); 209 jffs2_free_full_dirent(fd);
167 } 210 }
168 ic->scan_dents = NULL; 211 ic->scan_dents = NULL;
@@ -241,11 +284,7 @@ static void jffs2_build_remove_unlinked_inode(struct jffs2_sb_info *c,
241 284
242 /* Reduce nlink of the child. If it's now zero, stick it on the 285 /* Reduce nlink of the child. If it's now zero, stick it on the
243 dead_fds list to be cleaned up later. Else just free the fd */ 286 dead_fds list to be cleaned up later. Else just free the fd */
244 287 child_ic->pino_nlink--;
245 if (fd->type == DT_DIR)
246 child_ic->pino_nlink = 0;
247 else
248 child_ic->pino_nlink--;
249 288
250 if (!child_ic->pino_nlink) { 289 if (!child_ic->pino_nlink) {
251 dbg_fsbuild("inode #%u (\"%s\") now has no links; adding to dead_fds list.\n", 290 dbg_fsbuild("inode #%u (\"%s\") now has no links; adding to dead_fds list.\n",
diff --git a/fs/jffs2/file.c b/fs/jffs2/file.c
index c5ac5944bc1b..cad86bac3453 100644
--- a/fs/jffs2/file.c
+++ b/fs/jffs2/file.c
@@ -137,39 +137,33 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping,
137 struct page *pg; 137 struct page *pg;
138 struct inode *inode = mapping->host; 138 struct inode *inode = mapping->host;
139 struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode); 139 struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode);
140 struct jffs2_sb_info *c = JFFS2_SB_INFO(inode->i_sb);
141 struct jffs2_raw_inode ri;
142 uint32_t alloc_len = 0;
143 pgoff_t index = pos >> PAGE_CACHE_SHIFT; 140 pgoff_t index = pos >> PAGE_CACHE_SHIFT;
144 uint32_t pageofs = index << PAGE_CACHE_SHIFT; 141 uint32_t pageofs = index << PAGE_CACHE_SHIFT;
145 int ret = 0; 142 int ret = 0;
146 143
147 jffs2_dbg(1, "%s()\n", __func__);
148
149 if (pageofs > inode->i_size) {
150 ret = jffs2_reserve_space(c, sizeof(ri), &alloc_len,
151 ALLOC_NORMAL, JFFS2_SUMMARY_INODE_SIZE);
152 if (ret)
153 return ret;
154 }
155
156 mutex_lock(&f->sem);
157 pg = grab_cache_page_write_begin(mapping, index, flags); 144 pg = grab_cache_page_write_begin(mapping, index, flags);
158 if (!pg) { 145 if (!pg)
159 if (alloc_len)
160 jffs2_complete_reservation(c);
161 mutex_unlock(&f->sem);
162 return -ENOMEM; 146 return -ENOMEM;
163 }
164 *pagep = pg; 147 *pagep = pg;
165 148
166 if (alloc_len) { 149 jffs2_dbg(1, "%s()\n", __func__);
150
151 if (pageofs > inode->i_size) {
167 /* Make new hole frag from old EOF to new page */ 152 /* Make new hole frag from old EOF to new page */
153 struct jffs2_sb_info *c = JFFS2_SB_INFO(inode->i_sb);
154 struct jffs2_raw_inode ri;
168 struct jffs2_full_dnode *fn; 155 struct jffs2_full_dnode *fn;
156 uint32_t alloc_len;
169 157
170 jffs2_dbg(1, "Writing new hole frag 0x%x-0x%x between current EOF and new page\n", 158 jffs2_dbg(1, "Writing new hole frag 0x%x-0x%x between current EOF and new page\n",
171 (unsigned int)inode->i_size, pageofs); 159 (unsigned int)inode->i_size, pageofs);
172 160
161 ret = jffs2_reserve_space(c, sizeof(ri), &alloc_len,
162 ALLOC_NORMAL, JFFS2_SUMMARY_INODE_SIZE);
163 if (ret)
164 goto out_page;
165
166 mutex_lock(&f->sem);
173 memset(&ri, 0, sizeof(ri)); 167 memset(&ri, 0, sizeof(ri));
174 168
175 ri.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK); 169 ri.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
@@ -196,6 +190,7 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping,
196 if (IS_ERR(fn)) { 190 if (IS_ERR(fn)) {
197 ret = PTR_ERR(fn); 191 ret = PTR_ERR(fn);
198 jffs2_complete_reservation(c); 192 jffs2_complete_reservation(c);
193 mutex_unlock(&f->sem);
199 goto out_page; 194 goto out_page;
200 } 195 }
201 ret = jffs2_add_full_dnode_to_inode(c, f, fn); 196 ret = jffs2_add_full_dnode_to_inode(c, f, fn);
@@ -210,10 +205,12 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping,
210 jffs2_mark_node_obsolete(c, fn->raw); 205 jffs2_mark_node_obsolete(c, fn->raw);
211 jffs2_free_full_dnode(fn); 206 jffs2_free_full_dnode(fn);
212 jffs2_complete_reservation(c); 207 jffs2_complete_reservation(c);
208 mutex_unlock(&f->sem);
213 goto out_page; 209 goto out_page;
214 } 210 }
215 jffs2_complete_reservation(c); 211 jffs2_complete_reservation(c);
216 inode->i_size = pageofs; 212 inode->i_size = pageofs;
213 mutex_unlock(&f->sem);
217 } 214 }
218 215
219 /* 216 /*
@@ -222,18 +219,18 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping,
222 * case of a short-copy. 219 * case of a short-copy.
223 */ 220 */
224 if (!PageUptodate(pg)) { 221 if (!PageUptodate(pg)) {
222 mutex_lock(&f->sem);
225 ret = jffs2_do_readpage_nolock(inode, pg); 223 ret = jffs2_do_readpage_nolock(inode, pg);
224 mutex_unlock(&f->sem);
226 if (ret) 225 if (ret)
227 goto out_page; 226 goto out_page;
228 } 227 }
229 mutex_unlock(&f->sem);
230 jffs2_dbg(1, "end write_begin(). pg->flags %lx\n", pg->flags); 228 jffs2_dbg(1, "end write_begin(). pg->flags %lx\n", pg->flags);
231 return ret; 229 return ret;
232 230
233out_page: 231out_page:
234 unlock_page(pg); 232 unlock_page(pg);
235 page_cache_release(pg); 233 page_cache_release(pg);
236 mutex_unlock(&f->sem);
237 return ret; 234 return ret;
238} 235}
239 236
diff --git a/fs/jffs2/gc.c b/fs/jffs2/gc.c
index 5a2dec2b064c..95d5880a63ee 100644
--- a/fs/jffs2/gc.c
+++ b/fs/jffs2/gc.c
@@ -1296,14 +1296,17 @@ static int jffs2_garbage_collect_dnode(struct jffs2_sb_info *c, struct jffs2_era
1296 BUG_ON(start > orig_start); 1296 BUG_ON(start > orig_start);
1297 } 1297 }
1298 1298
1299 /* First, use readpage() to read the appropriate page into the page cache */ 1299 /* The rules state that we must obtain the page lock *before* f->sem, so
1300 /* Q: What happens if we actually try to GC the _same_ page for which commit_write() 1300 * drop f->sem temporarily. Since we also hold c->alloc_sem, nothing's
1301 * triggered garbage collection in the first place? 1301 * actually going to *change* so we're safe; we only allow reading.
1302 * A: I _think_ it's OK. read_cache_page shouldn't deadlock, we'll write out the 1302 *
1303 * page OK. We'll actually write it out again in commit_write, which is a little 1303 * It is important to note that jffs2_write_begin() will ensure that its
1304 * suboptimal, but at least we're correct. 1304 * page is marked Uptodate before allocating space. That means that if we
1305 */ 1305 * end up here trying to GC the *same* page that jffs2_write_begin() is
1306 * trying to write out, read_cache_page() will not deadlock. */
1307 mutex_unlock(&f->sem);
1306 pg_ptr = jffs2_gc_fetch_page(c, f, start, &pg); 1308 pg_ptr = jffs2_gc_fetch_page(c, f, start, &pg);
1309 mutex_lock(&f->sem);
1307 1310
1308 if (IS_ERR(pg_ptr)) { 1311 if (IS_ERR(pg_ptr)) {
1309 pr_warn("read_cache_page() returned error: %ld\n", 1312 pr_warn("read_cache_page() returned error: %ld\n",
diff --git a/fs/jffs2/nodelist.h b/fs/jffs2/nodelist.h
index fa35ff79ab35..0637271f3770 100644
--- a/fs/jffs2/nodelist.h
+++ b/fs/jffs2/nodelist.h
@@ -194,6 +194,7 @@ struct jffs2_inode_cache {
194#define INO_STATE_CLEARING 6 /* In clear_inode() */ 194#define INO_STATE_CLEARING 6 /* In clear_inode() */
195 195
196#define INO_FLAGS_XATTR_CHECKED 0x01 /* has no duplicate xattr_ref */ 196#define INO_FLAGS_XATTR_CHECKED 0x01 /* has no duplicate xattr_ref */
197#define INO_FLAGS_IS_DIR 0x02 /* is a directory */
197 198
198#define RAWNODE_CLASS_INODE_CACHE 0 199#define RAWNODE_CLASS_INODE_CACHE 0
199#define RAWNODE_CLASS_XATTR_DATUM 1 200#define RAWNODE_CLASS_XATTR_DATUM 1
@@ -249,7 +250,10 @@ struct jffs2_readinode_info
249 250
250struct jffs2_full_dirent 251struct jffs2_full_dirent
251{ 252{
252 struct jffs2_raw_node_ref *raw; 253 union {
254 struct jffs2_raw_node_ref *raw;
255 struct jffs2_inode_cache *ic; /* Just during part of build */
256 };
253 struct jffs2_full_dirent *next; 257 struct jffs2_full_dirent *next;
254 uint32_t version; 258 uint32_t version;
255 uint32_t ino; /* == zero for unlink */ 259 uint32_t ino; /* == zero for unlink */
diff --git a/fs/namei.c b/fs/namei.c
index f624d132e01e..9c590e0f66e9 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1712,6 +1712,11 @@ static inline int should_follow_link(struct nameidata *nd, struct path *link,
1712 return 0; 1712 return 0;
1713 if (!follow) 1713 if (!follow)
1714 return 0; 1714 return 0;
1715 /* make sure that d_is_symlink above matches inode */
1716 if (nd->flags & LOOKUP_RCU) {
1717 if (read_seqcount_retry(&link->dentry->d_seq, seq))
1718 return -ECHILD;
1719 }
1715 return pick_link(nd, link, inode, seq); 1720 return pick_link(nd, link, inode, seq);
1716} 1721}
1717 1722
@@ -1743,11 +1748,11 @@ static int walk_component(struct nameidata *nd, int flags)
1743 if (err < 0) 1748 if (err < 0)
1744 return err; 1749 return err;
1745 1750
1746 inode = d_backing_inode(path.dentry);
1747 seq = 0; /* we are already out of RCU mode */ 1751 seq = 0; /* we are already out of RCU mode */
1748 err = -ENOENT; 1752 err = -ENOENT;
1749 if (d_is_negative(path.dentry)) 1753 if (d_is_negative(path.dentry))
1750 goto out_path_put; 1754 goto out_path_put;
1755 inode = d_backing_inode(path.dentry);
1751 } 1756 }
1752 1757
1753 if (flags & WALK_PUT) 1758 if (flags & WALK_PUT)
@@ -3192,12 +3197,12 @@ retry_lookup:
3192 return error; 3197 return error;
3193 3198
3194 BUG_ON(nd->flags & LOOKUP_RCU); 3199 BUG_ON(nd->flags & LOOKUP_RCU);
3195 inode = d_backing_inode(path.dentry);
3196 seq = 0; /* out of RCU mode, so the value doesn't matter */ 3200 seq = 0; /* out of RCU mode, so the value doesn't matter */
3197 if (unlikely(d_is_negative(path.dentry))) { 3201 if (unlikely(d_is_negative(path.dentry))) {
3198 path_to_nameidata(&path, nd); 3202 path_to_nameidata(&path, nd);
3199 return -ENOENT; 3203 return -ENOENT;
3200 } 3204 }
3205 inode = d_backing_inode(path.dentry);
3201finish_lookup: 3206finish_lookup:
3202 if (nd->depth) 3207 if (nd->depth)
3203 put_link(nd); 3208 put_link(nd);
@@ -3206,11 +3211,6 @@ finish_lookup:
3206 if (unlikely(error)) 3211 if (unlikely(error))
3207 return error; 3212 return error;
3208 3213
3209 if (unlikely(d_is_symlink(path.dentry)) && !(open_flag & O_PATH)) {
3210 path_to_nameidata(&path, nd);
3211 return -ELOOP;
3212 }
3213
3214 if ((nd->flags & LOOKUP_RCU) || nd->path.mnt != path.mnt) { 3214 if ((nd->flags & LOOKUP_RCU) || nd->path.mnt != path.mnt) {
3215 path_to_nameidata(&path, nd); 3215 path_to_nameidata(&path, nd);
3216 } else { 3216 } else {
@@ -3229,6 +3229,10 @@ finish_open:
3229 return error; 3229 return error;
3230 } 3230 }
3231 audit_inode(nd->name, nd->path.dentry, 0); 3231 audit_inode(nd->name, nd->path.dentry, 0);
3232 if (unlikely(d_is_symlink(nd->path.dentry)) && !(open_flag & O_PATH)) {
3233 error = -ELOOP;
3234 goto out;
3235 }
3232 error = -EISDIR; 3236 error = -EISDIR;
3233 if ((open_flag & O_CREAT) && d_is_dir(nd->path.dentry)) 3237 if ((open_flag & O_CREAT) && d_is_dir(nd->path.dentry))
3234 goto out; 3238 goto out;
@@ -3273,6 +3277,10 @@ opened:
3273 goto exit_fput; 3277 goto exit_fput;
3274 } 3278 }
3275out: 3279out:
3280 if (unlikely(error > 0)) {
3281 WARN_ON(1);
3282 error = -EINVAL;
3283 }
3276 if (got_write) 3284 if (got_write)
3277 mnt_drop_write(nd->path.mnt); 3285 mnt_drop_write(nd->path.mnt);
3278 path_put(&save_parent); 3286 path_put(&save_parent);
diff --git a/fs/nfs/blocklayout/extent_tree.c b/fs/nfs/blocklayout/extent_tree.c
index c59a59c37f3d..35ab51c04814 100644
--- a/fs/nfs/blocklayout/extent_tree.c
+++ b/fs/nfs/blocklayout/extent_tree.c
@@ -476,6 +476,7 @@ static void ext_tree_free_commitdata(struct nfs4_layoutcommit_args *arg,
476 476
477 for (i = 0; i < nr_pages; i++) 477 for (i = 0; i < nr_pages; i++)
478 put_page(arg->layoutupdate_pages[i]); 478 put_page(arg->layoutupdate_pages[i]);
479 vfree(arg->start_p);
479 kfree(arg->layoutupdate_pages); 480 kfree(arg->layoutupdate_pages);
480 } else { 481 } else {
481 put_page(arg->layoutupdate_page); 482 put_page(arg->layoutupdate_page);
@@ -559,10 +560,15 @@ retry:
559 560
560 if (unlikely(arg->layoutupdate_pages != &arg->layoutupdate_page)) { 561 if (unlikely(arg->layoutupdate_pages != &arg->layoutupdate_page)) {
561 void *p = start_p, *end = p + arg->layoutupdate_len; 562 void *p = start_p, *end = p + arg->layoutupdate_len;
563 struct page *page = NULL;
562 int i = 0; 564 int i = 0;
563 565
564 for ( ; p < end; p += PAGE_SIZE) 566 arg->start_p = start_p;
565 arg->layoutupdate_pages[i++] = vmalloc_to_page(p); 567 for ( ; p < end; p += PAGE_SIZE) {
568 page = vmalloc_to_page(p);
569 arg->layoutupdate_pages[i++] = page;
570 get_page(page);
571 }
566 } 572 }
567 573
568 dprintk("%s found %zu ranges\n", __func__, count); 574 dprintk("%s found %zu ranges\n", __func__, count);
diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c
index bd25dc7077f7..dff83460e5a6 100644
--- a/fs/nfs/nfs42proc.c
+++ b/fs/nfs/nfs42proc.c
@@ -16,29 +16,8 @@
16 16
17#define NFSDBG_FACILITY NFSDBG_PROC 17#define NFSDBG_FACILITY NFSDBG_PROC
18 18
19static int nfs42_set_rw_stateid(nfs4_stateid *dst, struct file *file,
20 fmode_t fmode)
21{
22 struct nfs_open_context *open;
23 struct nfs_lock_context *lock;
24 int ret;
25
26 open = get_nfs_open_context(nfs_file_open_context(file));
27 lock = nfs_get_lock_context(open);
28 if (IS_ERR(lock)) {
29 put_nfs_open_context(open);
30 return PTR_ERR(lock);
31 }
32
33 ret = nfs4_set_rw_stateid(dst, open, lock, fmode);
34
35 nfs_put_lock_context(lock);
36 put_nfs_open_context(open);
37 return ret;
38}
39
40static int _nfs42_proc_fallocate(struct rpc_message *msg, struct file *filep, 19static int _nfs42_proc_fallocate(struct rpc_message *msg, struct file *filep,
41 loff_t offset, loff_t len) 20 struct nfs_lock_context *lock, loff_t offset, loff_t len)
42{ 21{
43 struct inode *inode = file_inode(filep); 22 struct inode *inode = file_inode(filep);
44 struct nfs_server *server = NFS_SERVER(inode); 23 struct nfs_server *server = NFS_SERVER(inode);
@@ -56,7 +35,8 @@ static int _nfs42_proc_fallocate(struct rpc_message *msg, struct file *filep,
56 msg->rpc_argp = &args; 35 msg->rpc_argp = &args;
57 msg->rpc_resp = &res; 36 msg->rpc_resp = &res;
58 37
59 status = nfs42_set_rw_stateid(&args.falloc_stateid, filep, FMODE_WRITE); 38 status = nfs4_set_rw_stateid(&args.falloc_stateid, lock->open_context,
39 lock, FMODE_WRITE);
60 if (status) 40 if (status)
61 return status; 41 return status;
62 42
@@ -78,15 +58,26 @@ static int nfs42_proc_fallocate(struct rpc_message *msg, struct file *filep,
78{ 58{
79 struct nfs_server *server = NFS_SERVER(file_inode(filep)); 59 struct nfs_server *server = NFS_SERVER(file_inode(filep));
80 struct nfs4_exception exception = { }; 60 struct nfs4_exception exception = { };
61 struct nfs_lock_context *lock;
81 int err; 62 int err;
82 63
64 lock = nfs_get_lock_context(nfs_file_open_context(filep));
65 if (IS_ERR(lock))
66 return PTR_ERR(lock);
67
68 exception.inode = file_inode(filep);
69 exception.state = lock->open_context->state;
70
83 do { 71 do {
84 err = _nfs42_proc_fallocate(msg, filep, offset, len); 72 err = _nfs42_proc_fallocate(msg, filep, lock, offset, len);
85 if (err == -ENOTSUPP) 73 if (err == -ENOTSUPP) {
86 return -EOPNOTSUPP; 74 err = -EOPNOTSUPP;
75 break;
76 }
87 err = nfs4_handle_exception(server, err, &exception); 77 err = nfs4_handle_exception(server, err, &exception);
88 } while (exception.retry); 78 } while (exception.retry);
89 79
80 nfs_put_lock_context(lock);
90 return err; 81 return err;
91} 82}
92 83
@@ -135,7 +126,8 @@ int nfs42_proc_deallocate(struct file *filep, loff_t offset, loff_t len)
135 return err; 126 return err;
136} 127}
137 128
138static loff_t _nfs42_proc_llseek(struct file *filep, loff_t offset, int whence) 129static loff_t _nfs42_proc_llseek(struct file *filep,
130 struct nfs_lock_context *lock, loff_t offset, int whence)
139{ 131{
140 struct inode *inode = file_inode(filep); 132 struct inode *inode = file_inode(filep);
141 struct nfs42_seek_args args = { 133 struct nfs42_seek_args args = {
@@ -156,7 +148,8 @@ static loff_t _nfs42_proc_llseek(struct file *filep, loff_t offset, int whence)
156 if (!nfs_server_capable(inode, NFS_CAP_SEEK)) 148 if (!nfs_server_capable(inode, NFS_CAP_SEEK))
157 return -ENOTSUPP; 149 return -ENOTSUPP;
158 150
159 status = nfs42_set_rw_stateid(&args.sa_stateid, filep, FMODE_READ); 151 status = nfs4_set_rw_stateid(&args.sa_stateid, lock->open_context,
152 lock, FMODE_READ);
160 if (status) 153 if (status)
161 return status; 154 return status;
162 155
@@ -175,17 +168,28 @@ loff_t nfs42_proc_llseek(struct file *filep, loff_t offset, int whence)
175{ 168{
176 struct nfs_server *server = NFS_SERVER(file_inode(filep)); 169 struct nfs_server *server = NFS_SERVER(file_inode(filep));
177 struct nfs4_exception exception = { }; 170 struct nfs4_exception exception = { };
171 struct nfs_lock_context *lock;
178 loff_t err; 172 loff_t err;
179 173
174 lock = nfs_get_lock_context(nfs_file_open_context(filep));
175 if (IS_ERR(lock))
176 return PTR_ERR(lock);
177
178 exception.inode = file_inode(filep);
179 exception.state = lock->open_context->state;
180
180 do { 181 do {
181 err = _nfs42_proc_llseek(filep, offset, whence); 182 err = _nfs42_proc_llseek(filep, lock, offset, whence);
182 if (err >= 0) 183 if (err >= 0)
183 break; 184 break;
184 if (err == -ENOTSUPP) 185 if (err == -ENOTSUPP) {
185 return -EOPNOTSUPP; 186 err = -EOPNOTSUPP;
187 break;
188 }
186 err = nfs4_handle_exception(server, err, &exception); 189 err = nfs4_handle_exception(server, err, &exception);
187 } while (exception.retry); 190 } while (exception.retry);
188 191
192 nfs_put_lock_context(lock);
189 return err; 193 return err;
190} 194}
191 195
@@ -298,8 +302,9 @@ int nfs42_proc_layoutstats_generic(struct nfs_server *server,
298} 302}
299 303
300static int _nfs42_proc_clone(struct rpc_message *msg, struct file *src_f, 304static int _nfs42_proc_clone(struct rpc_message *msg, struct file *src_f,
301 struct file *dst_f, loff_t src_offset, 305 struct file *dst_f, struct nfs_lock_context *src_lock,
302 loff_t dst_offset, loff_t count) 306 struct nfs_lock_context *dst_lock, loff_t src_offset,
307 loff_t dst_offset, loff_t count)
303{ 308{
304 struct inode *src_inode = file_inode(src_f); 309 struct inode *src_inode = file_inode(src_f);
305 struct inode *dst_inode = file_inode(dst_f); 310 struct inode *dst_inode = file_inode(dst_f);
@@ -320,11 +325,13 @@ static int _nfs42_proc_clone(struct rpc_message *msg, struct file *src_f,
320 msg->rpc_argp = &args; 325 msg->rpc_argp = &args;
321 msg->rpc_resp = &res; 326 msg->rpc_resp = &res;
322 327
323 status = nfs42_set_rw_stateid(&args.src_stateid, src_f, FMODE_READ); 328 status = nfs4_set_rw_stateid(&args.src_stateid, src_lock->open_context,
329 src_lock, FMODE_READ);
324 if (status) 330 if (status)
325 return status; 331 return status;
326 332
327 status = nfs42_set_rw_stateid(&args.dst_stateid, dst_f, FMODE_WRITE); 333 status = nfs4_set_rw_stateid(&args.dst_stateid, dst_lock->open_context,
334 dst_lock, FMODE_WRITE);
328 if (status) 335 if (status)
329 return status; 336 return status;
330 337
@@ -349,22 +356,48 @@ int nfs42_proc_clone(struct file *src_f, struct file *dst_f,
349 }; 356 };
350 struct inode *inode = file_inode(src_f); 357 struct inode *inode = file_inode(src_f);
351 struct nfs_server *server = NFS_SERVER(file_inode(src_f)); 358 struct nfs_server *server = NFS_SERVER(file_inode(src_f));
352 struct nfs4_exception exception = { }; 359 struct nfs_lock_context *src_lock;
353 int err; 360 struct nfs_lock_context *dst_lock;
361 struct nfs4_exception src_exception = { };
362 struct nfs4_exception dst_exception = { };
363 int err, err2;
354 364
355 if (!nfs_server_capable(inode, NFS_CAP_CLONE)) 365 if (!nfs_server_capable(inode, NFS_CAP_CLONE))
356 return -EOPNOTSUPP; 366 return -EOPNOTSUPP;
357 367
368 src_lock = nfs_get_lock_context(nfs_file_open_context(src_f));
369 if (IS_ERR(src_lock))
370 return PTR_ERR(src_lock);
371
372 src_exception.inode = file_inode(src_f);
373 src_exception.state = src_lock->open_context->state;
374
375 dst_lock = nfs_get_lock_context(nfs_file_open_context(dst_f));
376 if (IS_ERR(dst_lock)) {
377 err = PTR_ERR(dst_lock);
378 goto out_put_src_lock;
379 }
380
381 dst_exception.inode = file_inode(dst_f);
382 dst_exception.state = dst_lock->open_context->state;
383
358 do { 384 do {
359 err = _nfs42_proc_clone(&msg, src_f, dst_f, src_offset, 385 err = _nfs42_proc_clone(&msg, src_f, dst_f, src_lock, dst_lock,
360 dst_offset, count); 386 src_offset, dst_offset, count);
361 if (err == -ENOTSUPP || err == -EOPNOTSUPP) { 387 if (err == -ENOTSUPP || err == -EOPNOTSUPP) {
362 NFS_SERVER(inode)->caps &= ~NFS_CAP_CLONE; 388 NFS_SERVER(inode)->caps &= ~NFS_CAP_CLONE;
363 return -EOPNOTSUPP; 389 err = -EOPNOTSUPP;
390 break;
364 } 391 }
365 err = nfs4_handle_exception(server, err, &exception);
366 } while (exception.retry);
367 392
368 return err; 393 err2 = nfs4_handle_exception(server, err, &src_exception);
394 err = nfs4_handle_exception(server, err, &dst_exception);
395 if (!err)
396 err = err2;
397 } while (src_exception.retry || dst_exception.retry);
369 398
399 nfs_put_lock_context(dst_lock);
400out_put_src_lock:
401 nfs_put_lock_context(src_lock);
402 return err;
370} 403}
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 4bfc33ad0563..14881594dd07 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -2466,9 +2466,9 @@ static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata,
2466 dentry = d_add_unique(dentry, igrab(state->inode)); 2466 dentry = d_add_unique(dentry, igrab(state->inode));
2467 if (dentry == NULL) { 2467 if (dentry == NULL) {
2468 dentry = opendata->dentry; 2468 dentry = opendata->dentry;
2469 } else if (dentry != ctx->dentry) { 2469 } else {
2470 dput(ctx->dentry); 2470 dput(ctx->dentry);
2471 ctx->dentry = dget(dentry); 2471 ctx->dentry = dentry;
2472 } 2472 }
2473 nfs_set_verifier(dentry, 2473 nfs_set_verifier(dentry,
2474 nfs_save_change_attribute(d_inode(opendata->dir))); 2474 nfs_save_change_attribute(d_inode(opendata->dir)));
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 482b6e94bb37..2fa483e6dbe2 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -252,6 +252,27 @@ pnfs_put_layout_hdr(struct pnfs_layout_hdr *lo)
252 } 252 }
253} 253}
254 254
255/*
256 * Mark a pnfs_layout_hdr and all associated layout segments as invalid
257 *
258 * In order to continue using the pnfs_layout_hdr, a full recovery
259 * is required.
260 * Note that caller must hold inode->i_lock.
261 */
262static int
263pnfs_mark_layout_stateid_invalid(struct pnfs_layout_hdr *lo,
264 struct list_head *lseg_list)
265{
266 struct pnfs_layout_range range = {
267 .iomode = IOMODE_ANY,
268 .offset = 0,
269 .length = NFS4_MAX_UINT64,
270 };
271
272 set_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags);
273 return pnfs_mark_matching_lsegs_invalid(lo, lseg_list, &range);
274}
275
255static int 276static int
256pnfs_iomode_to_fail_bit(u32 iomode) 277pnfs_iomode_to_fail_bit(u32 iomode)
257{ 278{
@@ -554,9 +575,8 @@ pnfs_destroy_layout(struct nfs_inode *nfsi)
554 spin_lock(&nfsi->vfs_inode.i_lock); 575 spin_lock(&nfsi->vfs_inode.i_lock);
555 lo = nfsi->layout; 576 lo = nfsi->layout;
556 if (lo) { 577 if (lo) {
557 lo->plh_block_lgets++; /* permanently block new LAYOUTGETs */
558 pnfs_mark_matching_lsegs_invalid(lo, &tmp_list, NULL);
559 pnfs_get_layout_hdr(lo); 578 pnfs_get_layout_hdr(lo);
579 pnfs_mark_layout_stateid_invalid(lo, &tmp_list);
560 pnfs_layout_clear_fail_bit(lo, NFS_LAYOUT_RO_FAILED); 580 pnfs_layout_clear_fail_bit(lo, NFS_LAYOUT_RO_FAILED);
561 pnfs_layout_clear_fail_bit(lo, NFS_LAYOUT_RW_FAILED); 581 pnfs_layout_clear_fail_bit(lo, NFS_LAYOUT_RW_FAILED);
562 spin_unlock(&nfsi->vfs_inode.i_lock); 582 spin_unlock(&nfsi->vfs_inode.i_lock);
@@ -617,11 +637,6 @@ pnfs_layout_free_bulk_destroy_list(struct list_head *layout_list,
617{ 637{
618 struct pnfs_layout_hdr *lo; 638 struct pnfs_layout_hdr *lo;
619 struct inode *inode; 639 struct inode *inode;
620 struct pnfs_layout_range range = {
621 .iomode = IOMODE_ANY,
622 .offset = 0,
623 .length = NFS4_MAX_UINT64,
624 };
625 LIST_HEAD(lseg_list); 640 LIST_HEAD(lseg_list);
626 int ret = 0; 641 int ret = 0;
627 642
@@ -636,11 +651,11 @@ pnfs_layout_free_bulk_destroy_list(struct list_head *layout_list,
636 651
637 spin_lock(&inode->i_lock); 652 spin_lock(&inode->i_lock);
638 list_del_init(&lo->plh_bulk_destroy); 653 list_del_init(&lo->plh_bulk_destroy);
639 lo->plh_block_lgets++; /* permanently block new LAYOUTGETs */ 654 if (pnfs_mark_layout_stateid_invalid(lo, &lseg_list)) {
640 if (is_bulk_recall) 655 if (is_bulk_recall)
641 set_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags); 656 set_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags);
642 if (pnfs_mark_matching_lsegs_invalid(lo, &lseg_list, &range))
643 ret = -EAGAIN; 657 ret = -EAGAIN;
658 }
644 spin_unlock(&inode->i_lock); 659 spin_unlock(&inode->i_lock);
645 pnfs_free_lseg_list(&lseg_list); 660 pnfs_free_lseg_list(&lseg_list);
646 /* Free all lsegs that are attached to commit buckets */ 661 /* Free all lsegs that are attached to commit buckets */
@@ -1738,8 +1753,19 @@ pnfs_set_plh_return_iomode(struct pnfs_layout_hdr *lo, enum pnfs_iomode iomode)
1738 if (lo->plh_return_iomode != 0) 1753 if (lo->plh_return_iomode != 0)
1739 iomode = IOMODE_ANY; 1754 iomode = IOMODE_ANY;
1740 lo->plh_return_iomode = iomode; 1755 lo->plh_return_iomode = iomode;
1756 set_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags);
1741} 1757}
1742 1758
1759/**
1760 * pnfs_mark_matching_lsegs_return - Free or return matching layout segments
1761 * @lo: pointer to layout header
1762 * @tmp_list: list header to be used with pnfs_free_lseg_list()
1763 * @return_range: describe layout segment ranges to be returned
1764 *
1765 * This function is mainly intended for use by layoutrecall. It attempts
1766 * to free the layout segment immediately, or else to mark it for return
1767 * as soon as its reference count drops to zero.
1768 */
1743int 1769int
1744pnfs_mark_matching_lsegs_return(struct pnfs_layout_hdr *lo, 1770pnfs_mark_matching_lsegs_return(struct pnfs_layout_hdr *lo,
1745 struct list_head *tmp_list, 1771 struct list_head *tmp_list,
@@ -1762,12 +1788,11 @@ pnfs_mark_matching_lsegs_return(struct pnfs_layout_hdr *lo,
1762 lseg, lseg->pls_range.iomode, 1788 lseg, lseg->pls_range.iomode,
1763 lseg->pls_range.offset, 1789 lseg->pls_range.offset,
1764 lseg->pls_range.length); 1790 lseg->pls_range.length);
1791 if (mark_lseg_invalid(lseg, tmp_list))
1792 continue;
1793 remaining++;
1765 set_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags); 1794 set_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags);
1766 pnfs_set_plh_return_iomode(lo, return_range->iomode); 1795 pnfs_set_plh_return_iomode(lo, return_range->iomode);
1767 if (!mark_lseg_invalid(lseg, tmp_list))
1768 remaining++;
1769 set_bit(NFS_LAYOUT_RETURN_REQUESTED,
1770 &lo->plh_flags);
1771 } 1796 }
1772 return remaining; 1797 return remaining;
1773} 1798}
diff --git a/fs/notify/mark.c b/fs/notify/mark.c
index cfcbf114676e..7115c5d7d373 100644
--- a/fs/notify/mark.c
+++ b/fs/notify/mark.c
@@ -91,7 +91,14 @@
91#include <linux/fsnotify_backend.h> 91#include <linux/fsnotify_backend.h>
92#include "fsnotify.h" 92#include "fsnotify.h"
93 93
94#define FSNOTIFY_REAPER_DELAY (1) /* 1 jiffy */
95
94struct srcu_struct fsnotify_mark_srcu; 96struct srcu_struct fsnotify_mark_srcu;
97static DEFINE_SPINLOCK(destroy_lock);
98static LIST_HEAD(destroy_list);
99
100static void fsnotify_mark_destroy(struct work_struct *work);
101static DECLARE_DELAYED_WORK(reaper_work, fsnotify_mark_destroy);
95 102
96void fsnotify_get_mark(struct fsnotify_mark *mark) 103void fsnotify_get_mark(struct fsnotify_mark *mark)
97{ 104{
@@ -165,19 +172,10 @@ void fsnotify_detach_mark(struct fsnotify_mark *mark)
165 atomic_dec(&group->num_marks); 172 atomic_dec(&group->num_marks);
166} 173}
167 174
168static void
169fsnotify_mark_free_rcu(struct rcu_head *rcu)
170{
171 struct fsnotify_mark *mark;
172
173 mark = container_of(rcu, struct fsnotify_mark, g_rcu);
174 fsnotify_put_mark(mark);
175}
176
177/* 175/*
178 * Free fsnotify mark. The freeing is actually happening from a call_srcu 176 * Free fsnotify mark. The freeing is actually happening from a kthread which
179 * callback. Caller must have a reference to the mark or be protected by 177 * first waits for srcu period end. Caller must have a reference to the mark
180 * fsnotify_mark_srcu. 178 * or be protected by fsnotify_mark_srcu.
181 */ 179 */
182void fsnotify_free_mark(struct fsnotify_mark *mark) 180void fsnotify_free_mark(struct fsnotify_mark *mark)
183{ 181{
@@ -192,7 +190,11 @@ void fsnotify_free_mark(struct fsnotify_mark *mark)
192 mark->flags &= ~FSNOTIFY_MARK_FLAG_ALIVE; 190 mark->flags &= ~FSNOTIFY_MARK_FLAG_ALIVE;
193 spin_unlock(&mark->lock); 191 spin_unlock(&mark->lock);
194 192
195 call_srcu(&fsnotify_mark_srcu, &mark->g_rcu, fsnotify_mark_free_rcu); 193 spin_lock(&destroy_lock);
194 list_add(&mark->g_list, &destroy_list);
195 spin_unlock(&destroy_lock);
196 queue_delayed_work(system_unbound_wq, &reaper_work,
197 FSNOTIFY_REAPER_DELAY);
196 198
197 /* 199 /*
198 * Some groups like to know that marks are being freed. This is a 200 * Some groups like to know that marks are being freed. This is a
@@ -388,7 +390,12 @@ err:
388 390
389 spin_unlock(&mark->lock); 391 spin_unlock(&mark->lock);
390 392
391 call_srcu(&fsnotify_mark_srcu, &mark->g_rcu, fsnotify_mark_free_rcu); 393 spin_lock(&destroy_lock);
394 list_add(&mark->g_list, &destroy_list);
395 spin_unlock(&destroy_lock);
396 queue_delayed_work(system_unbound_wq, &reaper_work,
397 FSNOTIFY_REAPER_DELAY);
398
392 return ret; 399 return ret;
393} 400}
394 401
@@ -491,3 +498,21 @@ void fsnotify_init_mark(struct fsnotify_mark *mark,
491 atomic_set(&mark->refcnt, 1); 498 atomic_set(&mark->refcnt, 1);
492 mark->free_mark = free_mark; 499 mark->free_mark = free_mark;
493} 500}
501
502static void fsnotify_mark_destroy(struct work_struct *work)
503{
504 struct fsnotify_mark *mark, *next;
505 struct list_head private_destroy_list;
506
507 spin_lock(&destroy_lock);
508 /* exchange the list head */
509 list_replace_init(&destroy_list, &private_destroy_list);
510 spin_unlock(&destroy_lock);
511
512 synchronize_srcu(&fsnotify_mark_srcu);
513
514 list_for_each_entry_safe(mark, next, &private_destroy_list, g_list) {
515 list_del_init(&mark->g_list);
516 fsnotify_put_mark(mark);
517 }
518}
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 794fd1587f34..cda0361e95a4 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -956,6 +956,7 @@ clean_orphan:
956 tmp_ret = ocfs2_del_inode_from_orphan(osb, inode, di_bh, 956 tmp_ret = ocfs2_del_inode_from_orphan(osb, inode, di_bh,
957 update_isize, end); 957 update_isize, end);
958 if (tmp_ret < 0) { 958 if (tmp_ret < 0) {
959 ocfs2_inode_unlock(inode, 1);
959 ret = tmp_ret; 960 ret = tmp_ret;
960 mlog_errno(ret); 961 mlog_errno(ret);
961 brelse(di_bh); 962 brelse(di_bh);
diff --git a/fs/pnode.c b/fs/pnode.c
index 6367e1e435c6..c524fdddc7fb 100644
--- a/fs/pnode.c
+++ b/fs/pnode.c
@@ -202,6 +202,11 @@ static struct mount *last_dest, *last_source, *dest_master;
202static struct mountpoint *mp; 202static struct mountpoint *mp;
203static struct hlist_head *list; 203static struct hlist_head *list;
204 204
205static inline bool peers(struct mount *m1, struct mount *m2)
206{
207 return m1->mnt_group_id == m2->mnt_group_id && m1->mnt_group_id;
208}
209
205static int propagate_one(struct mount *m) 210static int propagate_one(struct mount *m)
206{ 211{
207 struct mount *child; 212 struct mount *child;
@@ -212,7 +217,7 @@ static int propagate_one(struct mount *m)
212 /* skip if mountpoint isn't covered by it */ 217 /* skip if mountpoint isn't covered by it */
213 if (!is_subdir(mp->m_dentry, m->mnt.mnt_root)) 218 if (!is_subdir(mp->m_dentry, m->mnt.mnt_root))
214 return 0; 219 return 0;
215 if (m->mnt_group_id == last_dest->mnt_group_id) { 220 if (peers(m, last_dest)) {
216 type = CL_MAKE_SHARED; 221 type = CL_MAKE_SHARED;
217 } else { 222 } else {
218 struct mount *n, *p; 223 struct mount *n, *p;
@@ -223,7 +228,7 @@ static int propagate_one(struct mount *m)
223 last_source = last_source->mnt_master; 228 last_source = last_source->mnt_master;
224 last_dest = last_source->mnt_parent; 229 last_dest = last_source->mnt_parent;
225 } 230 }
226 if (n->mnt_group_id != last_dest->mnt_group_id) { 231 if (!peers(n, last_dest)) {
227 last_source = last_source->mnt_master; 232 last_source = last_source->mnt_master;
228 last_dest = last_source->mnt_parent; 233 last_dest = last_source->mnt_parent;
229 } 234 }
diff --git a/fs/read_write.c b/fs/read_write.c
index 324ec271cc4e..dadf24e5c95b 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -17,6 +17,7 @@
17#include <linux/splice.h> 17#include <linux/splice.h>
18#include <linux/compat.h> 18#include <linux/compat.h>
19#include <linux/mount.h> 19#include <linux/mount.h>
20#include <linux/fs.h>
20#include "internal.h" 21#include "internal.h"
21 22
22#include <asm/uaccess.h> 23#include <asm/uaccess.h>
@@ -183,7 +184,7 @@ loff_t no_seek_end_llseek(struct file *file, loff_t offset, int whence)
183 switch (whence) { 184 switch (whence) {
184 case SEEK_SET: case SEEK_CUR: 185 case SEEK_SET: case SEEK_CUR:
185 return generic_file_llseek_size(file, offset, whence, 186 return generic_file_llseek_size(file, offset, whence,
186 ~0ULL, 0); 187 OFFSET_MAX, 0);
187 default: 188 default:
188 return -EINVAL; 189 return -EINVAL;
189 } 190 }
@@ -1532,10 +1533,12 @@ int vfs_clone_file_range(struct file *file_in, loff_t pos_in,
1532 1533
1533 if (!(file_in->f_mode & FMODE_READ) || 1534 if (!(file_in->f_mode & FMODE_READ) ||
1534 !(file_out->f_mode & FMODE_WRITE) || 1535 !(file_out->f_mode & FMODE_WRITE) ||
1535 (file_out->f_flags & O_APPEND) || 1536 (file_out->f_flags & O_APPEND))
1536 !file_in->f_op->clone_file_range)
1537 return -EBADF; 1537 return -EBADF;
1538 1538
1539 if (!file_in->f_op->clone_file_range)
1540 return -EOPNOTSUPP;
1541
1539 ret = clone_verify_area(file_in, pos_in, len, false); 1542 ret = clone_verify_area(file_in, pos_in, len, false);
1540 if (ret) 1543 if (ret)
1541 return ret; 1544 return ret;
diff --git a/fs/super.c b/fs/super.c
index 1182af8fd5ff..74914b1bae70 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -415,6 +415,7 @@ void generic_shutdown_super(struct super_block *sb)
415 sb->s_flags &= ~MS_ACTIVE; 415 sb->s_flags &= ~MS_ACTIVE;
416 416
417 fsnotify_unmount_inodes(sb); 417 fsnotify_unmount_inodes(sb);
418 cgroup_writeback_umount();
418 419
419 evict_inodes(sb); 420 evict_inodes(sb);
420 421
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index 50311703135b..66cdb44616d5 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -287,6 +287,12 @@ int handle_userfault(struct vm_area_struct *vma, unsigned long address,
287 goto out; 287 goto out;
288 288
289 /* 289 /*
290 * We don't do userfault handling for the final child pid update.
291 */
292 if (current->flags & PF_EXITING)
293 goto out;
294
295 /*
290 * Check that we can return VM_FAULT_RETRY. 296 * Check that we can return VM_FAULT_RETRY.
291 * 297 *
292 * NOTE: it should become possible to return VM_FAULT_RETRY 298 * NOTE: it should become possible to return VM_FAULT_RETRY
diff --git a/fs/xattr.c b/fs/xattr.c
index 07d0e47f6a7f..4861322e28e8 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -940,7 +940,7 @@ ssize_t simple_xattr_list(struct inode *inode, struct simple_xattrs *xattrs,
940 bool trusted = capable(CAP_SYS_ADMIN); 940 bool trusted = capable(CAP_SYS_ADMIN);
941 struct simple_xattr *xattr; 941 struct simple_xattr *xattr;
942 ssize_t remaining_size = size; 942 ssize_t remaining_size = size;
943 int err; 943 int err = 0;
944 944
945#ifdef CONFIG_FS_POSIX_ACL 945#ifdef CONFIG_FS_POSIX_ACL
946 if (inode->i_acl) { 946 if (inode->i_acl) {
@@ -965,11 +965,11 @@ ssize_t simple_xattr_list(struct inode *inode, struct simple_xattrs *xattrs,
965 965
966 err = xattr_list_one(&buffer, &remaining_size, xattr->name); 966 err = xattr_list_one(&buffer, &remaining_size, xattr->name);
967 if (err) 967 if (err)
968 return err; 968 break;
969 } 969 }
970 spin_unlock(&xattrs->lock); 970 spin_unlock(&xattrs->lock);
971 971
972 return size - remaining_size; 972 return err ? err : size - remaining_size;
973} 973}
974 974
975/* 975/*
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 379c089fb051..a9ebabfe7587 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -55,7 +55,7 @@ xfs_count_page_state(
55 } while ((bh = bh->b_this_page) != head); 55 } while ((bh = bh->b_this_page) != head);
56} 56}
57 57
58STATIC struct block_device * 58struct block_device *
59xfs_find_bdev_for_inode( 59xfs_find_bdev_for_inode(
60 struct inode *inode) 60 struct inode *inode)
61{ 61{
@@ -1208,6 +1208,10 @@ xfs_vm_writepages(
1208 struct writeback_control *wbc) 1208 struct writeback_control *wbc)
1209{ 1209{
1210 xfs_iflags_clear(XFS_I(mapping->host), XFS_ITRUNCATED); 1210 xfs_iflags_clear(XFS_I(mapping->host), XFS_ITRUNCATED);
1211 if (dax_mapping(mapping))
1212 return dax_writeback_mapping_range(mapping,
1213 xfs_find_bdev_for_inode(mapping->host), wbc);
1214
1211 return generic_writepages(mapping, wbc); 1215 return generic_writepages(mapping, wbc);
1212} 1216}
1213 1217
diff --git a/fs/xfs/xfs_aops.h b/fs/xfs/xfs_aops.h
index f6ffc9ae5ceb..a4343c63fb38 100644
--- a/fs/xfs/xfs_aops.h
+++ b/fs/xfs/xfs_aops.h
@@ -62,5 +62,6 @@ int xfs_get_blocks_dax_fault(struct inode *inode, sector_t offset,
62 struct buffer_head *map_bh, int create); 62 struct buffer_head *map_bh, int create);
63 63
64extern void xfs_count_page_state(struct page *, int *, int *); 64extern void xfs_count_page_state(struct page *, int *, int *);
65extern struct block_device *xfs_find_bdev_for_inode(struct inode *);
65 66
66#endif /* __XFS_AOPS_H__ */ 67#endif /* __XFS_AOPS_H__ */
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index 45ec9e40150c..6c876012b2e5 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -75,7 +75,8 @@ xfs_zero_extent(
75 ssize_t size = XFS_FSB_TO_B(mp, count_fsb); 75 ssize_t size = XFS_FSB_TO_B(mp, count_fsb);
76 76
77 if (IS_DAX(VFS_I(ip))) 77 if (IS_DAX(VFS_I(ip)))
78 return dax_clear_blocks(VFS_I(ip), block, size); 78 return dax_clear_sectors(xfs_find_bdev_for_inode(VFS_I(ip)),
79 sector, size);
79 80
80 /* 81 /*
81 * let the block layer decide on the fastest method of 82 * let the block layer decide on the fastest method of