diff options
author | Ingo Molnar <mingo@kernel.org> | 2016-03-07 03:27:30 -0500 |
---|---|---|
committer | Ingo Molnar <mingo@kernel.org> | 2016-03-07 03:27:30 -0500 |
commit | ec87e1cf7d8399d81d8965c6d852f8057a8dd687 (patch) | |
tree | 472a168fa4861090edf110c8a9712a5c15ea259f /fs | |
parent | 869ae76147ffdf21ad24f0e599303cd58a2bb39f (diff) | |
parent | f6cede5b49e822ebc41a099fe41ab4989f64e2cb (diff) |
Merge tag 'v4.5-rc7' into x86/asm, to pick up SMAP fix
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'fs')
59 files changed, 686 insertions, 314 deletions
diff --git a/fs/affs/file.c b/fs/affs/file.c index 0548c53f41d5..22fc7c802d69 100644 --- a/fs/affs/file.c +++ b/fs/affs/file.c | |||
@@ -511,8 +511,6 @@ affs_do_readpage_ofs(struct page *page, unsigned to) | |||
511 | pr_debug("%s(%lu, %ld, 0, %d)\n", __func__, inode->i_ino, | 511 | pr_debug("%s(%lu, %ld, 0, %d)\n", __func__, inode->i_ino, |
512 | page->index, to); | 512 | page->index, to); |
513 | BUG_ON(to > PAGE_CACHE_SIZE); | 513 | BUG_ON(to > PAGE_CACHE_SIZE); |
514 | kmap(page); | ||
515 | data = page_address(page); | ||
516 | bsize = AFFS_SB(sb)->s_data_blksize; | 514 | bsize = AFFS_SB(sb)->s_data_blksize; |
517 | tmp = page->index << PAGE_CACHE_SHIFT; | 515 | tmp = page->index << PAGE_CACHE_SHIFT; |
518 | bidx = tmp / bsize; | 516 | bidx = tmp / bsize; |
@@ -524,14 +522,15 @@ affs_do_readpage_ofs(struct page *page, unsigned to) | |||
524 | return PTR_ERR(bh); | 522 | return PTR_ERR(bh); |
525 | tmp = min(bsize - boff, to - pos); | 523 | tmp = min(bsize - boff, to - pos); |
526 | BUG_ON(pos + tmp > to || tmp > bsize); | 524 | BUG_ON(pos + tmp > to || tmp > bsize); |
525 | data = kmap_atomic(page); | ||
527 | memcpy(data + pos, AFFS_DATA(bh) + boff, tmp); | 526 | memcpy(data + pos, AFFS_DATA(bh) + boff, tmp); |
527 | kunmap_atomic(data); | ||
528 | affs_brelse(bh); | 528 | affs_brelse(bh); |
529 | bidx++; | 529 | bidx++; |
530 | pos += tmp; | 530 | pos += tmp; |
531 | boff = 0; | 531 | boff = 0; |
532 | } | 532 | } |
533 | flush_dcache_page(page); | 533 | flush_dcache_page(page); |
534 | kunmap(page); | ||
535 | return 0; | 534 | return 0; |
536 | } | 535 | } |
537 | 536 | ||
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 051ea4809c14..7d914c67a9d0 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c | |||
@@ -653,7 +653,7 @@ static unsigned long randomize_stack_top(unsigned long stack_top) | |||
653 | 653 | ||
654 | if ((current->flags & PF_RANDOMIZE) && | 654 | if ((current->flags & PF_RANDOMIZE) && |
655 | !(current->personality & ADDR_NO_RANDOMIZE)) { | 655 | !(current->personality & ADDR_NO_RANDOMIZE)) { |
656 | random_variable = (unsigned long) get_random_int(); | 656 | random_variable = get_random_long(); |
657 | random_variable &= STACK_RND_MASK; | 657 | random_variable &= STACK_RND_MASK; |
658 | random_variable <<= PAGE_SHIFT; | 658 | random_variable <<= PAGE_SHIFT; |
659 | } | 659 | } |
diff --git a/fs/block_dev.c b/fs/block_dev.c index 39b3a174a425..826b164a4b5b 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c | |||
@@ -1201,7 +1201,11 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) | |||
1201 | bdev->bd_disk = disk; | 1201 | bdev->bd_disk = disk; |
1202 | bdev->bd_queue = disk->queue; | 1202 | bdev->bd_queue = disk->queue; |
1203 | bdev->bd_contains = bdev; | 1203 | bdev->bd_contains = bdev; |
1204 | bdev->bd_inode->i_flags = disk->fops->direct_access ? S_DAX : 0; | 1204 | if (IS_ENABLED(CONFIG_BLK_DEV_DAX) && disk->fops->direct_access) |
1205 | bdev->bd_inode->i_flags = S_DAX; | ||
1206 | else | ||
1207 | bdev->bd_inode->i_flags = 0; | ||
1208 | |||
1205 | if (!partno) { | 1209 | if (!partno) { |
1206 | ret = -ENXIO; | 1210 | ret = -ENXIO; |
1207 | bdev->bd_part = disk_get_part(disk, partno); | 1211 | bdev->bd_part = disk_get_part(disk, partno); |
@@ -1693,13 +1697,24 @@ static int blkdev_releasepage(struct page *page, gfp_t wait) | |||
1693 | return try_to_free_buffers(page); | 1697 | return try_to_free_buffers(page); |
1694 | } | 1698 | } |
1695 | 1699 | ||
1700 | static int blkdev_writepages(struct address_space *mapping, | ||
1701 | struct writeback_control *wbc) | ||
1702 | { | ||
1703 | if (dax_mapping(mapping)) { | ||
1704 | struct block_device *bdev = I_BDEV(mapping->host); | ||
1705 | |||
1706 | return dax_writeback_mapping_range(mapping, bdev, wbc); | ||
1707 | } | ||
1708 | return generic_writepages(mapping, wbc); | ||
1709 | } | ||
1710 | |||
1696 | static const struct address_space_operations def_blk_aops = { | 1711 | static const struct address_space_operations def_blk_aops = { |
1697 | .readpage = blkdev_readpage, | 1712 | .readpage = blkdev_readpage, |
1698 | .readpages = blkdev_readpages, | 1713 | .readpages = blkdev_readpages, |
1699 | .writepage = blkdev_writepage, | 1714 | .writepage = blkdev_writepage, |
1700 | .write_begin = blkdev_write_begin, | 1715 | .write_begin = blkdev_write_begin, |
1701 | .write_end = blkdev_write_end, | 1716 | .write_end = blkdev_write_end, |
1702 | .writepages = generic_writepages, | 1717 | .writepages = blkdev_writepages, |
1703 | .releasepage = blkdev_releasepage, | 1718 | .releasepage = blkdev_releasepage, |
1704 | .direct_IO = blkdev_direct_IO, | 1719 | .direct_IO = blkdev_direct_IO, |
1705 | .is_dirty_writeback = buffer_check_dirty_writeback, | 1720 | .is_dirty_writeback = buffer_check_dirty_writeback, |
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 151b7c71b868..d96f5cf38a2d 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c | |||
@@ -7986,6 +7986,7 @@ static void btrfs_endio_direct_read(struct bio *bio) | |||
7986 | 7986 | ||
7987 | kfree(dip); | 7987 | kfree(dip); |
7988 | 7988 | ||
7989 | dio_bio->bi_error = bio->bi_error; | ||
7989 | dio_end_io(dio_bio, bio->bi_error); | 7990 | dio_end_io(dio_bio, bio->bi_error); |
7990 | 7991 | ||
7991 | if (io_bio->end_io) | 7992 | if (io_bio->end_io) |
@@ -8040,6 +8041,7 @@ static void btrfs_endio_direct_write(struct bio *bio) | |||
8040 | 8041 | ||
8041 | kfree(dip); | 8042 | kfree(dip); |
8042 | 8043 | ||
8044 | dio_bio->bi_error = bio->bi_error; | ||
8043 | dio_end_io(dio_bio, bio->bi_error); | 8045 | dio_end_io(dio_bio, bio->bi_error); |
8044 | bio_put(bio); | 8046 | bio_put(bio); |
8045 | } | 8047 | } |
diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index 7cf8509deda7..2c849b08a91b 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c | |||
@@ -310,8 +310,16 @@ int btrfs_find_orphan_roots(struct btrfs_root *tree_root) | |||
310 | set_bit(BTRFS_ROOT_ORPHAN_ITEM_INSERTED, &root->state); | 310 | set_bit(BTRFS_ROOT_ORPHAN_ITEM_INSERTED, &root->state); |
311 | 311 | ||
312 | err = btrfs_insert_fs_root(root->fs_info, root); | 312 | err = btrfs_insert_fs_root(root->fs_info, root); |
313 | /* | ||
314 | * The root might have been inserted already, as before we look | ||
315 | * for orphan roots, log replay might have happened, which | ||
316 | * triggers a transaction commit and qgroup accounting, which | ||
317 | * in turn reads and inserts fs roots while doing backref | ||
318 | * walking. | ||
319 | */ | ||
320 | if (err == -EEXIST) | ||
321 | err = 0; | ||
313 | if (err) { | 322 | if (err) { |
314 | BUG_ON(err == -EEXIST); | ||
315 | btrfs_free_fs_root(root); | 323 | btrfs_free_fs_root(root); |
316 | break; | 324 | break; |
317 | } | 325 | } |
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index c22213789090..19adeb0ef82a 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c | |||
@@ -1756,6 +1756,10 @@ int ceph_pool_perm_check(struct ceph_inode_info *ci, int need) | |||
1756 | u32 pool; | 1756 | u32 pool; |
1757 | int ret, flags; | 1757 | int ret, flags; |
1758 | 1758 | ||
1759 | /* does not support pool namespace yet */ | ||
1760 | if (ci->i_pool_ns_len) | ||
1761 | return -EIO; | ||
1762 | |||
1759 | if (ceph_test_mount_opt(ceph_inode_to_client(&ci->vfs_inode), | 1763 | if (ceph_test_mount_opt(ceph_inode_to_client(&ci->vfs_inode), |
1760 | NOPOOLPERM)) | 1764 | NOPOOLPERM)) |
1761 | return 0; | 1765 | return 0; |
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index cdbf8cf3d52c..6fe0ad26a7df 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c | |||
@@ -2753,7 +2753,8 @@ static void handle_cap_grant(struct ceph_mds_client *mdsc, | |||
2753 | void *inline_data, int inline_len, | 2753 | void *inline_data, int inline_len, |
2754 | struct ceph_buffer *xattr_buf, | 2754 | struct ceph_buffer *xattr_buf, |
2755 | struct ceph_mds_session *session, | 2755 | struct ceph_mds_session *session, |
2756 | struct ceph_cap *cap, int issued) | 2756 | struct ceph_cap *cap, int issued, |
2757 | u32 pool_ns_len) | ||
2757 | __releases(ci->i_ceph_lock) | 2758 | __releases(ci->i_ceph_lock) |
2758 | __releases(mdsc->snap_rwsem) | 2759 | __releases(mdsc->snap_rwsem) |
2759 | { | 2760 | { |
@@ -2873,6 +2874,8 @@ static void handle_cap_grant(struct ceph_mds_client *mdsc, | |||
2873 | if (newcaps & (CEPH_CAP_ANY_FILE_RD | CEPH_CAP_ANY_FILE_WR)) { | 2874 | if (newcaps & (CEPH_CAP_ANY_FILE_RD | CEPH_CAP_ANY_FILE_WR)) { |
2874 | /* file layout may have changed */ | 2875 | /* file layout may have changed */ |
2875 | ci->i_layout = grant->layout; | 2876 | ci->i_layout = grant->layout; |
2877 | ci->i_pool_ns_len = pool_ns_len; | ||
2878 | |||
2876 | /* size/truncate_seq? */ | 2879 | /* size/truncate_seq? */ |
2877 | queue_trunc = ceph_fill_file_size(inode, issued, | 2880 | queue_trunc = ceph_fill_file_size(inode, issued, |
2878 | le32_to_cpu(grant->truncate_seq), | 2881 | le32_to_cpu(grant->truncate_seq), |
@@ -3411,6 +3414,7 @@ void ceph_handle_caps(struct ceph_mds_session *session, | |||
3411 | u32 inline_len = 0; | 3414 | u32 inline_len = 0; |
3412 | void *snaptrace; | 3415 | void *snaptrace; |
3413 | size_t snaptrace_len; | 3416 | size_t snaptrace_len; |
3417 | u32 pool_ns_len = 0; | ||
3414 | void *p, *end; | 3418 | void *p, *end; |
3415 | 3419 | ||
3416 | dout("handle_caps from mds%d\n", mds); | 3420 | dout("handle_caps from mds%d\n", mds); |
@@ -3463,6 +3467,21 @@ void ceph_handle_caps(struct ceph_mds_session *session, | |||
3463 | p += inline_len; | 3467 | p += inline_len; |
3464 | } | 3468 | } |
3465 | 3469 | ||
3470 | if (le16_to_cpu(msg->hdr.version) >= 8) { | ||
3471 | u64 flush_tid; | ||
3472 | u32 caller_uid, caller_gid; | ||
3473 | u32 osd_epoch_barrier; | ||
3474 | /* version >= 5 */ | ||
3475 | ceph_decode_32_safe(&p, end, osd_epoch_barrier, bad); | ||
3476 | /* version >= 6 */ | ||
3477 | ceph_decode_64_safe(&p, end, flush_tid, bad); | ||
3478 | /* version >= 7 */ | ||
3479 | ceph_decode_32_safe(&p, end, caller_uid, bad); | ||
3480 | ceph_decode_32_safe(&p, end, caller_gid, bad); | ||
3481 | /* version >= 8 */ | ||
3482 | ceph_decode_32_safe(&p, end, pool_ns_len, bad); | ||
3483 | } | ||
3484 | |||
3466 | /* lookup ino */ | 3485 | /* lookup ino */ |
3467 | inode = ceph_find_inode(sb, vino); | 3486 | inode = ceph_find_inode(sb, vino); |
3468 | ci = ceph_inode(inode); | 3487 | ci = ceph_inode(inode); |
@@ -3518,7 +3537,8 @@ void ceph_handle_caps(struct ceph_mds_session *session, | |||
3518 | &cap, &issued); | 3537 | &cap, &issued); |
3519 | handle_cap_grant(mdsc, inode, h, | 3538 | handle_cap_grant(mdsc, inode, h, |
3520 | inline_version, inline_data, inline_len, | 3539 | inline_version, inline_data, inline_len, |
3521 | msg->middle, session, cap, issued); | 3540 | msg->middle, session, cap, issued, |
3541 | pool_ns_len); | ||
3522 | if (realm) | 3542 | if (realm) |
3523 | ceph_put_snap_realm(mdsc, realm); | 3543 | ceph_put_snap_realm(mdsc, realm); |
3524 | goto done_unlocked; | 3544 | goto done_unlocked; |
@@ -3542,7 +3562,8 @@ void ceph_handle_caps(struct ceph_mds_session *session, | |||
3542 | issued |= __ceph_caps_dirty(ci); | 3562 | issued |= __ceph_caps_dirty(ci); |
3543 | handle_cap_grant(mdsc, inode, h, | 3563 | handle_cap_grant(mdsc, inode, h, |
3544 | inline_version, inline_data, inline_len, | 3564 | inline_version, inline_data, inline_len, |
3545 | msg->middle, session, cap, issued); | 3565 | msg->middle, session, cap, issued, |
3566 | pool_ns_len); | ||
3546 | goto done_unlocked; | 3567 | goto done_unlocked; |
3547 | 3568 | ||
3548 | case CEPH_CAP_OP_FLUSH_ACK: | 3569 | case CEPH_CAP_OP_FLUSH_ACK: |
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index fb4ba2e4e2a5..5849b88bbed3 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c | |||
@@ -396,6 +396,7 @@ struct inode *ceph_alloc_inode(struct super_block *sb) | |||
396 | ci->i_symlink = NULL; | 396 | ci->i_symlink = NULL; |
397 | 397 | ||
398 | memset(&ci->i_dir_layout, 0, sizeof(ci->i_dir_layout)); | 398 | memset(&ci->i_dir_layout, 0, sizeof(ci->i_dir_layout)); |
399 | ci->i_pool_ns_len = 0; | ||
399 | 400 | ||
400 | ci->i_fragtree = RB_ROOT; | 401 | ci->i_fragtree = RB_ROOT; |
401 | mutex_init(&ci->i_fragtree_mutex); | 402 | mutex_init(&ci->i_fragtree_mutex); |
@@ -756,6 +757,7 @@ static int fill_inode(struct inode *inode, struct page *locked_page, | |||
756 | if (ci->i_layout.fl_pg_pool != info->layout.fl_pg_pool) | 757 | if (ci->i_layout.fl_pg_pool != info->layout.fl_pg_pool) |
757 | ci->i_ceph_flags &= ~CEPH_I_POOL_PERM; | 758 | ci->i_ceph_flags &= ~CEPH_I_POOL_PERM; |
758 | ci->i_layout = info->layout; | 759 | ci->i_layout = info->layout; |
760 | ci->i_pool_ns_len = iinfo->pool_ns_len; | ||
759 | 761 | ||
760 | queue_trunc = ceph_fill_file_size(inode, issued, | 762 | queue_trunc = ceph_fill_file_size(inode, issued, |
761 | le32_to_cpu(info->truncate_seq), | 763 | le32_to_cpu(info->truncate_seq), |
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index e7b130a637f9..911d64d865f1 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c | |||
@@ -100,6 +100,14 @@ static int parse_reply_info_in(void **p, void *end, | |||
100 | } else | 100 | } else |
101 | info->inline_version = CEPH_INLINE_NONE; | 101 | info->inline_version = CEPH_INLINE_NONE; |
102 | 102 | ||
103 | if (features & CEPH_FEATURE_FS_FILE_LAYOUT_V2) { | ||
104 | ceph_decode_32_safe(p, end, info->pool_ns_len, bad); | ||
105 | ceph_decode_need(p, end, info->pool_ns_len, bad); | ||
106 | *p += info->pool_ns_len; | ||
107 | } else { | ||
108 | info->pool_ns_len = 0; | ||
109 | } | ||
110 | |||
103 | return 0; | 111 | return 0; |
104 | bad: | 112 | bad: |
105 | return err; | 113 | return err; |
@@ -2298,6 +2306,14 @@ int ceph_mdsc_do_request(struct ceph_mds_client *mdsc, | |||
2298 | ceph_get_cap_refs(ceph_inode(req->r_old_dentry_dir), | 2306 | ceph_get_cap_refs(ceph_inode(req->r_old_dentry_dir), |
2299 | CEPH_CAP_PIN); | 2307 | CEPH_CAP_PIN); |
2300 | 2308 | ||
2309 | /* deny access to directories with pool_ns layouts */ | ||
2310 | if (req->r_inode && S_ISDIR(req->r_inode->i_mode) && | ||
2311 | ceph_inode(req->r_inode)->i_pool_ns_len) | ||
2312 | return -EIO; | ||
2313 | if (req->r_locked_dir && | ||
2314 | ceph_inode(req->r_locked_dir)->i_pool_ns_len) | ||
2315 | return -EIO; | ||
2316 | |||
2301 | /* issue */ | 2317 | /* issue */ |
2302 | mutex_lock(&mdsc->mutex); | 2318 | mutex_lock(&mdsc->mutex); |
2303 | __register_request(mdsc, req, dir); | 2319 | __register_request(mdsc, req, dir); |
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index ccf11ef0ca87..37712ccffcc6 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h | |||
@@ -44,6 +44,7 @@ struct ceph_mds_reply_info_in { | |||
44 | u64 inline_version; | 44 | u64 inline_version; |
45 | u32 inline_len; | 45 | u32 inline_len; |
46 | char *inline_data; | 46 | char *inline_data; |
47 | u32 pool_ns_len; | ||
47 | }; | 48 | }; |
48 | 49 | ||
49 | /* | 50 | /* |
diff --git a/fs/ceph/super.h b/fs/ceph/super.h index 75b7d125ce66..9c458eb52245 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h | |||
@@ -287,6 +287,7 @@ struct ceph_inode_info { | |||
287 | 287 | ||
288 | struct ceph_dir_layout i_dir_layout; | 288 | struct ceph_dir_layout i_dir_layout; |
289 | struct ceph_file_layout i_layout; | 289 | struct ceph_file_layout i_layout; |
290 | size_t i_pool_ns_len; | ||
290 | char *i_symlink; | 291 | char *i_symlink; |
291 | 292 | ||
292 | /* for dirs */ | 293 | /* for dirs */ |
diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c index 7dc886c9a78f..e956cba94338 100644 --- a/fs/cifs/cifs_dfs_ref.c +++ b/fs/cifs/cifs_dfs_ref.c | |||
@@ -175,7 +175,7 @@ char *cifs_compose_mount_options(const char *sb_mountdata, | |||
175 | * string to the length of the original string to allow for worst case. | 175 | * string to the length of the original string to allow for worst case. |
176 | */ | 176 | */ |
177 | md_len = strlen(sb_mountdata) + INET6_ADDRSTRLEN; | 177 | md_len = strlen(sb_mountdata) + INET6_ADDRSTRLEN; |
178 | mountdata = kzalloc(md_len + 1, GFP_KERNEL); | 178 | mountdata = kzalloc(md_len + sizeof("ip=") + 1, GFP_KERNEL); |
179 | if (mountdata == NULL) { | 179 | if (mountdata == NULL) { |
180 | rc = -ENOMEM; | 180 | rc = -ENOMEM; |
181 | goto compose_mount_options_err; | 181 | goto compose_mount_options_err; |
diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c index afa09fce8151..e682b36a210f 100644 --- a/fs/cifs/cifsencrypt.c +++ b/fs/cifs/cifsencrypt.c | |||
@@ -714,7 +714,7 @@ setup_ntlmv2_rsp(struct cifs_ses *ses, const struct nls_table *nls_cp) | |||
714 | 714 | ||
715 | ses->auth_key.response = kmalloc(baselen + tilen, GFP_KERNEL); | 715 | ses->auth_key.response = kmalloc(baselen + tilen, GFP_KERNEL); |
716 | if (!ses->auth_key.response) { | 716 | if (!ses->auth_key.response) { |
717 | rc = ENOMEM; | 717 | rc = -ENOMEM; |
718 | ses->auth_key.len = 0; | 718 | ses->auth_key.len = 0; |
719 | goto setup_ntlmv2_rsp_ret; | 719 | goto setup_ntlmv2_rsp_ret; |
720 | } | 720 | } |
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index c48ca13673e3..2eea40353e60 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c | |||
@@ -1013,7 +1013,6 @@ const struct file_operations cifs_file_strict_ops = { | |||
1013 | .llseek = cifs_llseek, | 1013 | .llseek = cifs_llseek, |
1014 | .unlocked_ioctl = cifs_ioctl, | 1014 | .unlocked_ioctl = cifs_ioctl, |
1015 | .clone_file_range = cifs_clone_file_range, | 1015 | .clone_file_range = cifs_clone_file_range, |
1016 | .clone_file_range = cifs_clone_file_range, | ||
1017 | .setlease = cifs_setlease, | 1016 | .setlease = cifs_setlease, |
1018 | .fallocate = cifs_fallocate, | 1017 | .fallocate = cifs_fallocate, |
1019 | }; | 1018 | }; |
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index 68c4547528c4..83aac8ba50b0 100644 --- a/fs/cifs/cifsfs.h +++ b/fs/cifs/cifsfs.h | |||
@@ -31,19 +31,15 @@ | |||
31 | * so that it will fit. We use hash_64 to convert the value to 31 bits, and | 31 | * so that it will fit. We use hash_64 to convert the value to 31 bits, and |
32 | * then add 1, to ensure that we don't end up with a 0 as the value. | 32 | * then add 1, to ensure that we don't end up with a 0 as the value. |
33 | */ | 33 | */ |
34 | #if BITS_PER_LONG == 64 | ||
35 | static inline ino_t | 34 | static inline ino_t |
36 | cifs_uniqueid_to_ino_t(u64 fileid) | 35 | cifs_uniqueid_to_ino_t(u64 fileid) |
37 | { | 36 | { |
37 | if ((sizeof(ino_t)) < (sizeof(u64))) | ||
38 | return (ino_t)hash_64(fileid, (sizeof(ino_t) * 8) - 1) + 1; | ||
39 | |||
38 | return (ino_t)fileid; | 40 | return (ino_t)fileid; |
41 | |||
39 | } | 42 | } |
40 | #else | ||
41 | static inline ino_t | ||
42 | cifs_uniqueid_to_ino_t(u64 fileid) | ||
43 | { | ||
44 | return (ino_t)hash_64(fileid, (sizeof(ino_t) * 8) - 1) + 1; | ||
45 | } | ||
46 | #endif | ||
47 | 43 | ||
48 | extern struct file_system_type cifs_fs_type; | 44 | extern struct file_system_type cifs_fs_type; |
49 | extern const struct address_space_operations cifs_addr_ops; | 45 | extern const struct address_space_operations cifs_addr_ops; |
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 90b4f9f7de66..76fcb50295a3 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c | |||
@@ -1396,11 +1396,10 @@ openRetry: | |||
1396 | * current bigbuf. | 1396 | * current bigbuf. |
1397 | */ | 1397 | */ |
1398 | static int | 1398 | static int |
1399 | cifs_readv_discard(struct TCP_Server_Info *server, struct mid_q_entry *mid) | 1399 | discard_remaining_data(struct TCP_Server_Info *server) |
1400 | { | 1400 | { |
1401 | unsigned int rfclen = get_rfc1002_length(server->smallbuf); | 1401 | unsigned int rfclen = get_rfc1002_length(server->smallbuf); |
1402 | int remaining = rfclen + 4 - server->total_read; | 1402 | int remaining = rfclen + 4 - server->total_read; |
1403 | struct cifs_readdata *rdata = mid->callback_data; | ||
1404 | 1403 | ||
1405 | while (remaining > 0) { | 1404 | while (remaining > 0) { |
1406 | int length; | 1405 | int length; |
@@ -1414,10 +1413,20 @@ cifs_readv_discard(struct TCP_Server_Info *server, struct mid_q_entry *mid) | |||
1414 | remaining -= length; | 1413 | remaining -= length; |
1415 | } | 1414 | } |
1416 | 1415 | ||
1417 | dequeue_mid(mid, rdata->result); | ||
1418 | return 0; | 1416 | return 0; |
1419 | } | 1417 | } |
1420 | 1418 | ||
1419 | static int | ||
1420 | cifs_readv_discard(struct TCP_Server_Info *server, struct mid_q_entry *mid) | ||
1421 | { | ||
1422 | int length; | ||
1423 | struct cifs_readdata *rdata = mid->callback_data; | ||
1424 | |||
1425 | length = discard_remaining_data(server); | ||
1426 | dequeue_mid(mid, rdata->result); | ||
1427 | return length; | ||
1428 | } | ||
1429 | |||
1421 | int | 1430 | int |
1422 | cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid) | 1431 | cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid) |
1423 | { | 1432 | { |
@@ -1446,6 +1455,12 @@ cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid) | |||
1446 | return length; | 1455 | return length; |
1447 | server->total_read += length; | 1456 | server->total_read += length; |
1448 | 1457 | ||
1458 | if (server->ops->is_status_pending && | ||
1459 | server->ops->is_status_pending(buf, server, 0)) { | ||
1460 | discard_remaining_data(server); | ||
1461 | return -1; | ||
1462 | } | ||
1463 | |||
1449 | /* Was the SMB read successful? */ | 1464 | /* Was the SMB read successful? */ |
1450 | rdata->result = server->ops->map_error(buf, false); | 1465 | rdata->result = server->ops->map_error(buf, false); |
1451 | if (rdata->result != 0) { | 1466 | if (rdata->result != 0) { |
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 4fbd92d2e113..a763cd3d9e7c 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c | |||
@@ -2999,8 +2999,7 @@ ip_rfc1001_connect(struct TCP_Server_Info *server) | |||
2999 | if (ses_init_buf) { | 2999 | if (ses_init_buf) { |
3000 | ses_init_buf->trailer.session_req.called_len = 32; | 3000 | ses_init_buf->trailer.session_req.called_len = 32; |
3001 | 3001 | ||
3002 | if (server->server_RFC1001_name && | 3002 | if (server->server_RFC1001_name[0] != 0) |
3003 | server->server_RFC1001_name[0] != 0) | ||
3004 | rfc1002mangle(ses_init_buf->trailer. | 3003 | rfc1002mangle(ses_init_buf->trailer. |
3005 | session_req.called_name, | 3004 | session_req.called_name, |
3006 | server->server_RFC1001_name, | 3005 | server->server_RFC1001_name, |
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 10f8d5cf5681..42e1f440eb1e 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c | |||
@@ -1106,21 +1106,25 @@ parse_lease_state(struct TCP_Server_Info *server, struct smb2_create_rsp *rsp, | |||
1106 | { | 1106 | { |
1107 | char *data_offset; | 1107 | char *data_offset; |
1108 | struct create_context *cc; | 1108 | struct create_context *cc; |
1109 | unsigned int next = 0; | 1109 | unsigned int next; |
1110 | unsigned int remaining; | ||
1110 | char *name; | 1111 | char *name; |
1111 | 1112 | ||
1112 | data_offset = (char *)rsp + 4 + le32_to_cpu(rsp->CreateContextsOffset); | 1113 | data_offset = (char *)rsp + 4 + le32_to_cpu(rsp->CreateContextsOffset); |
1114 | remaining = le32_to_cpu(rsp->CreateContextsLength); | ||
1113 | cc = (struct create_context *)data_offset; | 1115 | cc = (struct create_context *)data_offset; |
1114 | do { | 1116 | while (remaining >= sizeof(struct create_context)) { |
1115 | cc = (struct create_context *)((char *)cc + next); | ||
1116 | name = le16_to_cpu(cc->NameOffset) + (char *)cc; | 1117 | name = le16_to_cpu(cc->NameOffset) + (char *)cc; |
1117 | if (le16_to_cpu(cc->NameLength) != 4 || | 1118 | if (le16_to_cpu(cc->NameLength) == 4 && |
1118 | strncmp(name, "RqLs", 4)) { | 1119 | strncmp(name, "RqLs", 4) == 0) |
1119 | next = le32_to_cpu(cc->Next); | 1120 | return server->ops->parse_lease_buf(cc, epoch); |
1120 | continue; | 1121 | |
1121 | } | 1122 | next = le32_to_cpu(cc->Next); |
1122 | return server->ops->parse_lease_buf(cc, epoch); | 1123 | if (!next) |
1123 | } while (next != 0); | 1124 | break; |
1125 | remaining -= next; | ||
1126 | cc = (struct create_context *)((char *)cc + next); | ||
1127 | } | ||
1124 | 1128 | ||
1125 | return 0; | 1129 | return 0; |
1126 | } | 1130 | } |
diff --git a/fs/dax.c b/fs/dax.c --- a/fs/dax.c +++ b/fs/dax.c | |||
@@ -79,15 +79,14 @@ struct page *read_dax_sector(struct block_device *bdev, sector_t n) | |||
79 | } | 79 | } |
80 | 80 | ||
81 | /* | 81 | /* |
82 | * dax_clear_blocks() is called from within transaction context from XFS, | 82 | * dax_clear_sectors() is called from within transaction context from XFS, |
83 | * and hence this means the stack from this point must follow GFP_NOFS | 83 | * and hence this means the stack from this point must follow GFP_NOFS |
84 | * semantics for all operations. | 84 | * semantics for all operations. |
85 | */ | 85 | */ |
86 | int dax_clear_blocks(struct inode *inode, sector_t block, long _size) | 86 | int dax_clear_sectors(struct block_device *bdev, sector_t _sector, long _size) |
87 | { | 87 | { |
88 | struct block_device *bdev = inode->i_sb->s_bdev; | ||
89 | struct blk_dax_ctl dax = { | 88 | struct blk_dax_ctl dax = { |
90 | .sector = block << (inode->i_blkbits - 9), | 89 | .sector = _sector, |
91 | .size = _size, | 90 | .size = _size, |
92 | }; | 91 | }; |
93 | 92 | ||
@@ -109,7 +108,7 @@ int dax_clear_blocks(struct inode *inode, sector_t block, long _size) | |||
109 | wmb_pmem(); | 108 | wmb_pmem(); |
110 | return 0; | 109 | return 0; |
111 | } | 110 | } |
112 | EXPORT_SYMBOL_GPL(dax_clear_blocks); | 111 | EXPORT_SYMBOL_GPL(dax_clear_sectors); |
113 | 112 | ||
114 | /* the clear_pmem() calls are ordered by a wmb_pmem() in the caller */ | 113 | /* the clear_pmem() calls are ordered by a wmb_pmem() in the caller */ |
115 | static void dax_new_buf(void __pmem *addr, unsigned size, unsigned first, | 114 | static void dax_new_buf(void __pmem *addr, unsigned size, unsigned first, |
@@ -485,11 +484,10 @@ static int dax_writeback_one(struct block_device *bdev, | |||
485 | * end]. This is required by data integrity operations to ensure file data is | 484 | * end]. This is required by data integrity operations to ensure file data is |
486 | * on persistent storage prior to completion of the operation. | 485 | * on persistent storage prior to completion of the operation. |
487 | */ | 486 | */ |
488 | int dax_writeback_mapping_range(struct address_space *mapping, loff_t start, | 487 | int dax_writeback_mapping_range(struct address_space *mapping, |
489 | loff_t end) | 488 | struct block_device *bdev, struct writeback_control *wbc) |
490 | { | 489 | { |
491 | struct inode *inode = mapping->host; | 490 | struct inode *inode = mapping->host; |
492 | struct block_device *bdev = inode->i_sb->s_bdev; | ||
493 | pgoff_t start_index, end_index, pmd_index; | 491 | pgoff_t start_index, end_index, pmd_index; |
494 | pgoff_t indices[PAGEVEC_SIZE]; | 492 | pgoff_t indices[PAGEVEC_SIZE]; |
495 | struct pagevec pvec; | 493 | struct pagevec pvec; |
@@ -500,8 +498,11 @@ int dax_writeback_mapping_range(struct address_space *mapping, loff_t start, | |||
500 | if (WARN_ON_ONCE(inode->i_blkbits != PAGE_SHIFT)) | 498 | if (WARN_ON_ONCE(inode->i_blkbits != PAGE_SHIFT)) |
501 | return -EIO; | 499 | return -EIO; |
502 | 500 | ||
503 | start_index = start >> PAGE_CACHE_SHIFT; | 501 | if (!mapping->nrexceptional || wbc->sync_mode != WB_SYNC_ALL) |
504 | end_index = end >> PAGE_CACHE_SHIFT; | 502 | return 0; |
503 | |||
504 | start_index = wbc->range_start >> PAGE_CACHE_SHIFT; | ||
505 | end_index = wbc->range_end >> PAGE_CACHE_SHIFT; | ||
505 | pmd_index = DAX_PMD_INDEX(start_index); | 506 | pmd_index = DAX_PMD_INDEX(start_index); |
506 | 507 | ||
507 | rcu_read_lock(); | 508 | rcu_read_lock(); |
diff --git a/fs/dcache.c b/fs/dcache.c index 92d5140de851..2398f9f94337 100644 --- a/fs/dcache.c +++ b/fs/dcache.c | |||
@@ -269,9 +269,6 @@ static inline int dname_external(const struct dentry *dentry) | |||
269 | return dentry->d_name.name != dentry->d_iname; | 269 | return dentry->d_name.name != dentry->d_iname; |
270 | } | 270 | } |
271 | 271 | ||
272 | /* | ||
273 | * Make sure other CPUs see the inode attached before the type is set. | ||
274 | */ | ||
275 | static inline void __d_set_inode_and_type(struct dentry *dentry, | 272 | static inline void __d_set_inode_and_type(struct dentry *dentry, |
276 | struct inode *inode, | 273 | struct inode *inode, |
277 | unsigned type_flags) | 274 | unsigned type_flags) |
@@ -279,28 +276,18 @@ static inline void __d_set_inode_and_type(struct dentry *dentry, | |||
279 | unsigned flags; | 276 | unsigned flags; |
280 | 277 | ||
281 | dentry->d_inode = inode; | 278 | dentry->d_inode = inode; |
282 | smp_wmb(); | ||
283 | flags = READ_ONCE(dentry->d_flags); | 279 | flags = READ_ONCE(dentry->d_flags); |
284 | flags &= ~(DCACHE_ENTRY_TYPE | DCACHE_FALLTHRU); | 280 | flags &= ~(DCACHE_ENTRY_TYPE | DCACHE_FALLTHRU); |
285 | flags |= type_flags; | 281 | flags |= type_flags; |
286 | WRITE_ONCE(dentry->d_flags, flags); | 282 | WRITE_ONCE(dentry->d_flags, flags); |
287 | } | 283 | } |
288 | 284 | ||
289 | /* | ||
290 | * Ideally, we want to make sure that other CPUs see the flags cleared before | ||
291 | * the inode is detached, but this is really a violation of RCU principles | ||
292 | * since the ordering suggests we should always set inode before flags. | ||
293 | * | ||
294 | * We should instead replace or discard the entire dentry - but that sucks | ||
295 | * performancewise on mass deletion/rename. | ||
296 | */ | ||
297 | static inline void __d_clear_type_and_inode(struct dentry *dentry) | 285 | static inline void __d_clear_type_and_inode(struct dentry *dentry) |
298 | { | 286 | { |
299 | unsigned flags = READ_ONCE(dentry->d_flags); | 287 | unsigned flags = READ_ONCE(dentry->d_flags); |
300 | 288 | ||
301 | flags &= ~(DCACHE_ENTRY_TYPE | DCACHE_FALLTHRU); | 289 | flags &= ~(DCACHE_ENTRY_TYPE | DCACHE_FALLTHRU); |
302 | WRITE_ONCE(dentry->d_flags, flags); | 290 | WRITE_ONCE(dentry->d_flags, flags); |
303 | smp_wmb(); | ||
304 | dentry->d_inode = NULL; | 291 | dentry->d_inode = NULL; |
305 | } | 292 | } |
306 | 293 | ||
@@ -370,9 +357,11 @@ static void dentry_unlink_inode(struct dentry * dentry) | |||
370 | __releases(dentry->d_inode->i_lock) | 357 | __releases(dentry->d_inode->i_lock) |
371 | { | 358 | { |
372 | struct inode *inode = dentry->d_inode; | 359 | struct inode *inode = dentry->d_inode; |
360 | |||
361 | raw_write_seqcount_begin(&dentry->d_seq); | ||
373 | __d_clear_type_and_inode(dentry); | 362 | __d_clear_type_and_inode(dentry); |
374 | hlist_del_init(&dentry->d_u.d_alias); | 363 | hlist_del_init(&dentry->d_u.d_alias); |
375 | dentry_rcuwalk_invalidate(dentry); | 364 | raw_write_seqcount_end(&dentry->d_seq); |
376 | spin_unlock(&dentry->d_lock); | 365 | spin_unlock(&dentry->d_lock); |
377 | spin_unlock(&inode->i_lock); | 366 | spin_unlock(&inode->i_lock); |
378 | if (!inode->i_nlink) | 367 | if (!inode->i_nlink) |
@@ -1758,8 +1747,9 @@ static void __d_instantiate(struct dentry *dentry, struct inode *inode) | |||
1758 | spin_lock(&dentry->d_lock); | 1747 | spin_lock(&dentry->d_lock); |
1759 | if (inode) | 1748 | if (inode) |
1760 | hlist_add_head(&dentry->d_u.d_alias, &inode->i_dentry); | 1749 | hlist_add_head(&dentry->d_u.d_alias, &inode->i_dentry); |
1750 | raw_write_seqcount_begin(&dentry->d_seq); | ||
1761 | __d_set_inode_and_type(dentry, inode, add_flags); | 1751 | __d_set_inode_and_type(dentry, inode, add_flags); |
1762 | dentry_rcuwalk_invalidate(dentry); | 1752 | raw_write_seqcount_end(&dentry->d_seq); |
1763 | spin_unlock(&dentry->d_lock); | 1753 | spin_unlock(&dentry->d_lock); |
1764 | fsnotify_d_instantiate(dentry, inode); | 1754 | fsnotify_d_instantiate(dentry, inode); |
1765 | } | 1755 | } |
diff --git a/fs/direct-io.c b/fs/direct-io.c index 1b2f7ffc8b84..d6a9012d42ad 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c | |||
@@ -472,8 +472,8 @@ static int dio_bio_complete(struct dio *dio, struct bio *bio) | |||
472 | dio->io_error = -EIO; | 472 | dio->io_error = -EIO; |
473 | 473 | ||
474 | if (dio->is_async && dio->rw == READ && dio->should_dirty) { | 474 | if (dio->is_async && dio->rw == READ && dio->should_dirty) { |
475 | bio_check_pages_dirty(bio); /* transfers ownership */ | ||
476 | err = bio->bi_error; | 475 | err = bio->bi_error; |
476 | bio_check_pages_dirty(bio); /* transfers ownership */ | ||
477 | } else { | 477 | } else { |
478 | bio_for_each_segment_all(bvec, bio, i) { | 478 | bio_for_each_segment_all(bvec, bio, i) { |
479 | struct page *page = bvec->bv_page; | 479 | struct page *page = bvec->bv_page; |
diff --git a/fs/ext2/file.c b/fs/ext2/file.c index 2c88d683cd91..c1400b109805 100644 --- a/fs/ext2/file.c +++ b/fs/ext2/file.c | |||
@@ -80,23 +80,6 @@ static int ext2_dax_pmd_fault(struct vm_area_struct *vma, unsigned long addr, | |||
80 | return ret; | 80 | return ret; |
81 | } | 81 | } |
82 | 82 | ||
83 | static int ext2_dax_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) | ||
84 | { | ||
85 | struct inode *inode = file_inode(vma->vm_file); | ||
86 | struct ext2_inode_info *ei = EXT2_I(inode); | ||
87 | int ret; | ||
88 | |||
89 | sb_start_pagefault(inode->i_sb); | ||
90 | file_update_time(vma->vm_file); | ||
91 | down_read(&ei->dax_sem); | ||
92 | |||
93 | ret = __dax_mkwrite(vma, vmf, ext2_get_block, NULL); | ||
94 | |||
95 | up_read(&ei->dax_sem); | ||
96 | sb_end_pagefault(inode->i_sb); | ||
97 | return ret; | ||
98 | } | ||
99 | |||
100 | static int ext2_dax_pfn_mkwrite(struct vm_area_struct *vma, | 83 | static int ext2_dax_pfn_mkwrite(struct vm_area_struct *vma, |
101 | struct vm_fault *vmf) | 84 | struct vm_fault *vmf) |
102 | { | 85 | { |
@@ -124,7 +107,7 @@ static int ext2_dax_pfn_mkwrite(struct vm_area_struct *vma, | |||
124 | static const struct vm_operations_struct ext2_dax_vm_ops = { | 107 | static const struct vm_operations_struct ext2_dax_vm_ops = { |
125 | .fault = ext2_dax_fault, | 108 | .fault = ext2_dax_fault, |
126 | .pmd_fault = ext2_dax_pmd_fault, | 109 | .pmd_fault = ext2_dax_pmd_fault, |
127 | .page_mkwrite = ext2_dax_mkwrite, | 110 | .page_mkwrite = ext2_dax_fault, |
128 | .pfn_mkwrite = ext2_dax_pfn_mkwrite, | 111 | .pfn_mkwrite = ext2_dax_pfn_mkwrite, |
129 | }; | 112 | }; |
130 | 113 | ||
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index 338eefda70c6..6bd58e6ff038 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c | |||
@@ -737,8 +737,10 @@ static int ext2_get_blocks(struct inode *inode, | |||
737 | * so that it's not found by another thread before it's | 737 | * so that it's not found by another thread before it's |
738 | * initialised | 738 | * initialised |
739 | */ | 739 | */ |
740 | err = dax_clear_blocks(inode, le32_to_cpu(chain[depth-1].key), | 740 | err = dax_clear_sectors(inode->i_sb->s_bdev, |
741 | 1 << inode->i_blkbits); | 741 | le32_to_cpu(chain[depth-1].key) << |
742 | (inode->i_blkbits - 9), | ||
743 | 1 << inode->i_blkbits); | ||
742 | if (err) { | 744 | if (err) { |
743 | mutex_unlock(&ei->truncate_mutex); | 745 | mutex_unlock(&ei->truncate_mutex); |
744 | goto cleanup; | 746 | goto cleanup; |
@@ -874,6 +876,14 @@ ext2_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t offset) | |||
874 | static int | 876 | static int |
875 | ext2_writepages(struct address_space *mapping, struct writeback_control *wbc) | 877 | ext2_writepages(struct address_space *mapping, struct writeback_control *wbc) |
876 | { | 878 | { |
879 | #ifdef CONFIG_FS_DAX | ||
880 | if (dax_mapping(mapping)) { | ||
881 | return dax_writeback_mapping_range(mapping, | ||
882 | mapping->host->i_sb->s_bdev, | ||
883 | wbc); | ||
884 | } | ||
885 | #endif | ||
886 | |||
877 | return mpage_writepages(mapping, wbc, ext2_get_block); | 887 | return mpage_writepages(mapping, wbc, ext2_get_block); |
878 | } | 888 | } |
879 | 889 | ||
@@ -1296,7 +1306,7 @@ void ext2_set_inode_flags(struct inode *inode) | |||
1296 | inode->i_flags |= S_NOATIME; | 1306 | inode->i_flags |= S_NOATIME; |
1297 | if (flags & EXT2_DIRSYNC_FL) | 1307 | if (flags & EXT2_DIRSYNC_FL) |
1298 | inode->i_flags |= S_DIRSYNC; | 1308 | inode->i_flags |= S_DIRSYNC; |
1299 | if (test_opt(inode->i_sb, DAX)) | 1309 | if (test_opt(inode->i_sb, DAX) && S_ISREG(inode->i_mode)) |
1300 | inode->i_flags |= S_DAX; | 1310 | inode->i_flags |= S_DAX; |
1301 | } | 1311 | } |
1302 | 1312 | ||
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index ec0668a60678..fe1f50fe764f 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c | |||
@@ -191,7 +191,6 @@ static int ext4_init_block_bitmap(struct super_block *sb, | |||
191 | /* If checksum is bad mark all blocks used to prevent allocation | 191 | /* If checksum is bad mark all blocks used to prevent allocation |
192 | * essentially implementing a per-group read-only flag. */ | 192 | * essentially implementing a per-group read-only flag. */ |
193 | if (!ext4_group_desc_csum_verify(sb, block_group, gdp)) { | 193 | if (!ext4_group_desc_csum_verify(sb, block_group, gdp)) { |
194 | ext4_error(sb, "Checksum bad for group %u", block_group); | ||
195 | grp = ext4_get_group_info(sb, block_group); | 194 | grp = ext4_get_group_info(sb, block_group); |
196 | if (!EXT4_MB_GRP_BBITMAP_CORRUPT(grp)) | 195 | if (!EXT4_MB_GRP_BBITMAP_CORRUPT(grp)) |
197 | percpu_counter_sub(&sbi->s_freeclusters_counter, | 196 | percpu_counter_sub(&sbi->s_freeclusters_counter, |
@@ -442,14 +441,16 @@ ext4_read_block_bitmap_nowait(struct super_block *sb, ext4_group_t block_group) | |||
442 | } | 441 | } |
443 | ext4_lock_group(sb, block_group); | 442 | ext4_lock_group(sb, block_group); |
444 | if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { | 443 | if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { |
445 | |||
446 | err = ext4_init_block_bitmap(sb, bh, block_group, desc); | 444 | err = ext4_init_block_bitmap(sb, bh, block_group, desc); |
447 | set_bitmap_uptodate(bh); | 445 | set_bitmap_uptodate(bh); |
448 | set_buffer_uptodate(bh); | 446 | set_buffer_uptodate(bh); |
449 | ext4_unlock_group(sb, block_group); | 447 | ext4_unlock_group(sb, block_group); |
450 | unlock_buffer(bh); | 448 | unlock_buffer(bh); |
451 | if (err) | 449 | if (err) { |
450 | ext4_error(sb, "Failed to init block bitmap for group " | ||
451 | "%u: %d", block_group, err); | ||
452 | goto out; | 452 | goto out; |
453 | } | ||
453 | goto verify; | 454 | goto verify; |
454 | } | 455 | } |
455 | ext4_unlock_group(sb, block_group); | 456 | ext4_unlock_group(sb, block_group); |
diff --git a/fs/ext4/crypto.c b/fs/ext4/crypto.c index c8021208a7eb..38f7562489bb 100644 --- a/fs/ext4/crypto.c +++ b/fs/ext4/crypto.c | |||
@@ -467,3 +467,59 @@ uint32_t ext4_validate_encryption_key_size(uint32_t mode, uint32_t size) | |||
467 | return size; | 467 | return size; |
468 | return 0; | 468 | return 0; |
469 | } | 469 | } |
470 | |||
471 | /* | ||
472 | * Validate dentries for encrypted directories to make sure we aren't | ||
473 | * potentially caching stale data after a key has been added or | ||
474 | * removed. | ||
475 | */ | ||
476 | static int ext4_d_revalidate(struct dentry *dentry, unsigned int flags) | ||
477 | { | ||
478 | struct inode *dir = d_inode(dentry->d_parent); | ||
479 | struct ext4_crypt_info *ci = EXT4_I(dir)->i_crypt_info; | ||
480 | int dir_has_key, cached_with_key; | ||
481 | |||
482 | if (!ext4_encrypted_inode(dir)) | ||
483 | return 0; | ||
484 | |||
485 | if (ci && ci->ci_keyring_key && | ||
486 | (ci->ci_keyring_key->flags & ((1 << KEY_FLAG_INVALIDATED) | | ||
487 | (1 << KEY_FLAG_REVOKED) | | ||
488 | (1 << KEY_FLAG_DEAD)))) | ||
489 | ci = NULL; | ||
490 | |||
491 | /* this should eventually be an flag in d_flags */ | ||
492 | cached_with_key = dentry->d_fsdata != NULL; | ||
493 | dir_has_key = (ci != NULL); | ||
494 | |||
495 | /* | ||
496 | * If the dentry was cached without the key, and it is a | ||
497 | * negative dentry, it might be a valid name. We can't check | ||
498 | * if the key has since been made available due to locking | ||
499 | * reasons, so we fail the validation so ext4_lookup() can do | ||
500 | * this check. | ||
501 | * | ||
502 | * We also fail the validation if the dentry was created with | ||
503 | * the key present, but we no longer have the key, or vice versa. | ||
504 | */ | ||
505 | if ((!cached_with_key && d_is_negative(dentry)) || | ||
506 | (!cached_with_key && dir_has_key) || | ||
507 | (cached_with_key && !dir_has_key)) { | ||
508 | #if 0 /* Revalidation debug */ | ||
509 | char buf[80]; | ||
510 | char *cp = simple_dname(dentry, buf, sizeof(buf)); | ||
511 | |||
512 | if (IS_ERR(cp)) | ||
513 | cp = (char *) "???"; | ||
514 | pr_err("revalidate: %s %p %d %d %d\n", cp, dentry->d_fsdata, | ||
515 | cached_with_key, d_is_negative(dentry), | ||
516 | dir_has_key); | ||
517 | #endif | ||
518 | return 0; | ||
519 | } | ||
520 | return 1; | ||
521 | } | ||
522 | |||
523 | const struct dentry_operations ext4_encrypted_d_ops = { | ||
524 | .d_revalidate = ext4_d_revalidate, | ||
525 | }; | ||
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c index 1d1bca74f844..33f5e2a50cf8 100644 --- a/fs/ext4/dir.c +++ b/fs/ext4/dir.c | |||
@@ -111,6 +111,12 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx) | |||
111 | int dir_has_error = 0; | 111 | int dir_has_error = 0; |
112 | struct ext4_str fname_crypto_str = {.name = NULL, .len = 0}; | 112 | struct ext4_str fname_crypto_str = {.name = NULL, .len = 0}; |
113 | 113 | ||
114 | if (ext4_encrypted_inode(inode)) { | ||
115 | err = ext4_get_encryption_info(inode); | ||
116 | if (err && err != -ENOKEY) | ||
117 | return err; | ||
118 | } | ||
119 | |||
114 | if (is_dx_dir(inode)) { | 120 | if (is_dx_dir(inode)) { |
115 | err = ext4_dx_readdir(file, ctx); | 121 | err = ext4_dx_readdir(file, ctx); |
116 | if (err != ERR_BAD_DX_DIR) { | 122 | if (err != ERR_BAD_DX_DIR) { |
@@ -157,8 +163,11 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx) | |||
157 | index, 1); | 163 | index, 1); |
158 | file->f_ra.prev_pos = (loff_t)index << PAGE_CACHE_SHIFT; | 164 | file->f_ra.prev_pos = (loff_t)index << PAGE_CACHE_SHIFT; |
159 | bh = ext4_bread(NULL, inode, map.m_lblk, 0); | 165 | bh = ext4_bread(NULL, inode, map.m_lblk, 0); |
160 | if (IS_ERR(bh)) | 166 | if (IS_ERR(bh)) { |
161 | return PTR_ERR(bh); | 167 | err = PTR_ERR(bh); |
168 | bh = NULL; | ||
169 | goto errout; | ||
170 | } | ||
162 | } | 171 | } |
163 | 172 | ||
164 | if (!bh) { | 173 | if (!bh) { |
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 0662b285dc8a..157b458a69d4 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h | |||
@@ -2302,6 +2302,7 @@ struct page *ext4_encrypt(struct inode *inode, | |||
2302 | int ext4_decrypt(struct page *page); | 2302 | int ext4_decrypt(struct page *page); |
2303 | int ext4_encrypted_zeroout(struct inode *inode, ext4_lblk_t lblk, | 2303 | int ext4_encrypted_zeroout(struct inode *inode, ext4_lblk_t lblk, |
2304 | ext4_fsblk_t pblk, ext4_lblk_t len); | 2304 | ext4_fsblk_t pblk, ext4_lblk_t len); |
2305 | extern const struct dentry_operations ext4_encrypted_d_ops; | ||
2305 | 2306 | ||
2306 | #ifdef CONFIG_EXT4_FS_ENCRYPTION | 2307 | #ifdef CONFIG_EXT4_FS_ENCRYPTION |
2307 | int ext4_init_crypto(void); | 2308 | int ext4_init_crypto(void); |
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 0ffabaf90aa5..3753ceb0b0dd 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c | |||
@@ -3928,7 +3928,7 @@ static int | |||
3928 | convert_initialized_extent(handle_t *handle, struct inode *inode, | 3928 | convert_initialized_extent(handle_t *handle, struct inode *inode, |
3929 | struct ext4_map_blocks *map, | 3929 | struct ext4_map_blocks *map, |
3930 | struct ext4_ext_path **ppath, int flags, | 3930 | struct ext4_ext_path **ppath, int flags, |
3931 | unsigned int allocated, ext4_fsblk_t newblock) | 3931 | unsigned int allocated) |
3932 | { | 3932 | { |
3933 | struct ext4_ext_path *path = *ppath; | 3933 | struct ext4_ext_path *path = *ppath; |
3934 | struct ext4_extent *ex; | 3934 | struct ext4_extent *ex; |
@@ -4347,7 +4347,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, | |||
4347 | (flags & EXT4_GET_BLOCKS_CONVERT_UNWRITTEN)) { | 4347 | (flags & EXT4_GET_BLOCKS_CONVERT_UNWRITTEN)) { |
4348 | allocated = convert_initialized_extent( | 4348 | allocated = convert_initialized_extent( |
4349 | handle, inode, map, &path, | 4349 | handle, inode, map, &path, |
4350 | flags, allocated, newblock); | 4350 | flags, allocated); |
4351 | goto out2; | 4351 | goto out2; |
4352 | } else if (!ext4_ext_is_unwritten(ex)) | 4352 | } else if (!ext4_ext_is_unwritten(ex)) |
4353 | goto out; | 4353 | goto out; |
diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 1126436dada1..4cd318f31cbe 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c | |||
@@ -262,23 +262,8 @@ static int ext4_dax_pmd_fault(struct vm_area_struct *vma, unsigned long addr, | |||
262 | return result; | 262 | return result; |
263 | } | 263 | } |
264 | 264 | ||
265 | static int ext4_dax_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) | ||
266 | { | ||
267 | int err; | ||
268 | struct inode *inode = file_inode(vma->vm_file); | ||
269 | |||
270 | sb_start_pagefault(inode->i_sb); | ||
271 | file_update_time(vma->vm_file); | ||
272 | down_read(&EXT4_I(inode)->i_mmap_sem); | ||
273 | err = __dax_mkwrite(vma, vmf, ext4_dax_mmap_get_block, NULL); | ||
274 | up_read(&EXT4_I(inode)->i_mmap_sem); | ||
275 | sb_end_pagefault(inode->i_sb); | ||
276 | |||
277 | return err; | ||
278 | } | ||
279 | |||
280 | /* | 265 | /* |
281 | * Handle write fault for VM_MIXEDMAP mappings. Similarly to ext4_dax_mkwrite() | 266 | * Handle write fault for VM_MIXEDMAP mappings. Similarly to ext4_dax_fault() |
282 | * handler we check for races agaist truncate. Note that since we cycle through | 267 | * handler we check for races agaist truncate. Note that since we cycle through |
283 | * i_mmap_sem, we are sure that also any hole punching that began before we | 268 | * i_mmap_sem, we are sure that also any hole punching that began before we |
284 | * were called is finished by now and so if it included part of the file we | 269 | * were called is finished by now and so if it included part of the file we |
@@ -311,7 +296,7 @@ static int ext4_dax_pfn_mkwrite(struct vm_area_struct *vma, | |||
311 | static const struct vm_operations_struct ext4_dax_vm_ops = { | 296 | static const struct vm_operations_struct ext4_dax_vm_ops = { |
312 | .fault = ext4_dax_fault, | 297 | .fault = ext4_dax_fault, |
313 | .pmd_fault = ext4_dax_pmd_fault, | 298 | .pmd_fault = ext4_dax_pmd_fault, |
314 | .page_mkwrite = ext4_dax_mkwrite, | 299 | .page_mkwrite = ext4_dax_fault, |
315 | .pfn_mkwrite = ext4_dax_pfn_mkwrite, | 300 | .pfn_mkwrite = ext4_dax_pfn_mkwrite, |
316 | }; | 301 | }; |
317 | #else | 302 | #else |
@@ -350,6 +335,7 @@ static int ext4_file_open(struct inode * inode, struct file * filp) | |||
350 | struct super_block *sb = inode->i_sb; | 335 | struct super_block *sb = inode->i_sb; |
351 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | 336 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); |
352 | struct vfsmount *mnt = filp->f_path.mnt; | 337 | struct vfsmount *mnt = filp->f_path.mnt; |
338 | struct inode *dir = filp->f_path.dentry->d_parent->d_inode; | ||
353 | struct path path; | 339 | struct path path; |
354 | char buf[64], *cp; | 340 | char buf[64], *cp; |
355 | int ret; | 341 | int ret; |
@@ -393,6 +379,14 @@ static int ext4_file_open(struct inode * inode, struct file * filp) | |||
393 | if (ext4_encryption_info(inode) == NULL) | 379 | if (ext4_encryption_info(inode) == NULL) |
394 | return -ENOKEY; | 380 | return -ENOKEY; |
395 | } | 381 | } |
382 | if (ext4_encrypted_inode(dir) && | ||
383 | !ext4_is_child_context_consistent_with_parent(dir, inode)) { | ||
384 | ext4_warning(inode->i_sb, | ||
385 | "Inconsistent encryption contexts: %lu/%lu\n", | ||
386 | (unsigned long) dir->i_ino, | ||
387 | (unsigned long) inode->i_ino); | ||
388 | return -EPERM; | ||
389 | } | ||
396 | /* | 390 | /* |
397 | * Set up the jbd2_inode if we are opening the inode for | 391 | * Set up the jbd2_inode if we are opening the inode for |
398 | * writing and the journal is present | 392 | * writing and the journal is present |
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 3fcfd50a2e8a..acc0ad56bf2f 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c | |||
@@ -76,7 +76,6 @@ static int ext4_init_inode_bitmap(struct super_block *sb, | |||
76 | /* If checksum is bad mark all blocks and inodes use to prevent | 76 | /* If checksum is bad mark all blocks and inodes use to prevent |
77 | * allocation, essentially implementing a per-group read-only flag. */ | 77 | * allocation, essentially implementing a per-group read-only flag. */ |
78 | if (!ext4_group_desc_csum_verify(sb, block_group, gdp)) { | 78 | if (!ext4_group_desc_csum_verify(sb, block_group, gdp)) { |
79 | ext4_error(sb, "Checksum bad for group %u", block_group); | ||
80 | grp = ext4_get_group_info(sb, block_group); | 79 | grp = ext4_get_group_info(sb, block_group); |
81 | if (!EXT4_MB_GRP_BBITMAP_CORRUPT(grp)) | 80 | if (!EXT4_MB_GRP_BBITMAP_CORRUPT(grp)) |
82 | percpu_counter_sub(&sbi->s_freeclusters_counter, | 81 | percpu_counter_sub(&sbi->s_freeclusters_counter, |
@@ -191,8 +190,11 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group) | |||
191 | set_buffer_verified(bh); | 190 | set_buffer_verified(bh); |
192 | ext4_unlock_group(sb, block_group); | 191 | ext4_unlock_group(sb, block_group); |
193 | unlock_buffer(bh); | 192 | unlock_buffer(bh); |
194 | if (err) | 193 | if (err) { |
194 | ext4_error(sb, "Failed to init inode bitmap for group " | ||
195 | "%u: %d", block_group, err); | ||
195 | goto out; | 196 | goto out; |
197 | } | ||
196 | return bh; | 198 | return bh; |
197 | } | 199 | } |
198 | ext4_unlock_group(sb, block_group); | 200 | ext4_unlock_group(sb, block_group); |
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 83bc8bfb3bea..aee960b1af34 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c | |||
@@ -686,6 +686,34 @@ out_sem: | |||
686 | return retval; | 686 | return retval; |
687 | } | 687 | } |
688 | 688 | ||
689 | /* | ||
690 | * Update EXT4_MAP_FLAGS in bh->b_state. For buffer heads attached to pages | ||
691 | * we have to be careful as someone else may be manipulating b_state as well. | ||
692 | */ | ||
693 | static void ext4_update_bh_state(struct buffer_head *bh, unsigned long flags) | ||
694 | { | ||
695 | unsigned long old_state; | ||
696 | unsigned long new_state; | ||
697 | |||
698 | flags &= EXT4_MAP_FLAGS; | ||
699 | |||
700 | /* Dummy buffer_head? Set non-atomically. */ | ||
701 | if (!bh->b_page) { | ||
702 | bh->b_state = (bh->b_state & ~EXT4_MAP_FLAGS) | flags; | ||
703 | return; | ||
704 | } | ||
705 | /* | ||
706 | * Someone else may be modifying b_state. Be careful! This is ugly but | ||
707 | * once we get rid of using bh as a container for mapping information | ||
708 | * to pass to / from get_block functions, this can go away. | ||
709 | */ | ||
710 | do { | ||
711 | old_state = READ_ONCE(bh->b_state); | ||
712 | new_state = (old_state & ~EXT4_MAP_FLAGS) | flags; | ||
713 | } while (unlikely( | ||
714 | cmpxchg(&bh->b_state, old_state, new_state) != old_state)); | ||
715 | } | ||
716 | |||
689 | /* Maximum number of blocks we map for direct IO at once. */ | 717 | /* Maximum number of blocks we map for direct IO at once. */ |
690 | #define DIO_MAX_BLOCKS 4096 | 718 | #define DIO_MAX_BLOCKS 4096 |
691 | 719 | ||
@@ -722,7 +750,7 @@ static int _ext4_get_block(struct inode *inode, sector_t iblock, | |||
722 | ext4_io_end_t *io_end = ext4_inode_aio(inode); | 750 | ext4_io_end_t *io_end = ext4_inode_aio(inode); |
723 | 751 | ||
724 | map_bh(bh, inode->i_sb, map.m_pblk); | 752 | map_bh(bh, inode->i_sb, map.m_pblk); |
725 | bh->b_state = (bh->b_state & ~EXT4_MAP_FLAGS) | map.m_flags; | 753 | ext4_update_bh_state(bh, map.m_flags); |
726 | if (io_end && io_end->flag & EXT4_IO_END_UNWRITTEN) | 754 | if (io_end && io_end->flag & EXT4_IO_END_UNWRITTEN) |
727 | set_buffer_defer_completion(bh); | 755 | set_buffer_defer_completion(bh); |
728 | bh->b_size = inode->i_sb->s_blocksize * map.m_len; | 756 | bh->b_size = inode->i_sb->s_blocksize * map.m_len; |
@@ -1685,7 +1713,7 @@ int ext4_da_get_block_prep(struct inode *inode, sector_t iblock, | |||
1685 | return ret; | 1713 | return ret; |
1686 | 1714 | ||
1687 | map_bh(bh, inode->i_sb, map.m_pblk); | 1715 | map_bh(bh, inode->i_sb, map.m_pblk); |
1688 | bh->b_state = (bh->b_state & ~EXT4_MAP_FLAGS) | map.m_flags; | 1716 | ext4_update_bh_state(bh, map.m_flags); |
1689 | 1717 | ||
1690 | if (buffer_unwritten(bh)) { | 1718 | if (buffer_unwritten(bh)) { |
1691 | /* A delayed write to unwritten bh should be marked | 1719 | /* A delayed write to unwritten bh should be marked |
@@ -2450,6 +2478,10 @@ static int ext4_writepages(struct address_space *mapping, | |||
2450 | 2478 | ||
2451 | trace_ext4_writepages(inode, wbc); | 2479 | trace_ext4_writepages(inode, wbc); |
2452 | 2480 | ||
2481 | if (dax_mapping(mapping)) | ||
2482 | return dax_writeback_mapping_range(mapping, inode->i_sb->s_bdev, | ||
2483 | wbc); | ||
2484 | |||
2453 | /* | 2485 | /* |
2454 | * No pages to write? This is mainly a kludge to avoid starting | 2486 | * No pages to write? This is mainly a kludge to avoid starting |
2455 | * a transaction for special inodes like journal inode on last iput() | 2487 | * a transaction for special inodes like journal inode on last iput() |
@@ -3253,29 +3285,29 @@ static ssize_t ext4_ext_direct_IO(struct kiocb *iocb, struct iov_iter *iter, | |||
3253 | * case, we allocate an io_end structure to hook to the iocb. | 3285 | * case, we allocate an io_end structure to hook to the iocb. |
3254 | */ | 3286 | */ |
3255 | iocb->private = NULL; | 3287 | iocb->private = NULL; |
3256 | ext4_inode_aio_set(inode, NULL); | ||
3257 | if (!is_sync_kiocb(iocb)) { | ||
3258 | io_end = ext4_init_io_end(inode, GFP_NOFS); | ||
3259 | if (!io_end) { | ||
3260 | ret = -ENOMEM; | ||
3261 | goto retake_lock; | ||
3262 | } | ||
3263 | /* | ||
3264 | * Grab reference for DIO. Will be dropped in ext4_end_io_dio() | ||
3265 | */ | ||
3266 | iocb->private = ext4_get_io_end(io_end); | ||
3267 | /* | ||
3268 | * we save the io structure for current async direct | ||
3269 | * IO, so that later ext4_map_blocks() could flag the | ||
3270 | * io structure whether there is a unwritten extents | ||
3271 | * needs to be converted when IO is completed. | ||
3272 | */ | ||
3273 | ext4_inode_aio_set(inode, io_end); | ||
3274 | } | ||
3275 | |||
3276 | if (overwrite) { | 3288 | if (overwrite) { |
3277 | get_block_func = ext4_get_block_overwrite; | 3289 | get_block_func = ext4_get_block_overwrite; |
3278 | } else { | 3290 | } else { |
3291 | ext4_inode_aio_set(inode, NULL); | ||
3292 | if (!is_sync_kiocb(iocb)) { | ||
3293 | io_end = ext4_init_io_end(inode, GFP_NOFS); | ||
3294 | if (!io_end) { | ||
3295 | ret = -ENOMEM; | ||
3296 | goto retake_lock; | ||
3297 | } | ||
3298 | /* | ||
3299 | * Grab reference for DIO. Will be dropped in | ||
3300 | * ext4_end_io_dio() | ||
3301 | */ | ||
3302 | iocb->private = ext4_get_io_end(io_end); | ||
3303 | /* | ||
3304 | * we save the io structure for current async direct | ||
3305 | * IO, so that later ext4_map_blocks() could flag the | ||
3306 | * io structure whether there is a unwritten extents | ||
3307 | * needs to be converted when IO is completed. | ||
3308 | */ | ||
3309 | ext4_inode_aio_set(inode, io_end); | ||
3310 | } | ||
3279 | get_block_func = ext4_get_block_write; | 3311 | get_block_func = ext4_get_block_write; |
3280 | dio_flags = DIO_LOCKING; | 3312 | dio_flags = DIO_LOCKING; |
3281 | } | 3313 | } |
@@ -4127,7 +4159,7 @@ void ext4_set_inode_flags(struct inode *inode) | |||
4127 | new_fl |= S_NOATIME; | 4159 | new_fl |= S_NOATIME; |
4128 | if (flags & EXT4_DIRSYNC_FL) | 4160 | if (flags & EXT4_DIRSYNC_FL) |
4129 | new_fl |= S_DIRSYNC; | 4161 | new_fl |= S_DIRSYNC; |
4130 | if (test_opt(inode->i_sb, DAX)) | 4162 | if (test_opt(inode->i_sb, DAX) && S_ISREG(inode->i_mode)) |
4131 | new_fl |= S_DAX; | 4163 | new_fl |= S_DAX; |
4132 | inode_set_flags(inode, new_fl, | 4164 | inode_set_flags(inode, new_fl, |
4133 | S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC|S_DAX); | 4165 | S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC|S_DAX); |
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index 0f6c36922c24..eae5917c534e 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c | |||
@@ -208,7 +208,7 @@ static int ext4_ioctl_setflags(struct inode *inode, | |||
208 | { | 208 | { |
209 | struct ext4_inode_info *ei = EXT4_I(inode); | 209 | struct ext4_inode_info *ei = EXT4_I(inode); |
210 | handle_t *handle = NULL; | 210 | handle_t *handle = NULL; |
211 | int err = EPERM, migrate = 0; | 211 | int err = -EPERM, migrate = 0; |
212 | struct ext4_iloc iloc; | 212 | struct ext4_iloc iloc; |
213 | unsigned int oldflags, mask, i; | 213 | unsigned int oldflags, mask, i; |
214 | unsigned int jflag; | 214 | unsigned int jflag; |
@@ -583,6 +583,11 @@ group_extend_out: | |||
583 | "Online defrag not supported with bigalloc"); | 583 | "Online defrag not supported with bigalloc"); |
584 | err = -EOPNOTSUPP; | 584 | err = -EOPNOTSUPP; |
585 | goto mext_out; | 585 | goto mext_out; |
586 | } else if (IS_DAX(inode)) { | ||
587 | ext4_msg(sb, KERN_ERR, | ||
588 | "Online defrag not supported with DAX"); | ||
589 | err = -EOPNOTSUPP; | ||
590 | goto mext_out; | ||
586 | } | 591 | } |
587 | 592 | ||
588 | err = mnt_want_write_file(filp); | 593 | err = mnt_want_write_file(filp); |
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 61eaf74dca37..4424b7bf8ac6 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c | |||
@@ -2285,7 +2285,7 @@ static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v) | |||
2285 | if (group == 0) | 2285 | if (group == 0) |
2286 | seq_puts(seq, "#group: free frags first [" | 2286 | seq_puts(seq, "#group: free frags first [" |
2287 | " 2^0 2^1 2^2 2^3 2^4 2^5 2^6 " | 2287 | " 2^0 2^1 2^2 2^3 2^4 2^5 2^6 " |
2288 | " 2^7 2^8 2^9 2^10 2^11 2^12 2^13 ]"); | 2288 | " 2^7 2^8 2^9 2^10 2^11 2^12 2^13 ]\n"); |
2289 | 2289 | ||
2290 | i = (sb->s_blocksize_bits + 2) * sizeof(sg.info.bb_counters[0]) + | 2290 | i = (sb->s_blocksize_bits + 2) * sizeof(sg.info.bb_counters[0]) + |
2291 | sizeof(struct ext4_group_info); | 2291 | sizeof(struct ext4_group_info); |
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c index fb6f11709ae6..e032a0423e35 100644 --- a/fs/ext4/move_extent.c +++ b/fs/ext4/move_extent.c | |||
@@ -265,11 +265,12 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode, | |||
265 | ext4_lblk_t orig_blk_offset, donor_blk_offset; | 265 | ext4_lblk_t orig_blk_offset, donor_blk_offset; |
266 | unsigned long blocksize = orig_inode->i_sb->s_blocksize; | 266 | unsigned long blocksize = orig_inode->i_sb->s_blocksize; |
267 | unsigned int tmp_data_size, data_size, replaced_size; | 267 | unsigned int tmp_data_size, data_size, replaced_size; |
268 | int err2, jblocks, retries = 0; | 268 | int i, err2, jblocks, retries = 0; |
269 | int replaced_count = 0; | 269 | int replaced_count = 0; |
270 | int from = data_offset_in_page << orig_inode->i_blkbits; | 270 | int from = data_offset_in_page << orig_inode->i_blkbits; |
271 | int blocks_per_page = PAGE_CACHE_SIZE >> orig_inode->i_blkbits; | 271 | int blocks_per_page = PAGE_CACHE_SIZE >> orig_inode->i_blkbits; |
272 | struct super_block *sb = orig_inode->i_sb; | 272 | struct super_block *sb = orig_inode->i_sb; |
273 | struct buffer_head *bh = NULL; | ||
273 | 274 | ||
274 | /* | 275 | /* |
275 | * It needs twice the amount of ordinary journal buffers because | 276 | * It needs twice the amount of ordinary journal buffers because |
@@ -380,8 +381,16 @@ data_copy: | |||
380 | } | 381 | } |
381 | /* Perform all necessary steps similar write_begin()/write_end() | 382 | /* Perform all necessary steps similar write_begin()/write_end() |
382 | * but keeping in mind that i_size will not change */ | 383 | * but keeping in mind that i_size will not change */ |
383 | *err = __block_write_begin(pagep[0], from, replaced_size, | 384 | if (!page_has_buffers(pagep[0])) |
384 | ext4_get_block); | 385 | create_empty_buffers(pagep[0], 1 << orig_inode->i_blkbits, 0); |
386 | bh = page_buffers(pagep[0]); | ||
387 | for (i = 0; i < data_offset_in_page; i++) | ||
388 | bh = bh->b_this_page; | ||
389 | for (i = 0; i < block_len_in_page; i++) { | ||
390 | *err = ext4_get_block(orig_inode, orig_blk_offset + i, bh, 0); | ||
391 | if (*err < 0) | ||
392 | break; | ||
393 | } | ||
385 | if (!*err) | 394 | if (!*err) |
386 | *err = block_commit_write(pagep[0], from, from + replaced_size); | 395 | *err = block_commit_write(pagep[0], from, from + replaced_size); |
387 | 396 | ||
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 06574dd77614..48e4b8907826 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c | |||
@@ -1558,6 +1558,24 @@ static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, unsi | |||
1558 | struct ext4_dir_entry_2 *de; | 1558 | struct ext4_dir_entry_2 *de; |
1559 | struct buffer_head *bh; | 1559 | struct buffer_head *bh; |
1560 | 1560 | ||
1561 | if (ext4_encrypted_inode(dir)) { | ||
1562 | int res = ext4_get_encryption_info(dir); | ||
1563 | |||
1564 | /* | ||
1565 | * This should be a properly defined flag for | ||
1566 | * dentry->d_flags when we uplift this to the VFS. | ||
1567 | * d_fsdata is set to (void *) 1 if if the dentry is | ||
1568 | * created while the directory was encrypted and we | ||
1569 | * don't have access to the key. | ||
1570 | */ | ||
1571 | dentry->d_fsdata = NULL; | ||
1572 | if (ext4_encryption_info(dir)) | ||
1573 | dentry->d_fsdata = (void *) 1; | ||
1574 | d_set_d_op(dentry, &ext4_encrypted_d_ops); | ||
1575 | if (res && res != -ENOKEY) | ||
1576 | return ERR_PTR(res); | ||
1577 | } | ||
1578 | |||
1561 | if (dentry->d_name.len > EXT4_NAME_LEN) | 1579 | if (dentry->d_name.len > EXT4_NAME_LEN) |
1562 | return ERR_PTR(-ENAMETOOLONG); | 1580 | return ERR_PTR(-ENAMETOOLONG); |
1563 | 1581 | ||
@@ -1585,11 +1603,15 @@ static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, unsi | |||
1585 | return ERR_PTR(-EFSCORRUPTED); | 1603 | return ERR_PTR(-EFSCORRUPTED); |
1586 | } | 1604 | } |
1587 | if (!IS_ERR(inode) && ext4_encrypted_inode(dir) && | 1605 | if (!IS_ERR(inode) && ext4_encrypted_inode(dir) && |
1588 | (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || | 1606 | (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) && |
1589 | S_ISLNK(inode->i_mode)) && | ||
1590 | !ext4_is_child_context_consistent_with_parent(dir, | 1607 | !ext4_is_child_context_consistent_with_parent(dir, |
1591 | inode)) { | 1608 | inode)) { |
1609 | int nokey = ext4_encrypted_inode(inode) && | ||
1610 | !ext4_encryption_info(inode); | ||
1611 | |||
1592 | iput(inode); | 1612 | iput(inode); |
1613 | if (nokey) | ||
1614 | return ERR_PTR(-ENOKEY); | ||
1593 | ext4_warning(inode->i_sb, | 1615 | ext4_warning(inode->i_sb, |
1594 | "Inconsistent encryption contexts: %lu/%lu\n", | 1616 | "Inconsistent encryption contexts: %lu/%lu\n", |
1595 | (unsigned long) dir->i_ino, | 1617 | (unsigned long) dir->i_ino, |
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index ad62d7acc315..34038e3598d5 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c | |||
@@ -198,7 +198,7 @@ static struct ext4_new_flex_group_data *alloc_flex_gd(unsigned long flexbg_size) | |||
198 | if (flex_gd == NULL) | 198 | if (flex_gd == NULL) |
199 | goto out3; | 199 | goto out3; |
200 | 200 | ||
201 | if (flexbg_size >= UINT_MAX / sizeof(struct ext4_new_flex_group_data)) | 201 | if (flexbg_size >= UINT_MAX / sizeof(struct ext4_new_group_data)) |
202 | goto out2; | 202 | goto out2; |
203 | flex_gd->count = flexbg_size; | 203 | flex_gd->count = flexbg_size; |
204 | 204 | ||
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 6915c950e6e8..5c46ed9f3e14 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c | |||
@@ -223,6 +223,9 @@ static void wb_wait_for_completion(struct backing_dev_info *bdi, | |||
223 | #define WB_FRN_HIST_MAX_SLOTS (WB_FRN_HIST_THR_SLOTS / 2 + 1) | 223 | #define WB_FRN_HIST_MAX_SLOTS (WB_FRN_HIST_THR_SLOTS / 2 + 1) |
224 | /* one round can affect upto 5 slots */ | 224 | /* one round can affect upto 5 slots */ |
225 | 225 | ||
226 | static atomic_t isw_nr_in_flight = ATOMIC_INIT(0); | ||
227 | static struct workqueue_struct *isw_wq; | ||
228 | |||
226 | void __inode_attach_wb(struct inode *inode, struct page *page) | 229 | void __inode_attach_wb(struct inode *inode, struct page *page) |
227 | { | 230 | { |
228 | struct backing_dev_info *bdi = inode_to_bdi(inode); | 231 | struct backing_dev_info *bdi = inode_to_bdi(inode); |
@@ -424,6 +427,8 @@ skip_switch: | |||
424 | 427 | ||
425 | iput(inode); | 428 | iput(inode); |
426 | kfree(isw); | 429 | kfree(isw); |
430 | |||
431 | atomic_dec(&isw_nr_in_flight); | ||
427 | } | 432 | } |
428 | 433 | ||
429 | static void inode_switch_wbs_rcu_fn(struct rcu_head *rcu_head) | 434 | static void inode_switch_wbs_rcu_fn(struct rcu_head *rcu_head) |
@@ -433,7 +438,7 @@ static void inode_switch_wbs_rcu_fn(struct rcu_head *rcu_head) | |||
433 | 438 | ||
434 | /* needs to grab bh-unsafe locks, bounce to work item */ | 439 | /* needs to grab bh-unsafe locks, bounce to work item */ |
435 | INIT_WORK(&isw->work, inode_switch_wbs_work_fn); | 440 | INIT_WORK(&isw->work, inode_switch_wbs_work_fn); |
436 | schedule_work(&isw->work); | 441 | queue_work(isw_wq, &isw->work); |
437 | } | 442 | } |
438 | 443 | ||
439 | /** | 444 | /** |
@@ -469,7 +474,8 @@ static void inode_switch_wbs(struct inode *inode, int new_wb_id) | |||
469 | 474 | ||
470 | /* while holding I_WB_SWITCH, no one else can update the association */ | 475 | /* while holding I_WB_SWITCH, no one else can update the association */ |
471 | spin_lock(&inode->i_lock); | 476 | spin_lock(&inode->i_lock); |
472 | if (inode->i_state & (I_WB_SWITCH | I_FREEING) || | 477 | if (!(inode->i_sb->s_flags & MS_ACTIVE) || |
478 | inode->i_state & (I_WB_SWITCH | I_FREEING) || | ||
473 | inode_to_wb(inode) == isw->new_wb) { | 479 | inode_to_wb(inode) == isw->new_wb) { |
474 | spin_unlock(&inode->i_lock); | 480 | spin_unlock(&inode->i_lock); |
475 | goto out_free; | 481 | goto out_free; |
@@ -480,6 +486,8 @@ static void inode_switch_wbs(struct inode *inode, int new_wb_id) | |||
480 | ihold(inode); | 486 | ihold(inode); |
481 | isw->inode = inode; | 487 | isw->inode = inode; |
482 | 488 | ||
489 | atomic_inc(&isw_nr_in_flight); | ||
490 | |||
483 | /* | 491 | /* |
484 | * In addition to synchronizing among switchers, I_WB_SWITCH tells | 492 | * In addition to synchronizing among switchers, I_WB_SWITCH tells |
485 | * the RCU protected stat update paths to grab the mapping's | 493 | * the RCU protected stat update paths to grab the mapping's |
@@ -840,6 +848,33 @@ restart: | |||
840 | wb_put(last_wb); | 848 | wb_put(last_wb); |
841 | } | 849 | } |
842 | 850 | ||
851 | /** | ||
852 | * cgroup_writeback_umount - flush inode wb switches for umount | ||
853 | * | ||
854 | * This function is called when a super_block is about to be destroyed and | ||
855 | * flushes in-flight inode wb switches. An inode wb switch goes through | ||
856 | * RCU and then workqueue, so the two need to be flushed in order to ensure | ||
857 | * that all previously scheduled switches are finished. As wb switches are | ||
858 | * rare occurrences and synchronize_rcu() can take a while, perform | ||
859 | * flushing iff wb switches are in flight. | ||
860 | */ | ||
861 | void cgroup_writeback_umount(void) | ||
862 | { | ||
863 | if (atomic_read(&isw_nr_in_flight)) { | ||
864 | synchronize_rcu(); | ||
865 | flush_workqueue(isw_wq); | ||
866 | } | ||
867 | } | ||
868 | |||
869 | static int __init cgroup_writeback_init(void) | ||
870 | { | ||
871 | isw_wq = alloc_workqueue("inode_switch_wbs", 0, 0); | ||
872 | if (!isw_wq) | ||
873 | return -ENOMEM; | ||
874 | return 0; | ||
875 | } | ||
876 | fs_initcall(cgroup_writeback_init); | ||
877 | |||
843 | #else /* CONFIG_CGROUP_WRITEBACK */ | 878 | #else /* CONFIG_CGROUP_WRITEBACK */ |
844 | 879 | ||
845 | static struct bdi_writeback * | 880 | static struct bdi_writeback * |
diff --git a/fs/hpfs/namei.c b/fs/hpfs/namei.c index 506765afa1a3..bb8d67e2740a 100644 --- a/fs/hpfs/namei.c +++ b/fs/hpfs/namei.c | |||
@@ -376,12 +376,11 @@ static int hpfs_unlink(struct inode *dir, struct dentry *dentry) | |||
376 | struct inode *inode = d_inode(dentry); | 376 | struct inode *inode = d_inode(dentry); |
377 | dnode_secno dno; | 377 | dnode_secno dno; |
378 | int r; | 378 | int r; |
379 | int rep = 0; | ||
380 | int err; | 379 | int err; |
381 | 380 | ||
382 | hpfs_lock(dir->i_sb); | 381 | hpfs_lock(dir->i_sb); |
383 | hpfs_adjust_length(name, &len); | 382 | hpfs_adjust_length(name, &len); |
384 | again: | 383 | |
385 | err = -ENOENT; | 384 | err = -ENOENT; |
386 | de = map_dirent(dir, hpfs_i(dir)->i_dno, name, len, &dno, &qbh); | 385 | de = map_dirent(dir, hpfs_i(dir)->i_dno, name, len, &dno, &qbh); |
387 | if (!de) | 386 | if (!de) |
@@ -401,33 +400,9 @@ again: | |||
401 | hpfs_error(dir->i_sb, "there was error when removing dirent"); | 400 | hpfs_error(dir->i_sb, "there was error when removing dirent"); |
402 | err = -EFSERROR; | 401 | err = -EFSERROR; |
403 | break; | 402 | break; |
404 | case 2: /* no space for deleting, try to truncate file */ | 403 | case 2: /* no space for deleting */ |
405 | |||
406 | err = -ENOSPC; | 404 | err = -ENOSPC; |
407 | if (rep++) | 405 | break; |
408 | break; | ||
409 | |||
410 | dentry_unhash(dentry); | ||
411 | if (!d_unhashed(dentry)) { | ||
412 | hpfs_unlock(dir->i_sb); | ||
413 | return -ENOSPC; | ||
414 | } | ||
415 | if (generic_permission(inode, MAY_WRITE) || | ||
416 | !S_ISREG(inode->i_mode) || | ||
417 | get_write_access(inode)) { | ||
418 | d_rehash(dentry); | ||
419 | } else { | ||
420 | struct iattr newattrs; | ||
421 | /*pr_info("truncating file before delete.\n");*/ | ||
422 | newattrs.ia_size = 0; | ||
423 | newattrs.ia_valid = ATTR_SIZE | ATTR_CTIME; | ||
424 | err = notify_change(dentry, &newattrs, NULL); | ||
425 | put_write_access(inode); | ||
426 | if (!err) | ||
427 | goto again; | ||
428 | } | ||
429 | hpfs_unlock(dir->i_sb); | ||
430 | return -ENOSPC; | ||
431 | default: | 406 | default: |
432 | drop_nlink(inode); | 407 | drop_nlink(inode); |
433 | err = 0; | 408 | err = 0; |
diff --git a/fs/inode.c b/fs/inode.c index 9f62db3bcc3e..69b8b526c194 100644 --- a/fs/inode.c +++ b/fs/inode.c | |||
@@ -154,6 +154,12 @@ int inode_init_always(struct super_block *sb, struct inode *inode) | |||
154 | inode->i_rdev = 0; | 154 | inode->i_rdev = 0; |
155 | inode->dirtied_when = 0; | 155 | inode->dirtied_when = 0; |
156 | 156 | ||
157 | #ifdef CONFIG_CGROUP_WRITEBACK | ||
158 | inode->i_wb_frn_winner = 0; | ||
159 | inode->i_wb_frn_avg_time = 0; | ||
160 | inode->i_wb_frn_history = 0; | ||
161 | #endif | ||
162 | |||
157 | if (security_inode_alloc(inode)) | 163 | if (security_inode_alloc(inode)) |
158 | goto out; | 164 | goto out; |
159 | spin_lock_init(&inode->i_lock); | 165 | spin_lock_init(&inode->i_lock); |
diff --git a/fs/jffs2/README.Locking b/fs/jffs2/README.Locking index 3ea36554107f..8918ac905a3b 100644 --- a/fs/jffs2/README.Locking +++ b/fs/jffs2/README.Locking | |||
@@ -2,10 +2,6 @@ | |||
2 | JFFS2 LOCKING DOCUMENTATION | 2 | JFFS2 LOCKING DOCUMENTATION |
3 | --------------------------- | 3 | --------------------------- |
4 | 4 | ||
5 | At least theoretically, JFFS2 does not require the Big Kernel Lock | ||
6 | (BKL), which was always helpfully obtained for it by Linux 2.4 VFS | ||
7 | code. It has its own locking, as described below. | ||
8 | |||
9 | This document attempts to describe the existing locking rules for | 5 | This document attempts to describe the existing locking rules for |
10 | JFFS2. It is not expected to remain perfectly up to date, but ought to | 6 | JFFS2. It is not expected to remain perfectly up to date, but ought to |
11 | be fairly close. | 7 | be fairly close. |
@@ -69,6 +65,7 @@ Ordering constraints: | |||
69 | any f->sem held. | 65 | any f->sem held. |
70 | 2. Never attempt to lock two file mutexes in one thread. | 66 | 2. Never attempt to lock two file mutexes in one thread. |
71 | No ordering rules have been made for doing so. | 67 | No ordering rules have been made for doing so. |
68 | 3. Never lock a page cache page with f->sem held. | ||
72 | 69 | ||
73 | 70 | ||
74 | erase_completion_lock spinlock | 71 | erase_completion_lock spinlock |
diff --git a/fs/jffs2/build.c b/fs/jffs2/build.c index 0ae91ad6df2d..b288c8ae1236 100644 --- a/fs/jffs2/build.c +++ b/fs/jffs2/build.c | |||
@@ -50,7 +50,8 @@ next_inode(int *i, struct jffs2_inode_cache *ic, struct jffs2_sb_info *c) | |||
50 | 50 | ||
51 | 51 | ||
52 | static void jffs2_build_inode_pass1(struct jffs2_sb_info *c, | 52 | static void jffs2_build_inode_pass1(struct jffs2_sb_info *c, |
53 | struct jffs2_inode_cache *ic) | 53 | struct jffs2_inode_cache *ic, |
54 | int *dir_hardlinks) | ||
54 | { | 55 | { |
55 | struct jffs2_full_dirent *fd; | 56 | struct jffs2_full_dirent *fd; |
56 | 57 | ||
@@ -69,19 +70,21 @@ static void jffs2_build_inode_pass1(struct jffs2_sb_info *c, | |||
69 | dbg_fsbuild("child \"%s\" (ino #%u) of dir ino #%u doesn't exist!\n", | 70 | dbg_fsbuild("child \"%s\" (ino #%u) of dir ino #%u doesn't exist!\n", |
70 | fd->name, fd->ino, ic->ino); | 71 | fd->name, fd->ino, ic->ino); |
71 | jffs2_mark_node_obsolete(c, fd->raw); | 72 | jffs2_mark_node_obsolete(c, fd->raw); |
73 | /* Clear the ic/raw union so it doesn't cause problems later. */ | ||
74 | fd->ic = NULL; | ||
72 | continue; | 75 | continue; |
73 | } | 76 | } |
74 | 77 | ||
78 | /* From this point, fd->raw is no longer used so we can set fd->ic */ | ||
79 | fd->ic = child_ic; | ||
80 | child_ic->pino_nlink++; | ||
81 | /* If we appear (at this stage) to have hard-linked directories, | ||
82 | * set a flag to trigger a scan later */ | ||
75 | if (fd->type == DT_DIR) { | 83 | if (fd->type == DT_DIR) { |
76 | if (child_ic->pino_nlink) { | 84 | child_ic->flags |= INO_FLAGS_IS_DIR; |
77 | JFFS2_ERROR("child dir \"%s\" (ino #%u) of dir ino #%u appears to be a hard link\n", | 85 | if (child_ic->pino_nlink > 1) |
78 | fd->name, fd->ino, ic->ino); | 86 | *dir_hardlinks = 1; |
79 | /* TODO: What do we do about it? */ | 87 | } |
80 | } else { | ||
81 | child_ic->pino_nlink = ic->ino; | ||
82 | } | ||
83 | } else | ||
84 | child_ic->pino_nlink++; | ||
85 | 88 | ||
86 | dbg_fsbuild("increased nlink for child \"%s\" (ino #%u)\n", fd->name, fd->ino); | 89 | dbg_fsbuild("increased nlink for child \"%s\" (ino #%u)\n", fd->name, fd->ino); |
87 | /* Can't free scan_dents so far. We might need them in pass 2 */ | 90 | /* Can't free scan_dents so far. We might need them in pass 2 */ |
@@ -95,8 +98,7 @@ static void jffs2_build_inode_pass1(struct jffs2_sb_info *c, | |||
95 | */ | 98 | */ |
96 | static int jffs2_build_filesystem(struct jffs2_sb_info *c) | 99 | static int jffs2_build_filesystem(struct jffs2_sb_info *c) |
97 | { | 100 | { |
98 | int ret; | 101 | int ret, i, dir_hardlinks = 0; |
99 | int i; | ||
100 | struct jffs2_inode_cache *ic; | 102 | struct jffs2_inode_cache *ic; |
101 | struct jffs2_full_dirent *fd; | 103 | struct jffs2_full_dirent *fd; |
102 | struct jffs2_full_dirent *dead_fds = NULL; | 104 | struct jffs2_full_dirent *dead_fds = NULL; |
@@ -120,7 +122,7 @@ static int jffs2_build_filesystem(struct jffs2_sb_info *c) | |||
120 | /* Now scan the directory tree, increasing nlink according to every dirent found. */ | 122 | /* Now scan the directory tree, increasing nlink according to every dirent found. */ |
121 | for_each_inode(i, c, ic) { | 123 | for_each_inode(i, c, ic) { |
122 | if (ic->scan_dents) { | 124 | if (ic->scan_dents) { |
123 | jffs2_build_inode_pass1(c, ic); | 125 | jffs2_build_inode_pass1(c, ic, &dir_hardlinks); |
124 | cond_resched(); | 126 | cond_resched(); |
125 | } | 127 | } |
126 | } | 128 | } |
@@ -156,6 +158,20 @@ static int jffs2_build_filesystem(struct jffs2_sb_info *c) | |||
156 | } | 158 | } |
157 | 159 | ||
158 | dbg_fsbuild("pass 2a complete\n"); | 160 | dbg_fsbuild("pass 2a complete\n"); |
161 | |||
162 | if (dir_hardlinks) { | ||
163 | /* If we detected directory hardlinks earlier, *hopefully* | ||
164 | * they are gone now because some of the links were from | ||
165 | * dead directories which still had some old dirents lying | ||
166 | * around and not yet garbage-collected, but which have | ||
167 | * been discarded above. So clear the pino_nlink field | ||
168 | * in each directory, so that the final scan below can | ||
169 | * print appropriate warnings. */ | ||
170 | for_each_inode(i, c, ic) { | ||
171 | if (ic->flags & INO_FLAGS_IS_DIR) | ||
172 | ic->pino_nlink = 0; | ||
173 | } | ||
174 | } | ||
159 | dbg_fsbuild("freeing temporary data structures\n"); | 175 | dbg_fsbuild("freeing temporary data structures\n"); |
160 | 176 | ||
161 | /* Finally, we can scan again and free the dirent structs */ | 177 | /* Finally, we can scan again and free the dirent structs */ |
@@ -163,6 +179,33 @@ static int jffs2_build_filesystem(struct jffs2_sb_info *c) | |||
163 | while(ic->scan_dents) { | 179 | while(ic->scan_dents) { |
164 | fd = ic->scan_dents; | 180 | fd = ic->scan_dents; |
165 | ic->scan_dents = fd->next; | 181 | ic->scan_dents = fd->next; |
182 | /* We do use the pino_nlink field to count nlink of | ||
183 | * directories during fs build, so set it to the | ||
184 | * parent ino# now. Now that there's hopefully only | ||
185 | * one. */ | ||
186 | if (fd->type == DT_DIR) { | ||
187 | if (!fd->ic) { | ||
188 | /* We'll have complained about it and marked the coresponding | ||
189 | raw node obsolete already. Just skip it. */ | ||
190 | continue; | ||
191 | } | ||
192 | |||
193 | /* We *have* to have set this in jffs2_build_inode_pass1() */ | ||
194 | BUG_ON(!(fd->ic->flags & INO_FLAGS_IS_DIR)); | ||
195 | |||
196 | /* We clear ic->pino_nlink ∀ directories' ic *only* if dir_hardlinks | ||
197 | * is set. Otherwise, we know this should never trigger anyway, so | ||
198 | * we don't do the check. And ic->pino_nlink still contains the nlink | ||
199 | * value (which is 1). */ | ||
200 | if (dir_hardlinks && fd->ic->pino_nlink) { | ||
201 | JFFS2_ERROR("child dir \"%s\" (ino #%u) of dir ino #%u is also hard linked from dir ino #%u\n", | ||
202 | fd->name, fd->ino, ic->ino, fd->ic->pino_nlink); | ||
203 | /* Should we unlink it from its previous parent? */ | ||
204 | } | ||
205 | |||
206 | /* For directories, ic->pino_nlink holds that parent inode # */ | ||
207 | fd->ic->pino_nlink = ic->ino; | ||
208 | } | ||
166 | jffs2_free_full_dirent(fd); | 209 | jffs2_free_full_dirent(fd); |
167 | } | 210 | } |
168 | ic->scan_dents = NULL; | 211 | ic->scan_dents = NULL; |
@@ -241,11 +284,7 @@ static void jffs2_build_remove_unlinked_inode(struct jffs2_sb_info *c, | |||
241 | 284 | ||
242 | /* Reduce nlink of the child. If it's now zero, stick it on the | 285 | /* Reduce nlink of the child. If it's now zero, stick it on the |
243 | dead_fds list to be cleaned up later. Else just free the fd */ | 286 | dead_fds list to be cleaned up later. Else just free the fd */ |
244 | 287 | child_ic->pino_nlink--; | |
245 | if (fd->type == DT_DIR) | ||
246 | child_ic->pino_nlink = 0; | ||
247 | else | ||
248 | child_ic->pino_nlink--; | ||
249 | 288 | ||
250 | if (!child_ic->pino_nlink) { | 289 | if (!child_ic->pino_nlink) { |
251 | dbg_fsbuild("inode #%u (\"%s\") now has no links; adding to dead_fds list.\n", | 290 | dbg_fsbuild("inode #%u (\"%s\") now has no links; adding to dead_fds list.\n", |
diff --git a/fs/jffs2/file.c b/fs/jffs2/file.c index c5ac5944bc1b..cad86bac3453 100644 --- a/fs/jffs2/file.c +++ b/fs/jffs2/file.c | |||
@@ -137,39 +137,33 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping, | |||
137 | struct page *pg; | 137 | struct page *pg; |
138 | struct inode *inode = mapping->host; | 138 | struct inode *inode = mapping->host; |
139 | struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode); | 139 | struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode); |
140 | struct jffs2_sb_info *c = JFFS2_SB_INFO(inode->i_sb); | ||
141 | struct jffs2_raw_inode ri; | ||
142 | uint32_t alloc_len = 0; | ||
143 | pgoff_t index = pos >> PAGE_CACHE_SHIFT; | 140 | pgoff_t index = pos >> PAGE_CACHE_SHIFT; |
144 | uint32_t pageofs = index << PAGE_CACHE_SHIFT; | 141 | uint32_t pageofs = index << PAGE_CACHE_SHIFT; |
145 | int ret = 0; | 142 | int ret = 0; |
146 | 143 | ||
147 | jffs2_dbg(1, "%s()\n", __func__); | ||
148 | |||
149 | if (pageofs > inode->i_size) { | ||
150 | ret = jffs2_reserve_space(c, sizeof(ri), &alloc_len, | ||
151 | ALLOC_NORMAL, JFFS2_SUMMARY_INODE_SIZE); | ||
152 | if (ret) | ||
153 | return ret; | ||
154 | } | ||
155 | |||
156 | mutex_lock(&f->sem); | ||
157 | pg = grab_cache_page_write_begin(mapping, index, flags); | 144 | pg = grab_cache_page_write_begin(mapping, index, flags); |
158 | if (!pg) { | 145 | if (!pg) |
159 | if (alloc_len) | ||
160 | jffs2_complete_reservation(c); | ||
161 | mutex_unlock(&f->sem); | ||
162 | return -ENOMEM; | 146 | return -ENOMEM; |
163 | } | ||
164 | *pagep = pg; | 147 | *pagep = pg; |
165 | 148 | ||
166 | if (alloc_len) { | 149 | jffs2_dbg(1, "%s()\n", __func__); |
150 | |||
151 | if (pageofs > inode->i_size) { | ||
167 | /* Make new hole frag from old EOF to new page */ | 152 | /* Make new hole frag from old EOF to new page */ |
153 | struct jffs2_sb_info *c = JFFS2_SB_INFO(inode->i_sb); | ||
154 | struct jffs2_raw_inode ri; | ||
168 | struct jffs2_full_dnode *fn; | 155 | struct jffs2_full_dnode *fn; |
156 | uint32_t alloc_len; | ||
169 | 157 | ||
170 | jffs2_dbg(1, "Writing new hole frag 0x%x-0x%x between current EOF and new page\n", | 158 | jffs2_dbg(1, "Writing new hole frag 0x%x-0x%x between current EOF and new page\n", |
171 | (unsigned int)inode->i_size, pageofs); | 159 | (unsigned int)inode->i_size, pageofs); |
172 | 160 | ||
161 | ret = jffs2_reserve_space(c, sizeof(ri), &alloc_len, | ||
162 | ALLOC_NORMAL, JFFS2_SUMMARY_INODE_SIZE); | ||
163 | if (ret) | ||
164 | goto out_page; | ||
165 | |||
166 | mutex_lock(&f->sem); | ||
173 | memset(&ri, 0, sizeof(ri)); | 167 | memset(&ri, 0, sizeof(ri)); |
174 | 168 | ||
175 | ri.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK); | 169 | ri.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK); |
@@ -196,6 +190,7 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping, | |||
196 | if (IS_ERR(fn)) { | 190 | if (IS_ERR(fn)) { |
197 | ret = PTR_ERR(fn); | 191 | ret = PTR_ERR(fn); |
198 | jffs2_complete_reservation(c); | 192 | jffs2_complete_reservation(c); |
193 | mutex_unlock(&f->sem); | ||
199 | goto out_page; | 194 | goto out_page; |
200 | } | 195 | } |
201 | ret = jffs2_add_full_dnode_to_inode(c, f, fn); | 196 | ret = jffs2_add_full_dnode_to_inode(c, f, fn); |
@@ -210,10 +205,12 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping, | |||
210 | jffs2_mark_node_obsolete(c, fn->raw); | 205 | jffs2_mark_node_obsolete(c, fn->raw); |
211 | jffs2_free_full_dnode(fn); | 206 | jffs2_free_full_dnode(fn); |
212 | jffs2_complete_reservation(c); | 207 | jffs2_complete_reservation(c); |
208 | mutex_unlock(&f->sem); | ||
213 | goto out_page; | 209 | goto out_page; |
214 | } | 210 | } |
215 | jffs2_complete_reservation(c); | 211 | jffs2_complete_reservation(c); |
216 | inode->i_size = pageofs; | 212 | inode->i_size = pageofs; |
213 | mutex_unlock(&f->sem); | ||
217 | } | 214 | } |
218 | 215 | ||
219 | /* | 216 | /* |
@@ -222,18 +219,18 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping, | |||
222 | * case of a short-copy. | 219 | * case of a short-copy. |
223 | */ | 220 | */ |
224 | if (!PageUptodate(pg)) { | 221 | if (!PageUptodate(pg)) { |
222 | mutex_lock(&f->sem); | ||
225 | ret = jffs2_do_readpage_nolock(inode, pg); | 223 | ret = jffs2_do_readpage_nolock(inode, pg); |
224 | mutex_unlock(&f->sem); | ||
226 | if (ret) | 225 | if (ret) |
227 | goto out_page; | 226 | goto out_page; |
228 | } | 227 | } |
229 | mutex_unlock(&f->sem); | ||
230 | jffs2_dbg(1, "end write_begin(). pg->flags %lx\n", pg->flags); | 228 | jffs2_dbg(1, "end write_begin(). pg->flags %lx\n", pg->flags); |
231 | return ret; | 229 | return ret; |
232 | 230 | ||
233 | out_page: | 231 | out_page: |
234 | unlock_page(pg); | 232 | unlock_page(pg); |
235 | page_cache_release(pg); | 233 | page_cache_release(pg); |
236 | mutex_unlock(&f->sem); | ||
237 | return ret; | 234 | return ret; |
238 | } | 235 | } |
239 | 236 | ||
diff --git a/fs/jffs2/gc.c b/fs/jffs2/gc.c index 5a2dec2b064c..95d5880a63ee 100644 --- a/fs/jffs2/gc.c +++ b/fs/jffs2/gc.c | |||
@@ -1296,14 +1296,17 @@ static int jffs2_garbage_collect_dnode(struct jffs2_sb_info *c, struct jffs2_era | |||
1296 | BUG_ON(start > orig_start); | 1296 | BUG_ON(start > orig_start); |
1297 | } | 1297 | } |
1298 | 1298 | ||
1299 | /* First, use readpage() to read the appropriate page into the page cache */ | 1299 | /* The rules state that we must obtain the page lock *before* f->sem, so |
1300 | /* Q: What happens if we actually try to GC the _same_ page for which commit_write() | 1300 | * drop f->sem temporarily. Since we also hold c->alloc_sem, nothing's |
1301 | * triggered garbage collection in the first place? | 1301 | * actually going to *change* so we're safe; we only allow reading. |
1302 | * A: I _think_ it's OK. read_cache_page shouldn't deadlock, we'll write out the | 1302 | * |
1303 | * page OK. We'll actually write it out again in commit_write, which is a little | 1303 | * It is important to note that jffs2_write_begin() will ensure that its |
1304 | * suboptimal, but at least we're correct. | 1304 | * page is marked Uptodate before allocating space. That means that if we |
1305 | */ | 1305 | * end up here trying to GC the *same* page that jffs2_write_begin() is |
1306 | * trying to write out, read_cache_page() will not deadlock. */ | ||
1307 | mutex_unlock(&f->sem); | ||
1306 | pg_ptr = jffs2_gc_fetch_page(c, f, start, &pg); | 1308 | pg_ptr = jffs2_gc_fetch_page(c, f, start, &pg); |
1309 | mutex_lock(&f->sem); | ||
1307 | 1310 | ||
1308 | if (IS_ERR(pg_ptr)) { | 1311 | if (IS_ERR(pg_ptr)) { |
1309 | pr_warn("read_cache_page() returned error: %ld\n", | 1312 | pr_warn("read_cache_page() returned error: %ld\n", |
diff --git a/fs/jffs2/nodelist.h b/fs/jffs2/nodelist.h index fa35ff79ab35..0637271f3770 100644 --- a/fs/jffs2/nodelist.h +++ b/fs/jffs2/nodelist.h | |||
@@ -194,6 +194,7 @@ struct jffs2_inode_cache { | |||
194 | #define INO_STATE_CLEARING 6 /* In clear_inode() */ | 194 | #define INO_STATE_CLEARING 6 /* In clear_inode() */ |
195 | 195 | ||
196 | #define INO_FLAGS_XATTR_CHECKED 0x01 /* has no duplicate xattr_ref */ | 196 | #define INO_FLAGS_XATTR_CHECKED 0x01 /* has no duplicate xattr_ref */ |
197 | #define INO_FLAGS_IS_DIR 0x02 /* is a directory */ | ||
197 | 198 | ||
198 | #define RAWNODE_CLASS_INODE_CACHE 0 | 199 | #define RAWNODE_CLASS_INODE_CACHE 0 |
199 | #define RAWNODE_CLASS_XATTR_DATUM 1 | 200 | #define RAWNODE_CLASS_XATTR_DATUM 1 |
@@ -249,7 +250,10 @@ struct jffs2_readinode_info | |||
249 | 250 | ||
250 | struct jffs2_full_dirent | 251 | struct jffs2_full_dirent |
251 | { | 252 | { |
252 | struct jffs2_raw_node_ref *raw; | 253 | union { |
254 | struct jffs2_raw_node_ref *raw; | ||
255 | struct jffs2_inode_cache *ic; /* Just during part of build */ | ||
256 | }; | ||
253 | struct jffs2_full_dirent *next; | 257 | struct jffs2_full_dirent *next; |
254 | uint32_t version; | 258 | uint32_t version; |
255 | uint32_t ino; /* == zero for unlink */ | 259 | uint32_t ino; /* == zero for unlink */ |
diff --git a/fs/namei.c b/fs/namei.c index f624d132e01e..9c590e0f66e9 100644 --- a/fs/namei.c +++ b/fs/namei.c | |||
@@ -1712,6 +1712,11 @@ static inline int should_follow_link(struct nameidata *nd, struct path *link, | |||
1712 | return 0; | 1712 | return 0; |
1713 | if (!follow) | 1713 | if (!follow) |
1714 | return 0; | 1714 | return 0; |
1715 | /* make sure that d_is_symlink above matches inode */ | ||
1716 | if (nd->flags & LOOKUP_RCU) { | ||
1717 | if (read_seqcount_retry(&link->dentry->d_seq, seq)) | ||
1718 | return -ECHILD; | ||
1719 | } | ||
1715 | return pick_link(nd, link, inode, seq); | 1720 | return pick_link(nd, link, inode, seq); |
1716 | } | 1721 | } |
1717 | 1722 | ||
@@ -1743,11 +1748,11 @@ static int walk_component(struct nameidata *nd, int flags) | |||
1743 | if (err < 0) | 1748 | if (err < 0) |
1744 | return err; | 1749 | return err; |
1745 | 1750 | ||
1746 | inode = d_backing_inode(path.dentry); | ||
1747 | seq = 0; /* we are already out of RCU mode */ | 1751 | seq = 0; /* we are already out of RCU mode */ |
1748 | err = -ENOENT; | 1752 | err = -ENOENT; |
1749 | if (d_is_negative(path.dentry)) | 1753 | if (d_is_negative(path.dentry)) |
1750 | goto out_path_put; | 1754 | goto out_path_put; |
1755 | inode = d_backing_inode(path.dentry); | ||
1751 | } | 1756 | } |
1752 | 1757 | ||
1753 | if (flags & WALK_PUT) | 1758 | if (flags & WALK_PUT) |
@@ -3192,12 +3197,12 @@ retry_lookup: | |||
3192 | return error; | 3197 | return error; |
3193 | 3198 | ||
3194 | BUG_ON(nd->flags & LOOKUP_RCU); | 3199 | BUG_ON(nd->flags & LOOKUP_RCU); |
3195 | inode = d_backing_inode(path.dentry); | ||
3196 | seq = 0; /* out of RCU mode, so the value doesn't matter */ | 3200 | seq = 0; /* out of RCU mode, so the value doesn't matter */ |
3197 | if (unlikely(d_is_negative(path.dentry))) { | 3201 | if (unlikely(d_is_negative(path.dentry))) { |
3198 | path_to_nameidata(&path, nd); | 3202 | path_to_nameidata(&path, nd); |
3199 | return -ENOENT; | 3203 | return -ENOENT; |
3200 | } | 3204 | } |
3205 | inode = d_backing_inode(path.dentry); | ||
3201 | finish_lookup: | 3206 | finish_lookup: |
3202 | if (nd->depth) | 3207 | if (nd->depth) |
3203 | put_link(nd); | 3208 | put_link(nd); |
@@ -3206,11 +3211,6 @@ finish_lookup: | |||
3206 | if (unlikely(error)) | 3211 | if (unlikely(error)) |
3207 | return error; | 3212 | return error; |
3208 | 3213 | ||
3209 | if (unlikely(d_is_symlink(path.dentry)) && !(open_flag & O_PATH)) { | ||
3210 | path_to_nameidata(&path, nd); | ||
3211 | return -ELOOP; | ||
3212 | } | ||
3213 | |||
3214 | if ((nd->flags & LOOKUP_RCU) || nd->path.mnt != path.mnt) { | 3214 | if ((nd->flags & LOOKUP_RCU) || nd->path.mnt != path.mnt) { |
3215 | path_to_nameidata(&path, nd); | 3215 | path_to_nameidata(&path, nd); |
3216 | } else { | 3216 | } else { |
@@ -3229,6 +3229,10 @@ finish_open: | |||
3229 | return error; | 3229 | return error; |
3230 | } | 3230 | } |
3231 | audit_inode(nd->name, nd->path.dentry, 0); | 3231 | audit_inode(nd->name, nd->path.dentry, 0); |
3232 | if (unlikely(d_is_symlink(nd->path.dentry)) && !(open_flag & O_PATH)) { | ||
3233 | error = -ELOOP; | ||
3234 | goto out; | ||
3235 | } | ||
3232 | error = -EISDIR; | 3236 | error = -EISDIR; |
3233 | if ((open_flag & O_CREAT) && d_is_dir(nd->path.dentry)) | 3237 | if ((open_flag & O_CREAT) && d_is_dir(nd->path.dentry)) |
3234 | goto out; | 3238 | goto out; |
@@ -3273,6 +3277,10 @@ opened: | |||
3273 | goto exit_fput; | 3277 | goto exit_fput; |
3274 | } | 3278 | } |
3275 | out: | 3279 | out: |
3280 | if (unlikely(error > 0)) { | ||
3281 | WARN_ON(1); | ||
3282 | error = -EINVAL; | ||
3283 | } | ||
3276 | if (got_write) | 3284 | if (got_write) |
3277 | mnt_drop_write(nd->path.mnt); | 3285 | mnt_drop_write(nd->path.mnt); |
3278 | path_put(&save_parent); | 3286 | path_put(&save_parent); |
diff --git a/fs/nfs/blocklayout/extent_tree.c b/fs/nfs/blocklayout/extent_tree.c index c59a59c37f3d..35ab51c04814 100644 --- a/fs/nfs/blocklayout/extent_tree.c +++ b/fs/nfs/blocklayout/extent_tree.c | |||
@@ -476,6 +476,7 @@ static void ext_tree_free_commitdata(struct nfs4_layoutcommit_args *arg, | |||
476 | 476 | ||
477 | for (i = 0; i < nr_pages; i++) | 477 | for (i = 0; i < nr_pages; i++) |
478 | put_page(arg->layoutupdate_pages[i]); | 478 | put_page(arg->layoutupdate_pages[i]); |
479 | vfree(arg->start_p); | ||
479 | kfree(arg->layoutupdate_pages); | 480 | kfree(arg->layoutupdate_pages); |
480 | } else { | 481 | } else { |
481 | put_page(arg->layoutupdate_page); | 482 | put_page(arg->layoutupdate_page); |
@@ -559,10 +560,15 @@ retry: | |||
559 | 560 | ||
560 | if (unlikely(arg->layoutupdate_pages != &arg->layoutupdate_page)) { | 561 | if (unlikely(arg->layoutupdate_pages != &arg->layoutupdate_page)) { |
561 | void *p = start_p, *end = p + arg->layoutupdate_len; | 562 | void *p = start_p, *end = p + arg->layoutupdate_len; |
563 | struct page *page = NULL; | ||
562 | int i = 0; | 564 | int i = 0; |
563 | 565 | ||
564 | for ( ; p < end; p += PAGE_SIZE) | 566 | arg->start_p = start_p; |
565 | arg->layoutupdate_pages[i++] = vmalloc_to_page(p); | 567 | for ( ; p < end; p += PAGE_SIZE) { |
568 | page = vmalloc_to_page(p); | ||
569 | arg->layoutupdate_pages[i++] = page; | ||
570 | get_page(page); | ||
571 | } | ||
566 | } | 572 | } |
567 | 573 | ||
568 | dprintk("%s found %zu ranges\n", __func__, count); | 574 | dprintk("%s found %zu ranges\n", __func__, count); |
diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c index bd25dc7077f7..dff83460e5a6 100644 --- a/fs/nfs/nfs42proc.c +++ b/fs/nfs/nfs42proc.c | |||
@@ -16,29 +16,8 @@ | |||
16 | 16 | ||
17 | #define NFSDBG_FACILITY NFSDBG_PROC | 17 | #define NFSDBG_FACILITY NFSDBG_PROC |
18 | 18 | ||
19 | static int nfs42_set_rw_stateid(nfs4_stateid *dst, struct file *file, | ||
20 | fmode_t fmode) | ||
21 | { | ||
22 | struct nfs_open_context *open; | ||
23 | struct nfs_lock_context *lock; | ||
24 | int ret; | ||
25 | |||
26 | open = get_nfs_open_context(nfs_file_open_context(file)); | ||
27 | lock = nfs_get_lock_context(open); | ||
28 | if (IS_ERR(lock)) { | ||
29 | put_nfs_open_context(open); | ||
30 | return PTR_ERR(lock); | ||
31 | } | ||
32 | |||
33 | ret = nfs4_set_rw_stateid(dst, open, lock, fmode); | ||
34 | |||
35 | nfs_put_lock_context(lock); | ||
36 | put_nfs_open_context(open); | ||
37 | return ret; | ||
38 | } | ||
39 | |||
40 | static int _nfs42_proc_fallocate(struct rpc_message *msg, struct file *filep, | 19 | static int _nfs42_proc_fallocate(struct rpc_message *msg, struct file *filep, |
41 | loff_t offset, loff_t len) | 20 | struct nfs_lock_context *lock, loff_t offset, loff_t len) |
42 | { | 21 | { |
43 | struct inode *inode = file_inode(filep); | 22 | struct inode *inode = file_inode(filep); |
44 | struct nfs_server *server = NFS_SERVER(inode); | 23 | struct nfs_server *server = NFS_SERVER(inode); |
@@ -56,7 +35,8 @@ static int _nfs42_proc_fallocate(struct rpc_message *msg, struct file *filep, | |||
56 | msg->rpc_argp = &args; | 35 | msg->rpc_argp = &args; |
57 | msg->rpc_resp = &res; | 36 | msg->rpc_resp = &res; |
58 | 37 | ||
59 | status = nfs42_set_rw_stateid(&args.falloc_stateid, filep, FMODE_WRITE); | 38 | status = nfs4_set_rw_stateid(&args.falloc_stateid, lock->open_context, |
39 | lock, FMODE_WRITE); | ||
60 | if (status) | 40 | if (status) |
61 | return status; | 41 | return status; |
62 | 42 | ||
@@ -78,15 +58,26 @@ static int nfs42_proc_fallocate(struct rpc_message *msg, struct file *filep, | |||
78 | { | 58 | { |
79 | struct nfs_server *server = NFS_SERVER(file_inode(filep)); | 59 | struct nfs_server *server = NFS_SERVER(file_inode(filep)); |
80 | struct nfs4_exception exception = { }; | 60 | struct nfs4_exception exception = { }; |
61 | struct nfs_lock_context *lock; | ||
81 | int err; | 62 | int err; |
82 | 63 | ||
64 | lock = nfs_get_lock_context(nfs_file_open_context(filep)); | ||
65 | if (IS_ERR(lock)) | ||
66 | return PTR_ERR(lock); | ||
67 | |||
68 | exception.inode = file_inode(filep); | ||
69 | exception.state = lock->open_context->state; | ||
70 | |||
83 | do { | 71 | do { |
84 | err = _nfs42_proc_fallocate(msg, filep, offset, len); | 72 | err = _nfs42_proc_fallocate(msg, filep, lock, offset, len); |
85 | if (err == -ENOTSUPP) | 73 | if (err == -ENOTSUPP) { |
86 | return -EOPNOTSUPP; | 74 | err = -EOPNOTSUPP; |
75 | break; | ||
76 | } | ||
87 | err = nfs4_handle_exception(server, err, &exception); | 77 | err = nfs4_handle_exception(server, err, &exception); |
88 | } while (exception.retry); | 78 | } while (exception.retry); |
89 | 79 | ||
80 | nfs_put_lock_context(lock); | ||
90 | return err; | 81 | return err; |
91 | } | 82 | } |
92 | 83 | ||
@@ -135,7 +126,8 @@ int nfs42_proc_deallocate(struct file *filep, loff_t offset, loff_t len) | |||
135 | return err; | 126 | return err; |
136 | } | 127 | } |
137 | 128 | ||
138 | static loff_t _nfs42_proc_llseek(struct file *filep, loff_t offset, int whence) | 129 | static loff_t _nfs42_proc_llseek(struct file *filep, |
130 | struct nfs_lock_context *lock, loff_t offset, int whence) | ||
139 | { | 131 | { |
140 | struct inode *inode = file_inode(filep); | 132 | struct inode *inode = file_inode(filep); |
141 | struct nfs42_seek_args args = { | 133 | struct nfs42_seek_args args = { |
@@ -156,7 +148,8 @@ static loff_t _nfs42_proc_llseek(struct file *filep, loff_t offset, int whence) | |||
156 | if (!nfs_server_capable(inode, NFS_CAP_SEEK)) | 148 | if (!nfs_server_capable(inode, NFS_CAP_SEEK)) |
157 | return -ENOTSUPP; | 149 | return -ENOTSUPP; |
158 | 150 | ||
159 | status = nfs42_set_rw_stateid(&args.sa_stateid, filep, FMODE_READ); | 151 | status = nfs4_set_rw_stateid(&args.sa_stateid, lock->open_context, |
152 | lock, FMODE_READ); | ||
160 | if (status) | 153 | if (status) |
161 | return status; | 154 | return status; |
162 | 155 | ||
@@ -175,17 +168,28 @@ loff_t nfs42_proc_llseek(struct file *filep, loff_t offset, int whence) | |||
175 | { | 168 | { |
176 | struct nfs_server *server = NFS_SERVER(file_inode(filep)); | 169 | struct nfs_server *server = NFS_SERVER(file_inode(filep)); |
177 | struct nfs4_exception exception = { }; | 170 | struct nfs4_exception exception = { }; |
171 | struct nfs_lock_context *lock; | ||
178 | loff_t err; | 172 | loff_t err; |
179 | 173 | ||
174 | lock = nfs_get_lock_context(nfs_file_open_context(filep)); | ||
175 | if (IS_ERR(lock)) | ||
176 | return PTR_ERR(lock); | ||
177 | |||
178 | exception.inode = file_inode(filep); | ||
179 | exception.state = lock->open_context->state; | ||
180 | |||
180 | do { | 181 | do { |
181 | err = _nfs42_proc_llseek(filep, offset, whence); | 182 | err = _nfs42_proc_llseek(filep, lock, offset, whence); |
182 | if (err >= 0) | 183 | if (err >= 0) |
183 | break; | 184 | break; |
184 | if (err == -ENOTSUPP) | 185 | if (err == -ENOTSUPP) { |
185 | return -EOPNOTSUPP; | 186 | err = -EOPNOTSUPP; |
187 | break; | ||
188 | } | ||
186 | err = nfs4_handle_exception(server, err, &exception); | 189 | err = nfs4_handle_exception(server, err, &exception); |
187 | } while (exception.retry); | 190 | } while (exception.retry); |
188 | 191 | ||
192 | nfs_put_lock_context(lock); | ||
189 | return err; | 193 | return err; |
190 | } | 194 | } |
191 | 195 | ||
@@ -298,8 +302,9 @@ int nfs42_proc_layoutstats_generic(struct nfs_server *server, | |||
298 | } | 302 | } |
299 | 303 | ||
300 | static int _nfs42_proc_clone(struct rpc_message *msg, struct file *src_f, | 304 | static int _nfs42_proc_clone(struct rpc_message *msg, struct file *src_f, |
301 | struct file *dst_f, loff_t src_offset, | 305 | struct file *dst_f, struct nfs_lock_context *src_lock, |
302 | loff_t dst_offset, loff_t count) | 306 | struct nfs_lock_context *dst_lock, loff_t src_offset, |
307 | loff_t dst_offset, loff_t count) | ||
303 | { | 308 | { |
304 | struct inode *src_inode = file_inode(src_f); | 309 | struct inode *src_inode = file_inode(src_f); |
305 | struct inode *dst_inode = file_inode(dst_f); | 310 | struct inode *dst_inode = file_inode(dst_f); |
@@ -320,11 +325,13 @@ static int _nfs42_proc_clone(struct rpc_message *msg, struct file *src_f, | |||
320 | msg->rpc_argp = &args; | 325 | msg->rpc_argp = &args; |
321 | msg->rpc_resp = &res; | 326 | msg->rpc_resp = &res; |
322 | 327 | ||
323 | status = nfs42_set_rw_stateid(&args.src_stateid, src_f, FMODE_READ); | 328 | status = nfs4_set_rw_stateid(&args.src_stateid, src_lock->open_context, |
329 | src_lock, FMODE_READ); | ||
324 | if (status) | 330 | if (status) |
325 | return status; | 331 | return status; |
326 | 332 | ||
327 | status = nfs42_set_rw_stateid(&args.dst_stateid, dst_f, FMODE_WRITE); | 333 | status = nfs4_set_rw_stateid(&args.dst_stateid, dst_lock->open_context, |
334 | dst_lock, FMODE_WRITE); | ||
328 | if (status) | 335 | if (status) |
329 | return status; | 336 | return status; |
330 | 337 | ||
@@ -349,22 +356,48 @@ int nfs42_proc_clone(struct file *src_f, struct file *dst_f, | |||
349 | }; | 356 | }; |
350 | struct inode *inode = file_inode(src_f); | 357 | struct inode *inode = file_inode(src_f); |
351 | struct nfs_server *server = NFS_SERVER(file_inode(src_f)); | 358 | struct nfs_server *server = NFS_SERVER(file_inode(src_f)); |
352 | struct nfs4_exception exception = { }; | 359 | struct nfs_lock_context *src_lock; |
353 | int err; | 360 | struct nfs_lock_context *dst_lock; |
361 | struct nfs4_exception src_exception = { }; | ||
362 | struct nfs4_exception dst_exception = { }; | ||
363 | int err, err2; | ||
354 | 364 | ||
355 | if (!nfs_server_capable(inode, NFS_CAP_CLONE)) | 365 | if (!nfs_server_capable(inode, NFS_CAP_CLONE)) |
356 | return -EOPNOTSUPP; | 366 | return -EOPNOTSUPP; |
357 | 367 | ||
368 | src_lock = nfs_get_lock_context(nfs_file_open_context(src_f)); | ||
369 | if (IS_ERR(src_lock)) | ||
370 | return PTR_ERR(src_lock); | ||
371 | |||
372 | src_exception.inode = file_inode(src_f); | ||
373 | src_exception.state = src_lock->open_context->state; | ||
374 | |||
375 | dst_lock = nfs_get_lock_context(nfs_file_open_context(dst_f)); | ||
376 | if (IS_ERR(dst_lock)) { | ||
377 | err = PTR_ERR(dst_lock); | ||
378 | goto out_put_src_lock; | ||
379 | } | ||
380 | |||
381 | dst_exception.inode = file_inode(dst_f); | ||
382 | dst_exception.state = dst_lock->open_context->state; | ||
383 | |||
358 | do { | 384 | do { |
359 | err = _nfs42_proc_clone(&msg, src_f, dst_f, src_offset, | 385 | err = _nfs42_proc_clone(&msg, src_f, dst_f, src_lock, dst_lock, |
360 | dst_offset, count); | 386 | src_offset, dst_offset, count); |
361 | if (err == -ENOTSUPP || err == -EOPNOTSUPP) { | 387 | if (err == -ENOTSUPP || err == -EOPNOTSUPP) { |
362 | NFS_SERVER(inode)->caps &= ~NFS_CAP_CLONE; | 388 | NFS_SERVER(inode)->caps &= ~NFS_CAP_CLONE; |
363 | return -EOPNOTSUPP; | 389 | err = -EOPNOTSUPP; |
390 | break; | ||
364 | } | 391 | } |
365 | err = nfs4_handle_exception(server, err, &exception); | ||
366 | } while (exception.retry); | ||
367 | 392 | ||
368 | return err; | 393 | err2 = nfs4_handle_exception(server, err, &src_exception); |
394 | err = nfs4_handle_exception(server, err, &dst_exception); | ||
395 | if (!err) | ||
396 | err = err2; | ||
397 | } while (src_exception.retry || dst_exception.retry); | ||
369 | 398 | ||
399 | nfs_put_lock_context(dst_lock); | ||
400 | out_put_src_lock: | ||
401 | nfs_put_lock_context(src_lock); | ||
402 | return err; | ||
370 | } | 403 | } |
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 4bfc33ad0563..14881594dd07 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c | |||
@@ -2466,9 +2466,9 @@ static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata, | |||
2466 | dentry = d_add_unique(dentry, igrab(state->inode)); | 2466 | dentry = d_add_unique(dentry, igrab(state->inode)); |
2467 | if (dentry == NULL) { | 2467 | if (dentry == NULL) { |
2468 | dentry = opendata->dentry; | 2468 | dentry = opendata->dentry; |
2469 | } else if (dentry != ctx->dentry) { | 2469 | } else { |
2470 | dput(ctx->dentry); | 2470 | dput(ctx->dentry); |
2471 | ctx->dentry = dget(dentry); | 2471 | ctx->dentry = dentry; |
2472 | } | 2472 | } |
2473 | nfs_set_verifier(dentry, | 2473 | nfs_set_verifier(dentry, |
2474 | nfs_save_change_attribute(d_inode(opendata->dir))); | 2474 | nfs_save_change_attribute(d_inode(opendata->dir))); |
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 482b6e94bb37..2fa483e6dbe2 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c | |||
@@ -252,6 +252,27 @@ pnfs_put_layout_hdr(struct pnfs_layout_hdr *lo) | |||
252 | } | 252 | } |
253 | } | 253 | } |
254 | 254 | ||
255 | /* | ||
256 | * Mark a pnfs_layout_hdr and all associated layout segments as invalid | ||
257 | * | ||
258 | * In order to continue using the pnfs_layout_hdr, a full recovery | ||
259 | * is required. | ||
260 | * Note that caller must hold inode->i_lock. | ||
261 | */ | ||
262 | static int | ||
263 | pnfs_mark_layout_stateid_invalid(struct pnfs_layout_hdr *lo, | ||
264 | struct list_head *lseg_list) | ||
265 | { | ||
266 | struct pnfs_layout_range range = { | ||
267 | .iomode = IOMODE_ANY, | ||
268 | .offset = 0, | ||
269 | .length = NFS4_MAX_UINT64, | ||
270 | }; | ||
271 | |||
272 | set_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags); | ||
273 | return pnfs_mark_matching_lsegs_invalid(lo, lseg_list, &range); | ||
274 | } | ||
275 | |||
255 | static int | 276 | static int |
256 | pnfs_iomode_to_fail_bit(u32 iomode) | 277 | pnfs_iomode_to_fail_bit(u32 iomode) |
257 | { | 278 | { |
@@ -554,9 +575,8 @@ pnfs_destroy_layout(struct nfs_inode *nfsi) | |||
554 | spin_lock(&nfsi->vfs_inode.i_lock); | 575 | spin_lock(&nfsi->vfs_inode.i_lock); |
555 | lo = nfsi->layout; | 576 | lo = nfsi->layout; |
556 | if (lo) { | 577 | if (lo) { |
557 | lo->plh_block_lgets++; /* permanently block new LAYOUTGETs */ | ||
558 | pnfs_mark_matching_lsegs_invalid(lo, &tmp_list, NULL); | ||
559 | pnfs_get_layout_hdr(lo); | 578 | pnfs_get_layout_hdr(lo); |
579 | pnfs_mark_layout_stateid_invalid(lo, &tmp_list); | ||
560 | pnfs_layout_clear_fail_bit(lo, NFS_LAYOUT_RO_FAILED); | 580 | pnfs_layout_clear_fail_bit(lo, NFS_LAYOUT_RO_FAILED); |
561 | pnfs_layout_clear_fail_bit(lo, NFS_LAYOUT_RW_FAILED); | 581 | pnfs_layout_clear_fail_bit(lo, NFS_LAYOUT_RW_FAILED); |
562 | spin_unlock(&nfsi->vfs_inode.i_lock); | 582 | spin_unlock(&nfsi->vfs_inode.i_lock); |
@@ -617,11 +637,6 @@ pnfs_layout_free_bulk_destroy_list(struct list_head *layout_list, | |||
617 | { | 637 | { |
618 | struct pnfs_layout_hdr *lo; | 638 | struct pnfs_layout_hdr *lo; |
619 | struct inode *inode; | 639 | struct inode *inode; |
620 | struct pnfs_layout_range range = { | ||
621 | .iomode = IOMODE_ANY, | ||
622 | .offset = 0, | ||
623 | .length = NFS4_MAX_UINT64, | ||
624 | }; | ||
625 | LIST_HEAD(lseg_list); | 640 | LIST_HEAD(lseg_list); |
626 | int ret = 0; | 641 | int ret = 0; |
627 | 642 | ||
@@ -636,11 +651,11 @@ pnfs_layout_free_bulk_destroy_list(struct list_head *layout_list, | |||
636 | 651 | ||
637 | spin_lock(&inode->i_lock); | 652 | spin_lock(&inode->i_lock); |
638 | list_del_init(&lo->plh_bulk_destroy); | 653 | list_del_init(&lo->plh_bulk_destroy); |
639 | lo->plh_block_lgets++; /* permanently block new LAYOUTGETs */ | 654 | if (pnfs_mark_layout_stateid_invalid(lo, &lseg_list)) { |
640 | if (is_bulk_recall) | 655 | if (is_bulk_recall) |
641 | set_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags); | 656 | set_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags); |
642 | if (pnfs_mark_matching_lsegs_invalid(lo, &lseg_list, &range)) | ||
643 | ret = -EAGAIN; | 657 | ret = -EAGAIN; |
658 | } | ||
644 | spin_unlock(&inode->i_lock); | 659 | spin_unlock(&inode->i_lock); |
645 | pnfs_free_lseg_list(&lseg_list); | 660 | pnfs_free_lseg_list(&lseg_list); |
646 | /* Free all lsegs that are attached to commit buckets */ | 661 | /* Free all lsegs that are attached to commit buckets */ |
@@ -1738,8 +1753,19 @@ pnfs_set_plh_return_iomode(struct pnfs_layout_hdr *lo, enum pnfs_iomode iomode) | |||
1738 | if (lo->plh_return_iomode != 0) | 1753 | if (lo->plh_return_iomode != 0) |
1739 | iomode = IOMODE_ANY; | 1754 | iomode = IOMODE_ANY; |
1740 | lo->plh_return_iomode = iomode; | 1755 | lo->plh_return_iomode = iomode; |
1756 | set_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags); | ||
1741 | } | 1757 | } |
1742 | 1758 | ||
1759 | /** | ||
1760 | * pnfs_mark_matching_lsegs_return - Free or return matching layout segments | ||
1761 | * @lo: pointer to layout header | ||
1762 | * @tmp_list: list header to be used with pnfs_free_lseg_list() | ||
1763 | * @return_range: describe layout segment ranges to be returned | ||
1764 | * | ||
1765 | * This function is mainly intended for use by layoutrecall. It attempts | ||
1766 | * to free the layout segment immediately, or else to mark it for return | ||
1767 | * as soon as its reference count drops to zero. | ||
1768 | */ | ||
1743 | int | 1769 | int |
1744 | pnfs_mark_matching_lsegs_return(struct pnfs_layout_hdr *lo, | 1770 | pnfs_mark_matching_lsegs_return(struct pnfs_layout_hdr *lo, |
1745 | struct list_head *tmp_list, | 1771 | struct list_head *tmp_list, |
@@ -1762,12 +1788,11 @@ pnfs_mark_matching_lsegs_return(struct pnfs_layout_hdr *lo, | |||
1762 | lseg, lseg->pls_range.iomode, | 1788 | lseg, lseg->pls_range.iomode, |
1763 | lseg->pls_range.offset, | 1789 | lseg->pls_range.offset, |
1764 | lseg->pls_range.length); | 1790 | lseg->pls_range.length); |
1791 | if (mark_lseg_invalid(lseg, tmp_list)) | ||
1792 | continue; | ||
1793 | remaining++; | ||
1765 | set_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags); | 1794 | set_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags); |
1766 | pnfs_set_plh_return_iomode(lo, return_range->iomode); | 1795 | pnfs_set_plh_return_iomode(lo, return_range->iomode); |
1767 | if (!mark_lseg_invalid(lseg, tmp_list)) | ||
1768 | remaining++; | ||
1769 | set_bit(NFS_LAYOUT_RETURN_REQUESTED, | ||
1770 | &lo->plh_flags); | ||
1771 | } | 1796 | } |
1772 | return remaining; | 1797 | return remaining; |
1773 | } | 1798 | } |
diff --git a/fs/notify/mark.c b/fs/notify/mark.c index cfcbf114676e..7115c5d7d373 100644 --- a/fs/notify/mark.c +++ b/fs/notify/mark.c | |||
@@ -91,7 +91,14 @@ | |||
91 | #include <linux/fsnotify_backend.h> | 91 | #include <linux/fsnotify_backend.h> |
92 | #include "fsnotify.h" | 92 | #include "fsnotify.h" |
93 | 93 | ||
94 | #define FSNOTIFY_REAPER_DELAY (1) /* 1 jiffy */ | ||
95 | |||
94 | struct srcu_struct fsnotify_mark_srcu; | 96 | struct srcu_struct fsnotify_mark_srcu; |
97 | static DEFINE_SPINLOCK(destroy_lock); | ||
98 | static LIST_HEAD(destroy_list); | ||
99 | |||
100 | static void fsnotify_mark_destroy(struct work_struct *work); | ||
101 | static DECLARE_DELAYED_WORK(reaper_work, fsnotify_mark_destroy); | ||
95 | 102 | ||
96 | void fsnotify_get_mark(struct fsnotify_mark *mark) | 103 | void fsnotify_get_mark(struct fsnotify_mark *mark) |
97 | { | 104 | { |
@@ -165,19 +172,10 @@ void fsnotify_detach_mark(struct fsnotify_mark *mark) | |||
165 | atomic_dec(&group->num_marks); | 172 | atomic_dec(&group->num_marks); |
166 | } | 173 | } |
167 | 174 | ||
168 | static void | ||
169 | fsnotify_mark_free_rcu(struct rcu_head *rcu) | ||
170 | { | ||
171 | struct fsnotify_mark *mark; | ||
172 | |||
173 | mark = container_of(rcu, struct fsnotify_mark, g_rcu); | ||
174 | fsnotify_put_mark(mark); | ||
175 | } | ||
176 | |||
177 | /* | 175 | /* |
178 | * Free fsnotify mark. The freeing is actually happening from a call_srcu | 176 | * Free fsnotify mark. The freeing is actually happening from a kthread which |
179 | * callback. Caller must have a reference to the mark or be protected by | 177 | * first waits for srcu period end. Caller must have a reference to the mark |
180 | * fsnotify_mark_srcu. | 178 | * or be protected by fsnotify_mark_srcu. |
181 | */ | 179 | */ |
182 | void fsnotify_free_mark(struct fsnotify_mark *mark) | 180 | void fsnotify_free_mark(struct fsnotify_mark *mark) |
183 | { | 181 | { |
@@ -192,7 +190,11 @@ void fsnotify_free_mark(struct fsnotify_mark *mark) | |||
192 | mark->flags &= ~FSNOTIFY_MARK_FLAG_ALIVE; | 190 | mark->flags &= ~FSNOTIFY_MARK_FLAG_ALIVE; |
193 | spin_unlock(&mark->lock); | 191 | spin_unlock(&mark->lock); |
194 | 192 | ||
195 | call_srcu(&fsnotify_mark_srcu, &mark->g_rcu, fsnotify_mark_free_rcu); | 193 | spin_lock(&destroy_lock); |
194 | list_add(&mark->g_list, &destroy_list); | ||
195 | spin_unlock(&destroy_lock); | ||
196 | queue_delayed_work(system_unbound_wq, &reaper_work, | ||
197 | FSNOTIFY_REAPER_DELAY); | ||
196 | 198 | ||
197 | /* | 199 | /* |
198 | * Some groups like to know that marks are being freed. This is a | 200 | * Some groups like to know that marks are being freed. This is a |
@@ -388,7 +390,12 @@ err: | |||
388 | 390 | ||
389 | spin_unlock(&mark->lock); | 391 | spin_unlock(&mark->lock); |
390 | 392 | ||
391 | call_srcu(&fsnotify_mark_srcu, &mark->g_rcu, fsnotify_mark_free_rcu); | 393 | spin_lock(&destroy_lock); |
394 | list_add(&mark->g_list, &destroy_list); | ||
395 | spin_unlock(&destroy_lock); | ||
396 | queue_delayed_work(system_unbound_wq, &reaper_work, | ||
397 | FSNOTIFY_REAPER_DELAY); | ||
398 | |||
392 | return ret; | 399 | return ret; |
393 | } | 400 | } |
394 | 401 | ||
@@ -491,3 +498,21 @@ void fsnotify_init_mark(struct fsnotify_mark *mark, | |||
491 | atomic_set(&mark->refcnt, 1); | 498 | atomic_set(&mark->refcnt, 1); |
492 | mark->free_mark = free_mark; | 499 | mark->free_mark = free_mark; |
493 | } | 500 | } |
501 | |||
502 | static void fsnotify_mark_destroy(struct work_struct *work) | ||
503 | { | ||
504 | struct fsnotify_mark *mark, *next; | ||
505 | struct list_head private_destroy_list; | ||
506 | |||
507 | spin_lock(&destroy_lock); | ||
508 | /* exchange the list head */ | ||
509 | list_replace_init(&destroy_list, &private_destroy_list); | ||
510 | spin_unlock(&destroy_lock); | ||
511 | |||
512 | synchronize_srcu(&fsnotify_mark_srcu); | ||
513 | |||
514 | list_for_each_entry_safe(mark, next, &private_destroy_list, g_list) { | ||
515 | list_del_init(&mark->g_list); | ||
516 | fsnotify_put_mark(mark); | ||
517 | } | ||
518 | } | ||
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index 794fd1587f34..cda0361e95a4 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c | |||
@@ -956,6 +956,7 @@ clean_orphan: | |||
956 | tmp_ret = ocfs2_del_inode_from_orphan(osb, inode, di_bh, | 956 | tmp_ret = ocfs2_del_inode_from_orphan(osb, inode, di_bh, |
957 | update_isize, end); | 957 | update_isize, end); |
958 | if (tmp_ret < 0) { | 958 | if (tmp_ret < 0) { |
959 | ocfs2_inode_unlock(inode, 1); | ||
959 | ret = tmp_ret; | 960 | ret = tmp_ret; |
960 | mlog_errno(ret); | 961 | mlog_errno(ret); |
961 | brelse(di_bh); | 962 | brelse(di_bh); |
diff --git a/fs/pnode.c b/fs/pnode.c index 6367e1e435c6..c524fdddc7fb 100644 --- a/fs/pnode.c +++ b/fs/pnode.c | |||
@@ -202,6 +202,11 @@ static struct mount *last_dest, *last_source, *dest_master; | |||
202 | static struct mountpoint *mp; | 202 | static struct mountpoint *mp; |
203 | static struct hlist_head *list; | 203 | static struct hlist_head *list; |
204 | 204 | ||
205 | static inline bool peers(struct mount *m1, struct mount *m2) | ||
206 | { | ||
207 | return m1->mnt_group_id == m2->mnt_group_id && m1->mnt_group_id; | ||
208 | } | ||
209 | |||
205 | static int propagate_one(struct mount *m) | 210 | static int propagate_one(struct mount *m) |
206 | { | 211 | { |
207 | struct mount *child; | 212 | struct mount *child; |
@@ -212,7 +217,7 @@ static int propagate_one(struct mount *m) | |||
212 | /* skip if mountpoint isn't covered by it */ | 217 | /* skip if mountpoint isn't covered by it */ |
213 | if (!is_subdir(mp->m_dentry, m->mnt.mnt_root)) | 218 | if (!is_subdir(mp->m_dentry, m->mnt.mnt_root)) |
214 | return 0; | 219 | return 0; |
215 | if (m->mnt_group_id == last_dest->mnt_group_id) { | 220 | if (peers(m, last_dest)) { |
216 | type = CL_MAKE_SHARED; | 221 | type = CL_MAKE_SHARED; |
217 | } else { | 222 | } else { |
218 | struct mount *n, *p; | 223 | struct mount *n, *p; |
@@ -223,7 +228,7 @@ static int propagate_one(struct mount *m) | |||
223 | last_source = last_source->mnt_master; | 228 | last_source = last_source->mnt_master; |
224 | last_dest = last_source->mnt_parent; | 229 | last_dest = last_source->mnt_parent; |
225 | } | 230 | } |
226 | if (n->mnt_group_id != last_dest->mnt_group_id) { | 231 | if (!peers(n, last_dest)) { |
227 | last_source = last_source->mnt_master; | 232 | last_source = last_source->mnt_master; |
228 | last_dest = last_source->mnt_parent; | 233 | last_dest = last_source->mnt_parent; |
229 | } | 234 | } |
diff --git a/fs/read_write.c b/fs/read_write.c index 324ec271cc4e..dadf24e5c95b 100644 --- a/fs/read_write.c +++ b/fs/read_write.c | |||
@@ -17,6 +17,7 @@ | |||
17 | #include <linux/splice.h> | 17 | #include <linux/splice.h> |
18 | #include <linux/compat.h> | 18 | #include <linux/compat.h> |
19 | #include <linux/mount.h> | 19 | #include <linux/mount.h> |
20 | #include <linux/fs.h> | ||
20 | #include "internal.h" | 21 | #include "internal.h" |
21 | 22 | ||
22 | #include <asm/uaccess.h> | 23 | #include <asm/uaccess.h> |
@@ -183,7 +184,7 @@ loff_t no_seek_end_llseek(struct file *file, loff_t offset, int whence) | |||
183 | switch (whence) { | 184 | switch (whence) { |
184 | case SEEK_SET: case SEEK_CUR: | 185 | case SEEK_SET: case SEEK_CUR: |
185 | return generic_file_llseek_size(file, offset, whence, | 186 | return generic_file_llseek_size(file, offset, whence, |
186 | ~0ULL, 0); | 187 | OFFSET_MAX, 0); |
187 | default: | 188 | default: |
188 | return -EINVAL; | 189 | return -EINVAL; |
189 | } | 190 | } |
@@ -1532,10 +1533,12 @@ int vfs_clone_file_range(struct file *file_in, loff_t pos_in, | |||
1532 | 1533 | ||
1533 | if (!(file_in->f_mode & FMODE_READ) || | 1534 | if (!(file_in->f_mode & FMODE_READ) || |
1534 | !(file_out->f_mode & FMODE_WRITE) || | 1535 | !(file_out->f_mode & FMODE_WRITE) || |
1535 | (file_out->f_flags & O_APPEND) || | 1536 | (file_out->f_flags & O_APPEND)) |
1536 | !file_in->f_op->clone_file_range) | ||
1537 | return -EBADF; | 1537 | return -EBADF; |
1538 | 1538 | ||
1539 | if (!file_in->f_op->clone_file_range) | ||
1540 | return -EOPNOTSUPP; | ||
1541 | |||
1539 | ret = clone_verify_area(file_in, pos_in, len, false); | 1542 | ret = clone_verify_area(file_in, pos_in, len, false); |
1540 | if (ret) | 1543 | if (ret) |
1541 | return ret; | 1544 | return ret; |
diff --git a/fs/super.c b/fs/super.c index 1182af8fd5ff..74914b1bae70 100644 --- a/fs/super.c +++ b/fs/super.c | |||
@@ -415,6 +415,7 @@ void generic_shutdown_super(struct super_block *sb) | |||
415 | sb->s_flags &= ~MS_ACTIVE; | 415 | sb->s_flags &= ~MS_ACTIVE; |
416 | 416 | ||
417 | fsnotify_unmount_inodes(sb); | 417 | fsnotify_unmount_inodes(sb); |
418 | cgroup_writeback_umount(); | ||
418 | 419 | ||
419 | evict_inodes(sb); | 420 | evict_inodes(sb); |
420 | 421 | ||
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index 50311703135b..66cdb44616d5 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c | |||
@@ -287,6 +287,12 @@ int handle_userfault(struct vm_area_struct *vma, unsigned long address, | |||
287 | goto out; | 287 | goto out; |
288 | 288 | ||
289 | /* | 289 | /* |
290 | * We don't do userfault handling for the final child pid update. | ||
291 | */ | ||
292 | if (current->flags & PF_EXITING) | ||
293 | goto out; | ||
294 | |||
295 | /* | ||
290 | * Check that we can return VM_FAULT_RETRY. | 296 | * Check that we can return VM_FAULT_RETRY. |
291 | * | 297 | * |
292 | * NOTE: it should become possible to return VM_FAULT_RETRY | 298 | * NOTE: it should become possible to return VM_FAULT_RETRY |
diff --git a/fs/xattr.c b/fs/xattr.c index 07d0e47f6a7f..4861322e28e8 100644 --- a/fs/xattr.c +++ b/fs/xattr.c | |||
@@ -940,7 +940,7 @@ ssize_t simple_xattr_list(struct inode *inode, struct simple_xattrs *xattrs, | |||
940 | bool trusted = capable(CAP_SYS_ADMIN); | 940 | bool trusted = capable(CAP_SYS_ADMIN); |
941 | struct simple_xattr *xattr; | 941 | struct simple_xattr *xattr; |
942 | ssize_t remaining_size = size; | 942 | ssize_t remaining_size = size; |
943 | int err; | 943 | int err = 0; |
944 | 944 | ||
945 | #ifdef CONFIG_FS_POSIX_ACL | 945 | #ifdef CONFIG_FS_POSIX_ACL |
946 | if (inode->i_acl) { | 946 | if (inode->i_acl) { |
@@ -965,11 +965,11 @@ ssize_t simple_xattr_list(struct inode *inode, struct simple_xattrs *xattrs, | |||
965 | 965 | ||
966 | err = xattr_list_one(&buffer, &remaining_size, xattr->name); | 966 | err = xattr_list_one(&buffer, &remaining_size, xattr->name); |
967 | if (err) | 967 | if (err) |
968 | return err; | 968 | break; |
969 | } | 969 | } |
970 | spin_unlock(&xattrs->lock); | 970 | spin_unlock(&xattrs->lock); |
971 | 971 | ||
972 | return size - remaining_size; | 972 | return err ? err : size - remaining_size; |
973 | } | 973 | } |
974 | 974 | ||
975 | /* | 975 | /* |
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 379c089fb051..a9ebabfe7587 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c | |||
@@ -55,7 +55,7 @@ xfs_count_page_state( | |||
55 | } while ((bh = bh->b_this_page) != head); | 55 | } while ((bh = bh->b_this_page) != head); |
56 | } | 56 | } |
57 | 57 | ||
58 | STATIC struct block_device * | 58 | struct block_device * |
59 | xfs_find_bdev_for_inode( | 59 | xfs_find_bdev_for_inode( |
60 | struct inode *inode) | 60 | struct inode *inode) |
61 | { | 61 | { |
@@ -1208,6 +1208,10 @@ xfs_vm_writepages( | |||
1208 | struct writeback_control *wbc) | 1208 | struct writeback_control *wbc) |
1209 | { | 1209 | { |
1210 | xfs_iflags_clear(XFS_I(mapping->host), XFS_ITRUNCATED); | 1210 | xfs_iflags_clear(XFS_I(mapping->host), XFS_ITRUNCATED); |
1211 | if (dax_mapping(mapping)) | ||
1212 | return dax_writeback_mapping_range(mapping, | ||
1213 | xfs_find_bdev_for_inode(mapping->host), wbc); | ||
1214 | |||
1211 | return generic_writepages(mapping, wbc); | 1215 | return generic_writepages(mapping, wbc); |
1212 | } | 1216 | } |
1213 | 1217 | ||
diff --git a/fs/xfs/xfs_aops.h b/fs/xfs/xfs_aops.h index f6ffc9ae5ceb..a4343c63fb38 100644 --- a/fs/xfs/xfs_aops.h +++ b/fs/xfs/xfs_aops.h | |||
@@ -62,5 +62,6 @@ int xfs_get_blocks_dax_fault(struct inode *inode, sector_t offset, | |||
62 | struct buffer_head *map_bh, int create); | 62 | struct buffer_head *map_bh, int create); |
63 | 63 | ||
64 | extern void xfs_count_page_state(struct page *, int *, int *); | 64 | extern void xfs_count_page_state(struct page *, int *, int *); |
65 | extern struct block_device *xfs_find_bdev_for_inode(struct inode *); | ||
65 | 66 | ||
66 | #endif /* __XFS_AOPS_H__ */ | 67 | #endif /* __XFS_AOPS_H__ */ |
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index 45ec9e40150c..6c876012b2e5 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c | |||
@@ -75,7 +75,8 @@ xfs_zero_extent( | |||
75 | ssize_t size = XFS_FSB_TO_B(mp, count_fsb); | 75 | ssize_t size = XFS_FSB_TO_B(mp, count_fsb); |
76 | 76 | ||
77 | if (IS_DAX(VFS_I(ip))) | 77 | if (IS_DAX(VFS_I(ip))) |
78 | return dax_clear_blocks(VFS_I(ip), block, size); | 78 | return dax_clear_sectors(xfs_find_bdev_for_inode(VFS_I(ip)), |
79 | sector, size); | ||
79 | 80 | ||
80 | /* | 81 | /* |
81 | * let the block layer decide on the fastest method of | 82 | * let the block layer decide on the fastest method of |