aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorPaul Mundt <lethal@linux-sh.org>2011-01-06 04:24:07 -0500
committerPaul Mundt <lethal@linux-sh.org>2011-01-06 04:24:07 -0500
commitf862f904d357dc0d3612347a8dbabe6fae037fbb (patch)
treed7f5c2d5f85fd9e1cfc36beae904dc4f9cca04a3 /fs
parent6f09e41d704fe0bc9157a5357480751d39361d01 (diff)
parent3c0eee3fe6a3a1c745379547c7e7c904aa64f6d5 (diff)
Merge branch 'master' of master.kernel.org:/pub/scm/linux/kernel/git/torvalds/linux-2.6 into sh-latest
Conflicts: arch/sh/kernel/cpu/sh2a/clock-sh7201.c Signed-off-by: Paul Mundt <lethal@linux-sh.org>
Diffstat (limited to 'fs')
-rw-r--r--fs/btrfs/disk-io.c11
-rw-r--r--fs/btrfs/export.c2
-rw-r--r--fs/btrfs/extent-tree.c75
-rw-r--r--fs/btrfs/file.c92
-rw-r--r--fs/btrfs/free-space-cache.c12
-rw-r--r--fs/btrfs/inode.c11
-rw-r--r--fs/btrfs/ioctl.c56
-rw-r--r--fs/btrfs/ioctl.h14
-rw-r--r--fs/btrfs/orphan.c6
-rw-r--r--fs/btrfs/super.c2
-rw-r--r--fs/btrfs/volumes.c20
-rw-r--r--fs/btrfs/volumes.h2
-rw-r--r--fs/ceph/dir.c7
-rw-r--r--fs/ceph/file.c39
-rw-r--r--fs/ceph/ioctl.h2
-rw-r--r--fs/ceph/locks.c94
-rw-r--r--fs/ceph/mds_client.c41
-rw-r--r--fs/ceph/mds_client.h31
-rw-r--r--fs/cifs/Makefile4
-rw-r--r--fs/cifs/README9
-rw-r--r--fs/cifs/cifs_fs_sb.h1
-rw-r--r--fs/cifs/cifsacl.c3
-rw-r--r--fs/cifs/cifsacl.h4
-rw-r--r--fs/cifs/cifsfs.c3
-rw-r--r--fs/cifs/cifsglob.h12
-rw-r--r--fs/cifs/cifsproto.h5
-rw-r--r--fs/cifs/cifssmb.c183
-rw-r--r--fs/cifs/connect.c42
-rw-r--r--fs/cifs/file.c2
-rw-r--r--fs/cifs/inode.c21
-rw-r--r--fs/cifs/readdir.c12
-rw-r--r--fs/exec.c5
-rw-r--r--fs/ext4/ext4.h1
-rw-r--r--fs/ext4/inode.c5
-rw-r--r--fs/ext4/namei.c2
-rw-r--r--fs/ext4/resize.c5
-rw-r--r--fs/ext4/super.c14
-rw-r--r--fs/fuse/file.c72
-rw-r--r--fs/logfs/journal.c2
-rw-r--r--fs/logfs/readwrite.c3
-rw-r--r--fs/namei.c3
-rw-r--r--fs/nfs/dir.c76
-rw-r--r--fs/nfs/file.c2
-rw-r--r--fs/nfs/inode.c1
-rw-r--r--fs/nfs/mount_clnt.c4
-rw-r--r--fs/nfs/nfs4proc.c9
-rw-r--r--fs/nfs/pagelist.c4
-rw-r--r--fs/nfs/read.c1
-rw-r--r--fs/nfs/super.c4
-rw-r--r--fs/nfs/write.c3
-rw-r--r--fs/nfsd/nfs3xdr.c6
-rw-r--r--fs/nfsd/xdr4.h21
-rw-r--r--fs/nilfs2/gcinode.c9
-rw-r--r--fs/nilfs2/ioctl.c12
-rw-r--r--fs/notify/fanotify/fanotify.c6
-rw-r--r--fs/notify/fanotify/fanotify_user.c81
-rw-r--r--fs/notify/inotify/inotify_user.c1
-rw-r--r--fs/ocfs2/aops.c7
-rw-r--r--fs/ocfs2/aops.h23
-rw-r--r--fs/ocfs2/cluster/masklog.c3
-rw-r--r--fs/ocfs2/cluster/masklog.h15
-rw-r--r--fs/ocfs2/dir.c4
-rw-r--r--fs/ocfs2/dlm/dlmmaster.c40
-rw-r--r--fs/ocfs2/file.c15
-rw-r--r--fs/ocfs2/ocfs2_fs.h2
-rw-r--r--fs/xfs/xfs_rename.c1
66 files changed, 820 insertions, 460 deletions
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index c547cca26a26..51d2e4de34eb 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -696,6 +696,7 @@ static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
696 __btree_submit_bio_done); 696 __btree_submit_bio_done);
697} 697}
698 698
699#ifdef CONFIG_MIGRATION
699static int btree_migratepage(struct address_space *mapping, 700static int btree_migratepage(struct address_space *mapping,
700 struct page *newpage, struct page *page) 701 struct page *newpage, struct page *page)
701{ 702{
@@ -712,12 +713,9 @@ static int btree_migratepage(struct address_space *mapping,
712 if (page_has_private(page) && 713 if (page_has_private(page) &&
713 !try_to_release_page(page, GFP_KERNEL)) 714 !try_to_release_page(page, GFP_KERNEL))
714 return -EAGAIN; 715 return -EAGAIN;
715#ifdef CONFIG_MIGRATION
716 return migrate_page(mapping, newpage, page); 716 return migrate_page(mapping, newpage, page);
717#else
718 return -ENOSYS;
719#endif
720} 717}
718#endif
721 719
722static int btree_writepage(struct page *page, struct writeback_control *wbc) 720static int btree_writepage(struct page *page, struct writeback_control *wbc)
723{ 721{
@@ -1009,7 +1007,10 @@ static int find_and_setup_root(struct btrfs_root *tree_root,
1009 blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item)); 1007 blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item));
1010 root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item), 1008 root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item),
1011 blocksize, generation); 1009 blocksize, generation);
1012 BUG_ON(!root->node); 1010 if (!root->node || !btrfs_buffer_uptodate(root->node, generation)) {
1011 free_extent_buffer(root->node);
1012 return -EIO;
1013 }
1013 root->commit_root = btrfs_root_node(root); 1014 root->commit_root = btrfs_root_node(root);
1014 return 0; 1015 return 0;
1015} 1016}
diff --git a/fs/btrfs/export.c b/fs/btrfs/export.c
index 6f0444473594..659f532d26a0 100644
--- a/fs/btrfs/export.c
+++ b/fs/btrfs/export.c
@@ -166,7 +166,7 @@ static struct dentry *btrfs_fh_to_dentry(struct super_block *sb, struct fid *fh,
166static struct dentry *btrfs_get_parent(struct dentry *child) 166static struct dentry *btrfs_get_parent(struct dentry *child)
167{ 167{
168 struct inode *dir = child->d_inode; 168 struct inode *dir = child->d_inode;
169 static struct dentry *dentry; 169 struct dentry *dentry;
170 struct btrfs_root *root = BTRFS_I(dir)->root; 170 struct btrfs_root *root = BTRFS_I(dir)->root;
171 struct btrfs_path *path; 171 struct btrfs_path *path;
172 struct extent_buffer *leaf; 172 struct extent_buffer *leaf;
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index bcd59c7dfb57..227e5815d838 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -429,6 +429,7 @@ err:
429 429
430static int cache_block_group(struct btrfs_block_group_cache *cache, 430static int cache_block_group(struct btrfs_block_group_cache *cache,
431 struct btrfs_trans_handle *trans, 431 struct btrfs_trans_handle *trans,
432 struct btrfs_root *root,
432 int load_cache_only) 433 int load_cache_only)
433{ 434{
434 struct btrfs_fs_info *fs_info = cache->fs_info; 435 struct btrfs_fs_info *fs_info = cache->fs_info;
@@ -442,9 +443,12 @@ static int cache_block_group(struct btrfs_block_group_cache *cache,
442 443
443 /* 444 /*
444 * We can't do the read from on-disk cache during a commit since we need 445 * We can't do the read from on-disk cache during a commit since we need
445 * to have the normal tree locking. 446 * to have the normal tree locking. Also if we are currently trying to
447 * allocate blocks for the tree root we can't do the fast caching since
448 * we likely hold important locks.
446 */ 449 */
447 if (!trans->transaction->in_commit) { 450 if (!trans->transaction->in_commit &&
451 (root && root != root->fs_info->tree_root)) {
448 spin_lock(&cache->lock); 452 spin_lock(&cache->lock);
449 if (cache->cached != BTRFS_CACHE_NO) { 453 if (cache->cached != BTRFS_CACHE_NO) {
450 spin_unlock(&cache->lock); 454 spin_unlock(&cache->lock);
@@ -2741,6 +2745,7 @@ static int cache_save_setup(struct btrfs_block_group_cache *block_group,
2741 struct btrfs_root *root = block_group->fs_info->tree_root; 2745 struct btrfs_root *root = block_group->fs_info->tree_root;
2742 struct inode *inode = NULL; 2746 struct inode *inode = NULL;
2743 u64 alloc_hint = 0; 2747 u64 alloc_hint = 0;
2748 int dcs = BTRFS_DC_ERROR;
2744 int num_pages = 0; 2749 int num_pages = 0;
2745 int retries = 0; 2750 int retries = 0;
2746 int ret = 0; 2751 int ret = 0;
@@ -2795,6 +2800,8 @@ again:
2795 2800
2796 spin_lock(&block_group->lock); 2801 spin_lock(&block_group->lock);
2797 if (block_group->cached != BTRFS_CACHE_FINISHED) { 2802 if (block_group->cached != BTRFS_CACHE_FINISHED) {
2803 /* We're not cached, don't bother trying to write stuff out */
2804 dcs = BTRFS_DC_WRITTEN;
2798 spin_unlock(&block_group->lock); 2805 spin_unlock(&block_group->lock);
2799 goto out_put; 2806 goto out_put;
2800 } 2807 }
@@ -2821,6 +2828,8 @@ again:
2821 ret = btrfs_prealloc_file_range_trans(inode, trans, 0, 0, num_pages, 2828 ret = btrfs_prealloc_file_range_trans(inode, trans, 0, 0, num_pages,
2822 num_pages, num_pages, 2829 num_pages, num_pages,
2823 &alloc_hint); 2830 &alloc_hint);
2831 if (!ret)
2832 dcs = BTRFS_DC_SETUP;
2824 btrfs_free_reserved_data_space(inode, num_pages); 2833 btrfs_free_reserved_data_space(inode, num_pages);
2825out_put: 2834out_put:
2826 iput(inode); 2835 iput(inode);
@@ -2828,10 +2837,7 @@ out_free:
2828 btrfs_release_path(root, path); 2837 btrfs_release_path(root, path);
2829out: 2838out:
2830 spin_lock(&block_group->lock); 2839 spin_lock(&block_group->lock);
2831 if (ret) 2840 block_group->disk_cache_state = dcs;
2832 block_group->disk_cache_state = BTRFS_DC_ERROR;
2833 else
2834 block_group->disk_cache_state = BTRFS_DC_SETUP;
2835 spin_unlock(&block_group->lock); 2841 spin_unlock(&block_group->lock);
2836 2842
2837 return ret; 2843 return ret;
@@ -3037,7 +3043,13 @@ static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
3037 3043
3038u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags) 3044u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags)
3039{ 3045{
3040 u64 num_devices = root->fs_info->fs_devices->rw_devices; 3046 /*
3047 * we add in the count of missing devices because we want
3048 * to make sure that any RAID levels on a degraded FS
3049 * continue to be honored.
3050 */
3051 u64 num_devices = root->fs_info->fs_devices->rw_devices +
3052 root->fs_info->fs_devices->missing_devices;
3041 3053
3042 if (num_devices == 1) 3054 if (num_devices == 1)
3043 flags &= ~(BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID0); 3055 flags &= ~(BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID0);
@@ -4080,7 +4092,7 @@ static int update_block_group(struct btrfs_trans_handle *trans,
4080 * space back to the block group, otherwise we will leak space. 4092 * space back to the block group, otherwise we will leak space.
4081 */ 4093 */
4082 if (!alloc && cache->cached == BTRFS_CACHE_NO) 4094 if (!alloc && cache->cached == BTRFS_CACHE_NO)
4083 cache_block_group(cache, trans, 1); 4095 cache_block_group(cache, trans, NULL, 1);
4084 4096
4085 byte_in_group = bytenr - cache->key.objectid; 4097 byte_in_group = bytenr - cache->key.objectid;
4086 WARN_ON(byte_in_group > cache->key.offset); 4098 WARN_ON(byte_in_group > cache->key.offset);
@@ -4930,11 +4942,31 @@ search:
4930 btrfs_get_block_group(block_group); 4942 btrfs_get_block_group(block_group);
4931 search_start = block_group->key.objectid; 4943 search_start = block_group->key.objectid;
4932 4944
4945 /*
4946 * this can happen if we end up cycling through all the
4947 * raid types, but we want to make sure we only allocate
4948 * for the proper type.
4949 */
4950 if (!block_group_bits(block_group, data)) {
4951 u64 extra = BTRFS_BLOCK_GROUP_DUP |
4952 BTRFS_BLOCK_GROUP_RAID1 |
4953 BTRFS_BLOCK_GROUP_RAID10;
4954
4955 /*
4956 * if they asked for extra copies and this block group
4957 * doesn't provide them, bail. This does allow us to
4958 * fill raid0 from raid1.
4959 */
4960 if ((data & extra) && !(block_group->flags & extra))
4961 goto loop;
4962 }
4963
4933have_block_group: 4964have_block_group:
4934 if (unlikely(block_group->cached == BTRFS_CACHE_NO)) { 4965 if (unlikely(block_group->cached == BTRFS_CACHE_NO)) {
4935 u64 free_percent; 4966 u64 free_percent;
4936 4967
4937 ret = cache_block_group(block_group, trans, 1); 4968 ret = cache_block_group(block_group, trans,
4969 orig_root, 1);
4938 if (block_group->cached == BTRFS_CACHE_FINISHED) 4970 if (block_group->cached == BTRFS_CACHE_FINISHED)
4939 goto have_block_group; 4971 goto have_block_group;
4940 4972
@@ -4958,7 +4990,8 @@ have_block_group:
4958 if (loop > LOOP_CACHING_NOWAIT || 4990 if (loop > LOOP_CACHING_NOWAIT ||
4959 (loop > LOOP_FIND_IDEAL && 4991 (loop > LOOP_FIND_IDEAL &&
4960 atomic_read(&space_info->caching_threads) < 2)) { 4992 atomic_read(&space_info->caching_threads) < 2)) {
4961 ret = cache_block_group(block_group, trans, 0); 4993 ret = cache_block_group(block_group, trans,
4994 orig_root, 0);
4962 BUG_ON(ret); 4995 BUG_ON(ret);
4963 } 4996 }
4964 found_uncached_bg = true; 4997 found_uncached_bg = true;
@@ -5515,7 +5548,7 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
5515 u64 num_bytes = ins->offset; 5548 u64 num_bytes = ins->offset;
5516 5549
5517 block_group = btrfs_lookup_block_group(root->fs_info, ins->objectid); 5550 block_group = btrfs_lookup_block_group(root->fs_info, ins->objectid);
5518 cache_block_group(block_group, trans, 0); 5551 cache_block_group(block_group, trans, NULL, 0);
5519 caching_ctl = get_caching_control(block_group); 5552 caching_ctl = get_caching_control(block_group);
5520 5553
5521 if (!caching_ctl) { 5554 if (!caching_ctl) {
@@ -6300,9 +6333,13 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
6300 NULL, NULL); 6333 NULL, NULL);
6301 BUG_ON(ret < 0); 6334 BUG_ON(ret < 0);
6302 if (ret > 0) { 6335 if (ret > 0) {
6303 ret = btrfs_del_orphan_item(trans, tree_root, 6336 /* if we fail to delete the orphan item this time
6304 root->root_key.objectid); 6337 * around, it'll get picked up the next time.
6305 BUG_ON(ret); 6338 *
6339 * The most common failure here is just -ENOENT.
6340 */
6341 btrfs_del_orphan_item(trans, tree_root,
6342 root->root_key.objectid);
6306 } 6343 }
6307 } 6344 }
6308 6345
@@ -7878,7 +7915,14 @@ static u64 update_block_group_flags(struct btrfs_root *root, u64 flags)
7878 u64 stripped = BTRFS_BLOCK_GROUP_RAID0 | 7915 u64 stripped = BTRFS_BLOCK_GROUP_RAID0 |
7879 BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10; 7916 BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10;
7880 7917
7881 num_devices = root->fs_info->fs_devices->rw_devices; 7918 /*
7919 * we add in the count of missing devices because we want
7920 * to make sure that any RAID levels on a degraded FS
7921 * continue to be honored.
7922 */
7923 num_devices = root->fs_info->fs_devices->rw_devices +
7924 root->fs_info->fs_devices->missing_devices;
7925
7882 if (num_devices == 1) { 7926 if (num_devices == 1) {
7883 stripped |= BTRFS_BLOCK_GROUP_DUP; 7927 stripped |= BTRFS_BLOCK_GROUP_DUP;
7884 stripped = flags & ~stripped; 7928 stripped = flags & ~stripped;
@@ -8247,7 +8291,6 @@ int btrfs_read_block_groups(struct btrfs_root *root)
8247 break; 8291 break;
8248 if (ret != 0) 8292 if (ret != 0)
8249 goto error; 8293 goto error;
8250
8251 leaf = path->nodes[0]; 8294 leaf = path->nodes[0];
8252 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); 8295 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
8253 cache = kzalloc(sizeof(*cache), GFP_NOFS); 8296 cache = kzalloc(sizeof(*cache), GFP_NOFS);
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index c1faded5fca0..66836d85763b 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -48,30 +48,34 @@ static noinline int btrfs_copy_from_user(loff_t pos, int num_pages,
48 struct page **prepared_pages, 48 struct page **prepared_pages,
49 struct iov_iter *i) 49 struct iov_iter *i)
50{ 50{
51 size_t copied; 51 size_t copied = 0;
52 int pg = 0; 52 int pg = 0;
53 int offset = pos & (PAGE_CACHE_SIZE - 1); 53 int offset = pos & (PAGE_CACHE_SIZE - 1);
54 int total_copied = 0;
54 55
55 while (write_bytes > 0) { 56 while (write_bytes > 0) {
56 size_t count = min_t(size_t, 57 size_t count = min_t(size_t,
57 PAGE_CACHE_SIZE - offset, write_bytes); 58 PAGE_CACHE_SIZE - offset, write_bytes);
58 struct page *page = prepared_pages[pg]; 59 struct page *page = prepared_pages[pg];
59again: 60 /*
60 if (unlikely(iov_iter_fault_in_readable(i, count))) 61 * Copy data from userspace to the current page
61 return -EFAULT; 62 *
62 63 * Disable pagefault to avoid recursive lock since
63 /* Copy data from userspace to the current page */ 64 * the pages are already locked
64 copied = iov_iter_copy_from_user(page, i, offset, count); 65 */
66 pagefault_disable();
67 copied = iov_iter_copy_from_user_atomic(page, i, offset, count);
68 pagefault_enable();
65 69
66 /* Flush processor's dcache for this page */ 70 /* Flush processor's dcache for this page */
67 flush_dcache_page(page); 71 flush_dcache_page(page);
68 iov_iter_advance(i, copied); 72 iov_iter_advance(i, copied);
69 write_bytes -= copied; 73 write_bytes -= copied;
74 total_copied += copied;
70 75
76 /* Return to btrfs_file_aio_write to fault page */
71 if (unlikely(copied == 0)) { 77 if (unlikely(copied == 0)) {
72 count = min_t(size_t, PAGE_CACHE_SIZE - offset, 78 break;
73 iov_iter_single_seg_count(i));
74 goto again;
75 } 79 }
76 80
77 if (unlikely(copied < PAGE_CACHE_SIZE - offset)) { 81 if (unlikely(copied < PAGE_CACHE_SIZE - offset)) {
@@ -81,7 +85,7 @@ again:
81 offset = 0; 85 offset = 0;
82 } 86 }
83 } 87 }
84 return 0; 88 return total_copied;
85} 89}
86 90
87/* 91/*
@@ -854,6 +858,8 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
854 unsigned long last_index; 858 unsigned long last_index;
855 int will_write; 859 int will_write;
856 int buffered = 0; 860 int buffered = 0;
861 int copied = 0;
862 int dirty_pages = 0;
857 863
858 will_write = ((file->f_flags & O_DSYNC) || IS_SYNC(inode) || 864 will_write = ((file->f_flags & O_DSYNC) || IS_SYNC(inode) ||
859 (file->f_flags & O_DIRECT)); 865 (file->f_flags & O_DIRECT));
@@ -970,7 +976,17 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
970 WARN_ON(num_pages > nrptrs); 976 WARN_ON(num_pages > nrptrs);
971 memset(pages, 0, sizeof(struct page *) * nrptrs); 977 memset(pages, 0, sizeof(struct page *) * nrptrs);
972 978
973 ret = btrfs_delalloc_reserve_space(inode, write_bytes); 979 /*
980 * Fault pages before locking them in prepare_pages
981 * to avoid recursive lock
982 */
983 if (unlikely(iov_iter_fault_in_readable(&i, write_bytes))) {
984 ret = -EFAULT;
985 goto out;
986 }
987
988 ret = btrfs_delalloc_reserve_space(inode,
989 num_pages << PAGE_CACHE_SHIFT);
974 if (ret) 990 if (ret)
975 goto out; 991 goto out;
976 992
@@ -978,37 +994,49 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
978 pos, first_index, last_index, 994 pos, first_index, last_index,
979 write_bytes); 995 write_bytes);
980 if (ret) { 996 if (ret) {
981 btrfs_delalloc_release_space(inode, write_bytes); 997 btrfs_delalloc_release_space(inode,
998 num_pages << PAGE_CACHE_SHIFT);
982 goto out; 999 goto out;
983 } 1000 }
984 1001
985 ret = btrfs_copy_from_user(pos, num_pages, 1002 copied = btrfs_copy_from_user(pos, num_pages,
986 write_bytes, pages, &i); 1003 write_bytes, pages, &i);
987 if (ret == 0) { 1004 dirty_pages = (copied + PAGE_CACHE_SIZE - 1) >>
1005 PAGE_CACHE_SHIFT;
1006
1007 if (num_pages > dirty_pages) {
1008 if (copied > 0)
1009 atomic_inc(
1010 &BTRFS_I(inode)->outstanding_extents);
1011 btrfs_delalloc_release_space(inode,
1012 (num_pages - dirty_pages) <<
1013 PAGE_CACHE_SHIFT);
1014 }
1015
1016 if (copied > 0) {
988 dirty_and_release_pages(NULL, root, file, pages, 1017 dirty_and_release_pages(NULL, root, file, pages,
989 num_pages, pos, write_bytes); 1018 dirty_pages, pos, copied);
990 } 1019 }
991 1020
992 btrfs_drop_pages(pages, num_pages); 1021 btrfs_drop_pages(pages, num_pages);
993 if (ret) {
994 btrfs_delalloc_release_space(inode, write_bytes);
995 goto out;
996 }
997 1022
998 if (will_write) { 1023 if (copied > 0) {
999 filemap_fdatawrite_range(inode->i_mapping, pos, 1024 if (will_write) {
1000 pos + write_bytes - 1); 1025 filemap_fdatawrite_range(inode->i_mapping, pos,
1001 } else { 1026 pos + copied - 1);
1002 balance_dirty_pages_ratelimited_nr(inode->i_mapping, 1027 } else {
1003 num_pages); 1028 balance_dirty_pages_ratelimited_nr(
1004 if (num_pages < 1029 inode->i_mapping,
1005 (root->leafsize >> PAGE_CACHE_SHIFT) + 1) 1030 dirty_pages);
1006 btrfs_btree_balance_dirty(root, 1); 1031 if (dirty_pages <
1007 btrfs_throttle(root); 1032 (root->leafsize >> PAGE_CACHE_SHIFT) + 1)
1033 btrfs_btree_balance_dirty(root, 1);
1034 btrfs_throttle(root);
1035 }
1008 } 1036 }
1009 1037
1010 pos += write_bytes; 1038 pos += copied;
1011 num_written += write_bytes; 1039 num_written += copied;
1012 1040
1013 cond_resched(); 1041 cond_resched();
1014 } 1042 }
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 22ee0dc2e6b8..60d684266959 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -290,7 +290,7 @@ int load_free_space_cache(struct btrfs_fs_info *fs_info,
290 (unsigned long long)BTRFS_I(inode)->generation, 290 (unsigned long long)BTRFS_I(inode)->generation,
291 (unsigned long long)generation, 291 (unsigned long long)generation,
292 (unsigned long long)block_group->key.objectid); 292 (unsigned long long)block_group->key.objectid);
293 goto out; 293 goto free_cache;
294 } 294 }
295 295
296 if (!num_entries) 296 if (!num_entries)
@@ -524,6 +524,12 @@ int btrfs_write_out_cache(struct btrfs_root *root,
524 return 0; 524 return 0;
525 } 525 }
526 526
527 node = rb_first(&block_group->free_space_offset);
528 if (!node) {
529 iput(inode);
530 return 0;
531 }
532
527 last_index = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT; 533 last_index = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT;
528 filemap_write_and_wait(inode->i_mapping); 534 filemap_write_and_wait(inode->i_mapping);
529 btrfs_wait_ordered_range(inode, inode->i_size & 535 btrfs_wait_ordered_range(inode, inode->i_size &
@@ -543,10 +549,6 @@ int btrfs_write_out_cache(struct btrfs_root *root,
543 */ 549 */
544 first_page_offset = (sizeof(u32) * num_checksums) + sizeof(u64); 550 first_page_offset = (sizeof(u32) * num_checksums) + sizeof(u64);
545 551
546 node = rb_first(&block_group->free_space_offset);
547 if (!node)
548 goto out_free;
549
550 /* 552 /*
551 * Lock all pages first so we can lock the extent safely. 553 * Lock all pages first so we can lock the extent safely.
552 * 554 *
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 8039390bd6a6..72f31ecb5c90 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -495,7 +495,7 @@ again:
495 add_async_extent(async_cow, start, num_bytes, 495 add_async_extent(async_cow, start, num_bytes,
496 total_compressed, pages, nr_pages_ret); 496 total_compressed, pages, nr_pages_ret);
497 497
498 if (start + num_bytes < end && start + num_bytes < actual_end) { 498 if (start + num_bytes < end) {
499 start += num_bytes; 499 start += num_bytes;
500 pages = NULL; 500 pages = NULL;
501 cond_resched(); 501 cond_resched();
@@ -5712,9 +5712,9 @@ static void btrfs_end_dio_bio(struct bio *bio, int err)
5712 5712
5713 if (err) { 5713 if (err) {
5714 printk(KERN_ERR "btrfs direct IO failed ino %lu rw %lu " 5714 printk(KERN_ERR "btrfs direct IO failed ino %lu rw %lu "
5715 "disk_bytenr %lu len %u err no %d\n", 5715 "sector %#Lx len %u err no %d\n",
5716 dip->inode->i_ino, bio->bi_rw, bio->bi_sector, 5716 dip->inode->i_ino, bio->bi_rw,
5717 bio->bi_size, err); 5717 (unsigned long long)bio->bi_sector, bio->bi_size, err);
5718 dip->errors = 1; 5718 dip->errors = 1;
5719 5719
5720 /* 5720 /*
@@ -5934,8 +5934,7 @@ free_ordered:
5934 */ 5934 */
5935 if (write) { 5935 if (write) {
5936 struct btrfs_ordered_extent *ordered; 5936 struct btrfs_ordered_extent *ordered;
5937 ordered = btrfs_lookup_ordered_extent(inode, 5937 ordered = btrfs_lookup_ordered_extent(inode, file_offset);
5938 dip->logical_offset);
5939 if (!test_bit(BTRFS_ORDERED_PREALLOC, &ordered->flags) && 5938 if (!test_bit(BTRFS_ORDERED_PREALLOC, &ordered->flags) &&
5940 !test_bit(BTRFS_ORDERED_NOCOW, &ordered->flags)) 5939 !test_bit(BTRFS_ORDERED_NOCOW, &ordered->flags))
5941 btrfs_free_reserved_extent(root, ordered->start, 5940 btrfs_free_reserved_extent(root, ordered->start,
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index f1c9bb4079ed..f87552a1d7ea 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -947,23 +947,42 @@ out:
947 947
948static noinline int btrfs_ioctl_snap_create(struct file *file, 948static noinline int btrfs_ioctl_snap_create(struct file *file,
949 void __user *arg, int subvol, 949 void __user *arg, int subvol,
950 int async) 950 int v2)
951{ 951{
952 struct btrfs_ioctl_vol_args *vol_args = NULL; 952 struct btrfs_ioctl_vol_args *vol_args = NULL;
953 struct btrfs_ioctl_async_vol_args *async_vol_args = NULL; 953 struct btrfs_ioctl_vol_args_v2 *vol_args_v2 = NULL;
954 char *name; 954 char *name;
955 u64 fd; 955 u64 fd;
956 u64 transid = 0;
957 int ret; 956 int ret;
958 957
959 if (async) { 958 if (v2) {
960 async_vol_args = memdup_user(arg, sizeof(*async_vol_args)); 959 u64 transid = 0;
961 if (IS_ERR(async_vol_args)) 960 u64 *ptr = NULL;
962 return PTR_ERR(async_vol_args);
963 961
964 name = async_vol_args->name; 962 vol_args_v2 = memdup_user(arg, sizeof(*vol_args_v2));
965 fd = async_vol_args->fd; 963 if (IS_ERR(vol_args_v2))
966 async_vol_args->name[BTRFS_SNAPSHOT_NAME_MAX] = '\0'; 964 return PTR_ERR(vol_args_v2);
965
966 if (vol_args_v2->flags & ~BTRFS_SUBVOL_CREATE_ASYNC) {
967 ret = -EINVAL;
968 goto out;
969 }
970
971 name = vol_args_v2->name;
972 fd = vol_args_v2->fd;
973 vol_args_v2->name[BTRFS_SUBVOL_NAME_MAX] = '\0';
974
975 if (vol_args_v2->flags & BTRFS_SUBVOL_CREATE_ASYNC)
976 ptr = &transid;
977
978 ret = btrfs_ioctl_snap_create_transid(file, name, fd,
979 subvol, ptr);
980
981 if (ret == 0 && ptr &&
982 copy_to_user(arg +
983 offsetof(struct btrfs_ioctl_vol_args_v2,
984 transid), ptr, sizeof(*ptr)))
985 ret = -EFAULT;
967 } else { 986 } else {
968 vol_args = memdup_user(arg, sizeof(*vol_args)); 987 vol_args = memdup_user(arg, sizeof(*vol_args));
969 if (IS_ERR(vol_args)) 988 if (IS_ERR(vol_args))
@@ -971,20 +990,13 @@ static noinline int btrfs_ioctl_snap_create(struct file *file,
971 name = vol_args->name; 990 name = vol_args->name;
972 fd = vol_args->fd; 991 fd = vol_args->fd;
973 vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; 992 vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
974 }
975
976 ret = btrfs_ioctl_snap_create_transid(file, name, fd,
977 subvol, &transid);
978 993
979 if (!ret && async) { 994 ret = btrfs_ioctl_snap_create_transid(file, name, fd,
980 if (copy_to_user(arg + 995 subvol, NULL);
981 offsetof(struct btrfs_ioctl_async_vol_args,
982 transid), &transid, sizeof(transid)))
983 return -EFAULT;
984 } 996 }
985 997out:
986 kfree(vol_args); 998 kfree(vol_args);
987 kfree(async_vol_args); 999 kfree(vol_args_v2);
988 1000
989 return ret; 1001 return ret;
990} 1002}
@@ -2246,7 +2258,7 @@ long btrfs_ioctl(struct file *file, unsigned int
2246 return btrfs_ioctl_getversion(file, argp); 2258 return btrfs_ioctl_getversion(file, argp);
2247 case BTRFS_IOC_SNAP_CREATE: 2259 case BTRFS_IOC_SNAP_CREATE:
2248 return btrfs_ioctl_snap_create(file, argp, 0, 0); 2260 return btrfs_ioctl_snap_create(file, argp, 0, 0);
2249 case BTRFS_IOC_SNAP_CREATE_ASYNC: 2261 case BTRFS_IOC_SNAP_CREATE_V2:
2250 return btrfs_ioctl_snap_create(file, argp, 0, 1); 2262 return btrfs_ioctl_snap_create(file, argp, 0, 1);
2251 case BTRFS_IOC_SUBVOL_CREATE: 2263 case BTRFS_IOC_SUBVOL_CREATE:
2252 return btrfs_ioctl_snap_create(file, argp, 1, 0); 2264 return btrfs_ioctl_snap_create(file, argp, 1, 0);
diff --git a/fs/btrfs/ioctl.h b/fs/btrfs/ioctl.h
index 17c99ebdf960..c344d12c646b 100644
--- a/fs/btrfs/ioctl.h
+++ b/fs/btrfs/ioctl.h
@@ -30,11 +30,15 @@ struct btrfs_ioctl_vol_args {
30 char name[BTRFS_PATH_NAME_MAX + 1]; 30 char name[BTRFS_PATH_NAME_MAX + 1];
31}; 31};
32 32
33#define BTRFS_SNAPSHOT_NAME_MAX 4079 33#define BTRFS_SUBVOL_CREATE_ASYNC (1ULL << 0)
34struct btrfs_ioctl_async_vol_args { 34
35#define BTRFS_SUBVOL_NAME_MAX 4039
36struct btrfs_ioctl_vol_args_v2 {
35 __s64 fd; 37 __s64 fd;
36 __u64 transid; 38 __u64 transid;
37 char name[BTRFS_SNAPSHOT_NAME_MAX + 1]; 39 __u64 flags;
40 __u64 unused[4];
41 char name[BTRFS_SUBVOL_NAME_MAX + 1];
38}; 42};
39 43
40#define BTRFS_INO_LOOKUP_PATH_MAX 4080 44#define BTRFS_INO_LOOKUP_PATH_MAX 4080
@@ -187,6 +191,6 @@ struct btrfs_ioctl_space_args {
187 struct btrfs_ioctl_space_args) 191 struct btrfs_ioctl_space_args)
188#define BTRFS_IOC_START_SYNC _IOR(BTRFS_IOCTL_MAGIC, 24, __u64) 192#define BTRFS_IOC_START_SYNC _IOR(BTRFS_IOCTL_MAGIC, 24, __u64)
189#define BTRFS_IOC_WAIT_SYNC _IOW(BTRFS_IOCTL_MAGIC, 22, __u64) 193#define BTRFS_IOC_WAIT_SYNC _IOW(BTRFS_IOCTL_MAGIC, 22, __u64)
190#define BTRFS_IOC_SNAP_CREATE_ASYNC _IOW(BTRFS_IOCTL_MAGIC, 23, \ 194#define BTRFS_IOC_SNAP_CREATE_V2 _IOW(BTRFS_IOCTL_MAGIC, 23, \
191 struct btrfs_ioctl_async_vol_args) 195 struct btrfs_ioctl_vol_args_v2)
192#endif 196#endif
diff --git a/fs/btrfs/orphan.c b/fs/btrfs/orphan.c
index 79cba5fbc28e..f8be250963a0 100644
--- a/fs/btrfs/orphan.c
+++ b/fs/btrfs/orphan.c
@@ -56,8 +56,12 @@ int btrfs_del_orphan_item(struct btrfs_trans_handle *trans,
56 return -ENOMEM; 56 return -ENOMEM;
57 57
58 ret = btrfs_search_slot(trans, root, &key, path, -1, 1); 58 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
59 if (ret) 59 if (ret < 0)
60 goto out; 60 goto out;
61 if (ret) {
62 ret = -ENOENT;
63 goto out;
64 }
61 65
62 ret = btrfs_del_item(trans, root, path); 66 ret = btrfs_del_item(trans, root, path);
63 67
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index dbb51ea7a13c..883c6fa1367e 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -685,9 +685,9 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
685 mutex_unlock(&root->d_inode->i_mutex); 685 mutex_unlock(&root->d_inode->i_mutex);
686 686
687 if (IS_ERR(new_root)) { 687 if (IS_ERR(new_root)) {
688 dput(root);
688 deactivate_locked_super(s); 689 deactivate_locked_super(s);
689 error = PTR_ERR(new_root); 690 error = PTR_ERR(new_root);
690 dput(root);
691 goto error_free_subvol_name; 691 goto error_free_subvol_name;
692 } 692 }
693 if (!new_root->d_inode) { 693 if (!new_root->d_inode) {
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index cc04dc1445d6..6b9884507837 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -412,12 +412,16 @@ static noinline int device_list_add(const char *path,
412 412
413 device->fs_devices = fs_devices; 413 device->fs_devices = fs_devices;
414 fs_devices->num_devices++; 414 fs_devices->num_devices++;
415 } else if (strcmp(device->name, path)) { 415 } else if (!device->name || strcmp(device->name, path)) {
416 name = kstrdup(path, GFP_NOFS); 416 name = kstrdup(path, GFP_NOFS);
417 if (!name) 417 if (!name)
418 return -ENOMEM; 418 return -ENOMEM;
419 kfree(device->name); 419 kfree(device->name);
420 device->name = name; 420 device->name = name;
421 if (device->missing) {
422 fs_devices->missing_devices--;
423 device->missing = 0;
424 }
421 } 425 }
422 426
423 if (found_transid > fs_devices->latest_trans) { 427 if (found_transid > fs_devices->latest_trans) {
@@ -1236,6 +1240,9 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
1236 1240
1237 device->fs_devices->num_devices--; 1241 device->fs_devices->num_devices--;
1238 1242
1243 if (device->missing)
1244 root->fs_info->fs_devices->missing_devices--;
1245
1239 next_device = list_entry(root->fs_info->fs_devices->devices.next, 1246 next_device = list_entry(root->fs_info->fs_devices->devices.next,
1240 struct btrfs_device, dev_list); 1247 struct btrfs_device, dev_list);
1241 if (device->bdev == root->fs_info->sb->s_bdev) 1248 if (device->bdev == root->fs_info->sb->s_bdev)
@@ -3080,7 +3087,9 @@ static struct btrfs_device *add_missing_dev(struct btrfs_root *root,
3080 device->devid = devid; 3087 device->devid = devid;
3081 device->work.func = pending_bios_fn; 3088 device->work.func = pending_bios_fn;
3082 device->fs_devices = fs_devices; 3089 device->fs_devices = fs_devices;
3090 device->missing = 1;
3083 fs_devices->num_devices++; 3091 fs_devices->num_devices++;
3092 fs_devices->missing_devices++;
3084 spin_lock_init(&device->io_lock); 3093 spin_lock_init(&device->io_lock);
3085 INIT_LIST_HEAD(&device->dev_alloc_list); 3094 INIT_LIST_HEAD(&device->dev_alloc_list);
3086 memcpy(device->uuid, dev_uuid, BTRFS_UUID_SIZE); 3095 memcpy(device->uuid, dev_uuid, BTRFS_UUID_SIZE);
@@ -3278,6 +3287,15 @@ static int read_one_dev(struct btrfs_root *root,
3278 device = add_missing_dev(root, devid, dev_uuid); 3287 device = add_missing_dev(root, devid, dev_uuid);
3279 if (!device) 3288 if (!device)
3280 return -ENOMEM; 3289 return -ENOMEM;
3290 } else if (!device->missing) {
3291 /*
3292 * this happens when a device that was properly setup
3293 * in the device info lists suddenly goes bad.
3294 * device->bdev is NULL, and so we have to set
3295 * device->missing to one here
3296 */
3297 root->fs_info->fs_devices->missing_devices++;
3298 device->missing = 1;
3281 } 3299 }
3282 } 3300 }
3283 3301
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 2b638b6e4eea..2740db49eb04 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -44,6 +44,7 @@ struct btrfs_device {
44 44
45 int writeable; 45 int writeable;
46 int in_fs_metadata; 46 int in_fs_metadata;
47 int missing;
47 48
48 spinlock_t io_lock; 49 spinlock_t io_lock;
49 50
@@ -93,6 +94,7 @@ struct btrfs_fs_devices {
93 u64 num_devices; 94 u64 num_devices;
94 u64 open_devices; 95 u64 open_devices;
95 u64 rw_devices; 96 u64 rw_devices;
97 u64 missing_devices;
96 u64 total_rw_bytes; 98 u64 total_rw_bytes;
97 struct block_device *latest_bdev; 99 struct block_device *latest_bdev;
98 100
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 7d447af84ec4..d902948a90d8 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -40,7 +40,8 @@ int ceph_init_dentry(struct dentry *dentry)
40 if (dentry->d_fsdata) 40 if (dentry->d_fsdata)
41 return 0; 41 return 0;
42 42
43 if (ceph_snap(dentry->d_parent->d_inode) == CEPH_NOSNAP) 43 if (dentry->d_parent == NULL || /* nfs fh_to_dentry */
44 ceph_snap(dentry->d_parent->d_inode) == CEPH_NOSNAP)
44 dentry->d_op = &ceph_dentry_ops; 45 dentry->d_op = &ceph_dentry_ops;
45 else if (ceph_snap(dentry->d_parent->d_inode) == CEPH_SNAPDIR) 46 else if (ceph_snap(dentry->d_parent->d_inode) == CEPH_SNAPDIR)
46 dentry->d_op = &ceph_snapdir_dentry_ops; 47 dentry->d_op = &ceph_snapdir_dentry_ops;
@@ -114,8 +115,8 @@ static int __dcache_readdir(struct file *filp,
114 spin_lock(&dcache_lock); 115 spin_lock(&dcache_lock);
115 116
116 /* start at beginning? */ 117 /* start at beginning? */
117 if (filp->f_pos == 2 || (last && 118 if (filp->f_pos == 2 || last == NULL ||
118 filp->f_pos < ceph_dentry(last)->offset)) { 119 filp->f_pos < ceph_dentry(last)->offset) {
119 if (list_empty(&parent->d_subdirs)) 120 if (list_empty(&parent->d_subdirs))
120 goto out_unlock; 121 goto out_unlock;
121 p = parent->d_subdirs.prev; 122 p = parent->d_subdirs.prev;
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 8d79b8912e31..7d0e4a82d898 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -282,7 +282,8 @@ int ceph_release(struct inode *inode, struct file *file)
282static int striped_read(struct inode *inode, 282static int striped_read(struct inode *inode,
283 u64 off, u64 len, 283 u64 off, u64 len,
284 struct page **pages, int num_pages, 284 struct page **pages, int num_pages,
285 int *checkeof, bool align_to_pages) 285 int *checkeof, bool align_to_pages,
286 unsigned long buf_align)
286{ 287{
287 struct ceph_fs_client *fsc = ceph_inode_to_client(inode); 288 struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
288 struct ceph_inode_info *ci = ceph_inode(inode); 289 struct ceph_inode_info *ci = ceph_inode(inode);
@@ -307,7 +308,7 @@ static int striped_read(struct inode *inode,
307 308
308more: 309more:
309 if (align_to_pages) 310 if (align_to_pages)
310 page_align = (pos - io_align) & ~PAGE_MASK; 311 page_align = (pos - io_align + buf_align) & ~PAGE_MASK;
311 else 312 else
312 page_align = pos & ~PAGE_MASK; 313 page_align = pos & ~PAGE_MASK;
313 this_len = left; 314 this_len = left;
@@ -376,16 +377,18 @@ static ssize_t ceph_sync_read(struct file *file, char __user *data,
376 struct inode *inode = file->f_dentry->d_inode; 377 struct inode *inode = file->f_dentry->d_inode;
377 struct page **pages; 378 struct page **pages;
378 u64 off = *poff; 379 u64 off = *poff;
379 int num_pages = calc_pages_for(off, len); 380 int num_pages, ret;
380 int ret;
381 381
382 dout("sync_read on file %p %llu~%u %s\n", file, off, len, 382 dout("sync_read on file %p %llu~%u %s\n", file, off, len,
383 (file->f_flags & O_DIRECT) ? "O_DIRECT" : ""); 383 (file->f_flags & O_DIRECT) ? "O_DIRECT" : "");
384 384
385 if (file->f_flags & O_DIRECT) 385 if (file->f_flags & O_DIRECT) {
386 pages = ceph_get_direct_page_vector(data, num_pages); 386 num_pages = calc_pages_for((unsigned long)data, len);
387 else 387 pages = ceph_get_direct_page_vector(data, num_pages, true);
388 } else {
389 num_pages = calc_pages_for(off, len);
388 pages = ceph_alloc_page_vector(num_pages, GFP_NOFS); 390 pages = ceph_alloc_page_vector(num_pages, GFP_NOFS);
391 }
389 if (IS_ERR(pages)) 392 if (IS_ERR(pages))
390 return PTR_ERR(pages); 393 return PTR_ERR(pages);
391 394
@@ -400,7 +403,8 @@ static ssize_t ceph_sync_read(struct file *file, char __user *data,
400 goto done; 403 goto done;
401 404
402 ret = striped_read(inode, off, len, pages, num_pages, checkeof, 405 ret = striped_read(inode, off, len, pages, num_pages, checkeof,
403 file->f_flags & O_DIRECT); 406 file->f_flags & O_DIRECT,
407 (unsigned long)data & ~PAGE_MASK);
404 408
405 if (ret >= 0 && (file->f_flags & O_DIRECT) == 0) 409 if (ret >= 0 && (file->f_flags & O_DIRECT) == 0)
406 ret = ceph_copy_page_vector_to_user(pages, data, off, ret); 410 ret = ceph_copy_page_vector_to_user(pages, data, off, ret);
@@ -409,7 +413,7 @@ static ssize_t ceph_sync_read(struct file *file, char __user *data,
409 413
410done: 414done:
411 if (file->f_flags & O_DIRECT) 415 if (file->f_flags & O_DIRECT)
412 ceph_put_page_vector(pages, num_pages); 416 ceph_put_page_vector(pages, num_pages, true);
413 else 417 else
414 ceph_release_page_vector(pages, num_pages); 418 ceph_release_page_vector(pages, num_pages);
415 dout("sync_read result %d\n", ret); 419 dout("sync_read result %d\n", ret);
@@ -456,6 +460,7 @@ static ssize_t ceph_sync_write(struct file *file, const char __user *data,
456 int do_sync = 0; 460 int do_sync = 0;
457 int check_caps = 0; 461 int check_caps = 0;
458 int page_align, io_align; 462 int page_align, io_align;
463 unsigned long buf_align;
459 int ret; 464 int ret;
460 struct timespec mtime = CURRENT_TIME; 465 struct timespec mtime = CURRENT_TIME;
461 466
@@ -471,6 +476,7 @@ static ssize_t ceph_sync_write(struct file *file, const char __user *data,
471 pos = *offset; 476 pos = *offset;
472 477
473 io_align = pos & ~PAGE_MASK; 478 io_align = pos & ~PAGE_MASK;
479 buf_align = (unsigned long)data & ~PAGE_MASK;
474 480
475 ret = filemap_write_and_wait_range(inode->i_mapping, pos, pos + left); 481 ret = filemap_write_and_wait_range(inode->i_mapping, pos, pos + left);
476 if (ret < 0) 482 if (ret < 0)
@@ -496,12 +502,15 @@ static ssize_t ceph_sync_write(struct file *file, const char __user *data,
496 */ 502 */
497more: 503more:
498 len = left; 504 len = left;
499 if (file->f_flags & O_DIRECT) 505 if (file->f_flags & O_DIRECT) {
500 /* write from beginning of first page, regardless of 506 /* write from beginning of first page, regardless of
501 io alignment */ 507 io alignment */
502 page_align = (pos - io_align) & ~PAGE_MASK; 508 page_align = (pos - io_align + buf_align) & ~PAGE_MASK;
503 else 509 num_pages = calc_pages_for((unsigned long)data, len);
510 } else {
504 page_align = pos & ~PAGE_MASK; 511 page_align = pos & ~PAGE_MASK;
512 num_pages = calc_pages_for(pos, len);
513 }
505 req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout, 514 req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout,
506 ceph_vino(inode), pos, &len, 515 ceph_vino(inode), pos, &len,
507 CEPH_OSD_OP_WRITE, flags, 516 CEPH_OSD_OP_WRITE, flags,
@@ -512,10 +521,8 @@ more:
512 if (!req) 521 if (!req)
513 return -ENOMEM; 522 return -ENOMEM;
514 523
515 num_pages = calc_pages_for(pos, len);
516
517 if (file->f_flags & O_DIRECT) { 524 if (file->f_flags & O_DIRECT) {
518 pages = ceph_get_direct_page_vector(data, num_pages); 525 pages = ceph_get_direct_page_vector(data, num_pages, false);
519 if (IS_ERR(pages)) { 526 if (IS_ERR(pages)) {
520 ret = PTR_ERR(pages); 527 ret = PTR_ERR(pages);
521 goto out; 528 goto out;
@@ -565,7 +572,7 @@ more:
565 } 572 }
566 573
567 if (file->f_flags & O_DIRECT) 574 if (file->f_flags & O_DIRECT)
568 ceph_put_page_vector(pages, num_pages); 575 ceph_put_page_vector(pages, num_pages, false);
569 else if (file->f_flags & O_SYNC) 576 else if (file->f_flags & O_SYNC)
570 ceph_release_page_vector(pages, num_pages); 577 ceph_release_page_vector(pages, num_pages);
571 578
diff --git a/fs/ceph/ioctl.h b/fs/ceph/ioctl.h
index a6ce54e94eb5..52e8fd74d450 100644
--- a/fs/ceph/ioctl.h
+++ b/fs/ceph/ioctl.h
@@ -4,7 +4,7 @@
4#include <linux/ioctl.h> 4#include <linux/ioctl.h>
5#include <linux/types.h> 5#include <linux/types.h>
6 6
7#define CEPH_IOCTL_MAGIC 0x98 7#define CEPH_IOCTL_MAGIC 0x97
8 8
9/* just use u64 to align sanely on all archs */ 9/* just use u64 to align sanely on all archs */
10struct ceph_ioctl_layout { 10struct ceph_ioctl_layout {
diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c
index 40abde93c345..476b329867d4 100644
--- a/fs/ceph/locks.c
+++ b/fs/ceph/locks.c
@@ -11,40 +11,68 @@
11 * Implement fcntl and flock locking functions. 11 * Implement fcntl and flock locking functions.
12 */ 12 */
13static int ceph_lock_message(u8 lock_type, u16 operation, struct file *file, 13static int ceph_lock_message(u8 lock_type, u16 operation, struct file *file,
14 u64 pid, u64 pid_ns, 14 int cmd, u8 wait, struct file_lock *fl)
15 int cmd, u64 start, u64 length, u8 wait)
16{ 15{
17 struct inode *inode = file->f_dentry->d_inode; 16 struct inode *inode = file->f_dentry->d_inode;
18 struct ceph_mds_client *mdsc = 17 struct ceph_mds_client *mdsc =
19 ceph_sb_to_client(inode->i_sb)->mdsc; 18 ceph_sb_to_client(inode->i_sb)->mdsc;
20 struct ceph_mds_request *req; 19 struct ceph_mds_request *req;
21 int err; 20 int err;
21 u64 length = 0;
22 22
23 req = ceph_mdsc_create_request(mdsc, operation, USE_AUTH_MDS); 23 req = ceph_mdsc_create_request(mdsc, operation, USE_AUTH_MDS);
24 if (IS_ERR(req)) 24 if (IS_ERR(req))
25 return PTR_ERR(req); 25 return PTR_ERR(req);
26 req->r_inode = igrab(inode); 26 req->r_inode = igrab(inode);
27 27
28 /* mds requires start and length rather than start and end */
29 if (LLONG_MAX == fl->fl_end)
30 length = 0;
31 else
32 length = fl->fl_end - fl->fl_start + 1;
33
28 dout("ceph_lock_message: rule: %d, op: %d, pid: %llu, start: %llu, " 34 dout("ceph_lock_message: rule: %d, op: %d, pid: %llu, start: %llu, "
29 "length: %llu, wait: %d, type`: %d", (int)lock_type, 35 "length: %llu, wait: %d, type`: %d", (int)lock_type,
30 (int)operation, pid, start, length, wait, cmd); 36 (int)operation, (u64)fl->fl_pid, fl->fl_start,
37 length, wait, fl->fl_type);
38
31 39
32 req->r_args.filelock_change.rule = lock_type; 40 req->r_args.filelock_change.rule = lock_type;
33 req->r_args.filelock_change.type = cmd; 41 req->r_args.filelock_change.type = cmd;
34 req->r_args.filelock_change.pid = cpu_to_le64(pid); 42 req->r_args.filelock_change.pid = cpu_to_le64((u64)fl->fl_pid);
35 /* This should be adjusted, but I'm not sure if 43 /* This should be adjusted, but I'm not sure if
36 namespaces actually get id numbers*/ 44 namespaces actually get id numbers*/
37 req->r_args.filelock_change.pid_namespace = 45 req->r_args.filelock_change.pid_namespace =
38 cpu_to_le64((u64)pid_ns); 46 cpu_to_le64((u64)(unsigned long)fl->fl_nspid);
39 req->r_args.filelock_change.start = cpu_to_le64(start); 47 req->r_args.filelock_change.start = cpu_to_le64(fl->fl_start);
40 req->r_args.filelock_change.length = cpu_to_le64(length); 48 req->r_args.filelock_change.length = cpu_to_le64(length);
41 req->r_args.filelock_change.wait = wait; 49 req->r_args.filelock_change.wait = wait;
42 50
43 err = ceph_mdsc_do_request(mdsc, inode, req); 51 err = ceph_mdsc_do_request(mdsc, inode, req);
52
53 if ( operation == CEPH_MDS_OP_GETFILELOCK){
54 fl->fl_pid = le64_to_cpu(req->r_reply_info.filelock_reply->pid);
55 if (CEPH_LOCK_SHARED == req->r_reply_info.filelock_reply->type)
56 fl->fl_type = F_RDLCK;
57 else if (CEPH_LOCK_EXCL == req->r_reply_info.filelock_reply->type)
58 fl->fl_type = F_WRLCK;
59 else
60 fl->fl_type = F_UNLCK;
61
62 fl->fl_start = le64_to_cpu(req->r_reply_info.filelock_reply->start);
63 length = le64_to_cpu(req->r_reply_info.filelock_reply->start) +
64 le64_to_cpu(req->r_reply_info.filelock_reply->length);
65 if (length >= 1)
66 fl->fl_end = length -1;
67 else
68 fl->fl_end = 0;
69
70 }
44 ceph_mdsc_put_request(req); 71 ceph_mdsc_put_request(req);
45 dout("ceph_lock_message: rule: %d, op: %d, pid: %llu, start: %llu, " 72 dout("ceph_lock_message: rule: %d, op: %d, pid: %llu, start: %llu, "
46 "length: %llu, wait: %d, type`: %d err code %d", (int)lock_type, 73 "length: %llu, wait: %d, type`: %d, err code %d", (int)lock_type,
47 (int)operation, pid, start, length, wait, cmd, err); 74 (int)operation, (u64)fl->fl_pid, fl->fl_start,
75 length, wait, fl->fl_type, err);
48 return err; 76 return err;
49} 77}
50 78
@@ -54,7 +82,6 @@ static int ceph_lock_message(u8 lock_type, u16 operation, struct file *file,
54 */ 82 */
55int ceph_lock(struct file *file, int cmd, struct file_lock *fl) 83int ceph_lock(struct file *file, int cmd, struct file_lock *fl)
56{ 84{
57 u64 length;
58 u8 lock_cmd; 85 u8 lock_cmd;
59 int err; 86 int err;
60 u8 wait = 0; 87 u8 wait = 0;
@@ -76,29 +103,20 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl)
76 else 103 else
77 lock_cmd = CEPH_LOCK_UNLOCK; 104 lock_cmd = CEPH_LOCK_UNLOCK;
78 105
79 if (LLONG_MAX == fl->fl_end) 106 err = ceph_lock_message(CEPH_LOCK_FCNTL, op, file, lock_cmd, wait, fl);
80 length = 0;
81 else
82 length = fl->fl_end - fl->fl_start + 1;
83
84 err = ceph_lock_message(CEPH_LOCK_FCNTL, op, file,
85 (u64)fl->fl_pid,
86 (u64)(unsigned long)fl->fl_nspid,
87 lock_cmd, fl->fl_start,
88 length, wait);
89 if (!err) { 107 if (!err) {
90 dout("mds locked, locking locally"); 108 if ( op != CEPH_MDS_OP_GETFILELOCK ){
91 err = posix_lock_file(file, fl, NULL); 109 dout("mds locked, locking locally");
92 if (err && (CEPH_MDS_OP_SETFILELOCK == op)) { 110 err = posix_lock_file(file, fl, NULL);
93 /* undo! This should only happen if the kernel detects 111 if (err && (CEPH_MDS_OP_SETFILELOCK == op)) {
94 * local deadlock. */ 112 /* undo! This should only happen if the kernel detects
95 ceph_lock_message(CEPH_LOCK_FCNTL, op, file, 113 * local deadlock. */
96 (u64)fl->fl_pid, 114 ceph_lock_message(CEPH_LOCK_FCNTL, op, file,
97 (u64)(unsigned long)fl->fl_nspid, 115 CEPH_LOCK_UNLOCK, 0, fl);
98 CEPH_LOCK_UNLOCK, fl->fl_start, 116 dout("got %d on posix_lock_file, undid lock", err);
99 length, 0); 117 }
100 dout("got %d on posix_lock_file, undid lock", err);
101 } 118 }
119
102 } else { 120 } else {
103 dout("mds returned error code %d", err); 121 dout("mds returned error code %d", err);
104 } 122 }
@@ -107,7 +125,6 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl)
107 125
108int ceph_flock(struct file *file, int cmd, struct file_lock *fl) 126int ceph_flock(struct file *file, int cmd, struct file_lock *fl)
109{ 127{
110 u64 length;
111 u8 lock_cmd; 128 u8 lock_cmd;
112 int err; 129 int err;
113 u8 wait = 1; 130 u8 wait = 1;
@@ -127,26 +144,15 @@ int ceph_flock(struct file *file, int cmd, struct file_lock *fl)
127 lock_cmd = CEPH_LOCK_EXCL; 144 lock_cmd = CEPH_LOCK_EXCL;
128 else 145 else
129 lock_cmd = CEPH_LOCK_UNLOCK; 146 lock_cmd = CEPH_LOCK_UNLOCK;
130 /* mds requires start and length rather than start and end */
131 if (LLONG_MAX == fl->fl_end)
132 length = 0;
133 else
134 length = fl->fl_end - fl->fl_start + 1;
135 147
136 err = ceph_lock_message(CEPH_LOCK_FLOCK, CEPH_MDS_OP_SETFILELOCK, 148 err = ceph_lock_message(CEPH_LOCK_FLOCK, CEPH_MDS_OP_SETFILELOCK,
137 file, (u64)fl->fl_pid, 149 file, lock_cmd, wait, fl);
138 (u64)(unsigned long)fl->fl_nspid,
139 lock_cmd, fl->fl_start,
140 length, wait);
141 if (!err) { 150 if (!err) {
142 err = flock_lock_file_wait(file, fl); 151 err = flock_lock_file_wait(file, fl);
143 if (err) { 152 if (err) {
144 ceph_lock_message(CEPH_LOCK_FLOCK, 153 ceph_lock_message(CEPH_LOCK_FLOCK,
145 CEPH_MDS_OP_SETFILELOCK, 154 CEPH_MDS_OP_SETFILELOCK,
146 file, (u64)fl->fl_pid, 155 file, CEPH_LOCK_UNLOCK, 0, fl);
147 (u64)(unsigned long)fl->fl_nspid,
148 CEPH_LOCK_UNLOCK, fl->fl_start,
149 length, 0);
150 dout("got %d on flock_lock_file_wait, undid lock", err); 156 dout("got %d on flock_lock_file_wait, undid lock", err);
151 } 157 }
152 } else { 158 } else {
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 098b18508479..38800eaa81d0 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -202,6 +202,38 @@ out_bad:
202} 202}
203 203
204/* 204/*
205 * parse fcntl F_GETLK results
206 */
207static int parse_reply_info_filelock(void **p, void *end,
208 struct ceph_mds_reply_info_parsed *info)
209{
210 if (*p + sizeof(*info->filelock_reply) > end)
211 goto bad;
212
213 info->filelock_reply = *p;
214 *p += sizeof(*info->filelock_reply);
215
216 if (unlikely(*p != end))
217 goto bad;
218 return 0;
219
220bad:
221 return -EIO;
222}
223
224/*
225 * parse extra results
226 */
227static int parse_reply_info_extra(void **p, void *end,
228 struct ceph_mds_reply_info_parsed *info)
229{
230 if (info->head->op == CEPH_MDS_OP_GETFILELOCK)
231 return parse_reply_info_filelock(p, end, info);
232 else
233 return parse_reply_info_dir(p, end, info);
234}
235
236/*
205 * parse entire mds reply 237 * parse entire mds reply
206 */ 238 */
207static int parse_reply_info(struct ceph_msg *msg, 239static int parse_reply_info(struct ceph_msg *msg,
@@ -223,10 +255,10 @@ static int parse_reply_info(struct ceph_msg *msg,
223 goto out_bad; 255 goto out_bad;
224 } 256 }
225 257
226 /* dir content */ 258 /* extra */
227 ceph_decode_32_safe(&p, end, len, bad); 259 ceph_decode_32_safe(&p, end, len, bad);
228 if (len > 0) { 260 if (len > 0) {
229 err = parse_reply_info_dir(&p, p+len, info); 261 err = parse_reply_info_extra(&p, p+len, info);
230 if (err < 0) 262 if (err < 0)
231 goto out_bad; 263 goto out_bad;
232 } 264 }
@@ -2074,7 +2106,7 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
2074 2106
2075 mutex_lock(&session->s_mutex); 2107 mutex_lock(&session->s_mutex);
2076 if (err < 0) { 2108 if (err < 0) {
2077 pr_err("mdsc_handle_reply got corrupt reply mds%d\n", mds); 2109 pr_err("mdsc_handle_reply got corrupt reply mds%d(tid:%lld)\n", mds, tid);
2078 ceph_msg_dump(msg); 2110 ceph_msg_dump(msg);
2079 goto out_err; 2111 goto out_err;
2080 } 2112 }
@@ -2094,7 +2126,8 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
2094 mutex_lock(&req->r_fill_mutex); 2126 mutex_lock(&req->r_fill_mutex);
2095 err = ceph_fill_trace(mdsc->fsc->sb, req, req->r_session); 2127 err = ceph_fill_trace(mdsc->fsc->sb, req, req->r_session);
2096 if (err == 0) { 2128 if (err == 0) {
2097 if (result == 0 && rinfo->dir_nr) 2129 if (result == 0 && req->r_op != CEPH_MDS_OP_GETFILELOCK &&
2130 rinfo->dir_nr)
2098 ceph_readdir_prepopulate(req, req->r_session); 2131 ceph_readdir_prepopulate(req, req->r_session);
2099 ceph_unreserve_caps(mdsc, &req->r_caps_reservation); 2132 ceph_unreserve_caps(mdsc, &req->r_caps_reservation);
2100 } 2133 }
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index 9341fd4f1432..aabe563b54db 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -42,26 +42,37 @@ struct ceph_mds_reply_info_in {
42}; 42};
43 43
44/* 44/*
45 * parsed info about an mds reply, including information about the 45 * parsed info about an mds reply, including information about
46 * target inode and/or its parent directory and dentry, and directory 46 * either: 1) the target inode and/or its parent directory and dentry,
47 * contents (for readdir results). 47 * and directory contents (for readdir results), or
48 * 2) the file range lock info (for fcntl F_GETLK results).
48 */ 49 */
49struct ceph_mds_reply_info_parsed { 50struct ceph_mds_reply_info_parsed {
50 struct ceph_mds_reply_head *head; 51 struct ceph_mds_reply_head *head;
51 52
53 /* trace */
52 struct ceph_mds_reply_info_in diri, targeti; 54 struct ceph_mds_reply_info_in diri, targeti;
53 struct ceph_mds_reply_dirfrag *dirfrag; 55 struct ceph_mds_reply_dirfrag *dirfrag;
54 char *dname; 56 char *dname;
55 u32 dname_len; 57 u32 dname_len;
56 struct ceph_mds_reply_lease *dlease; 58 struct ceph_mds_reply_lease *dlease;
57 59
58 struct ceph_mds_reply_dirfrag *dir_dir; 60 /* extra */
59 int dir_nr; 61 union {
60 char **dir_dname; 62 /* for fcntl F_GETLK results */
61 u32 *dir_dname_len; 63 struct ceph_filelock *filelock_reply;
62 struct ceph_mds_reply_lease **dir_dlease; 64
63 struct ceph_mds_reply_info_in *dir_in; 65 /* for readdir results */
64 u8 dir_complete, dir_end; 66 struct {
67 struct ceph_mds_reply_dirfrag *dir_dir;
68 int dir_nr;
69 char **dir_dname;
70 u32 *dir_dname_len;
71 struct ceph_mds_reply_lease **dir_dlease;
72 struct ceph_mds_reply_info_in *dir_in;
73 u8 dir_complete, dir_end;
74 };
75 };
65 76
66 /* encoded blob describing snapshot contexts for certain 77 /* encoded blob describing snapshot contexts for certain
67 operations (e.g., open) */ 78 operations (e.g., open) */
diff --git a/fs/cifs/Makefile b/fs/cifs/Makefile
index adefa60a9bdc..43b19dd39191 100644
--- a/fs/cifs/Makefile
+++ b/fs/cifs/Makefile
@@ -6,7 +6,9 @@ obj-$(CONFIG_CIFS) += cifs.o
6cifs-y := cifsfs.o cifssmb.o cifs_debug.o connect.o dir.o file.o inode.o \ 6cifs-y := cifsfs.o cifssmb.o cifs_debug.o connect.o dir.o file.o inode.o \
7 link.o misc.o netmisc.o smbdes.o smbencrypt.o transport.o asn1.o \ 7 link.o misc.o netmisc.o smbdes.o smbencrypt.o transport.o asn1.o \
8 md4.o md5.o cifs_unicode.o nterr.o xattr.o cifsencrypt.o \ 8 md4.o md5.o cifs_unicode.o nterr.o xattr.o cifsencrypt.o \
9 readdir.o ioctl.o sess.o export.o cifsacl.o 9 readdir.o ioctl.o sess.o export.o
10
11cifs-$(CONFIG_CIFS_ACL) += cifsacl.o
10 12
11cifs-$(CONFIG_CIFS_UPCALL) += cifs_spnego.o 13cifs-$(CONFIG_CIFS_UPCALL) += cifs_spnego.o
12 14
diff --git a/fs/cifs/README b/fs/cifs/README
index ee68d1036544..46af99ab3614 100644
--- a/fs/cifs/README
+++ b/fs/cifs/README
@@ -337,6 +337,15 @@ A partial list of the supported mount options follows:
337 wsize default write size (default 57344) 337 wsize default write size (default 57344)
338 maximum wsize currently allowed by CIFS is 57344 (fourteen 338 maximum wsize currently allowed by CIFS is 57344 (fourteen
339 4096 byte pages) 339 4096 byte pages)
340 actimeo=n attribute cache timeout in seconds (default 1 second).
341 After this timeout, the cifs client requests fresh attribute
342 information from the server. This option allows to tune the
343 attribute cache timeout to suit the workload needs. Shorter
344 timeouts mean better the cache coherency, but increased number
345 of calls to the server. Longer timeouts mean reduced number
346 of calls to the server at the expense of less stricter cache
347 coherency checks (i.e. incorrect attribute cache for a short
348 period of time).
340 rw mount the network share read-write (note that the 349 rw mount the network share read-write (note that the
341 server may still consider the share read-only) 350 server may still consider the share read-only)
342 ro mount network share read-only 351 ro mount network share read-only
diff --git a/fs/cifs/cifs_fs_sb.h b/fs/cifs/cifs_fs_sb.h
index e9a393c9c2ca..7852cd677051 100644
--- a/fs/cifs/cifs_fs_sb.h
+++ b/fs/cifs/cifs_fs_sb.h
@@ -48,6 +48,7 @@ struct cifs_sb_info {
48 struct nls_table *local_nls; 48 struct nls_table *local_nls;
49 unsigned int rsize; 49 unsigned int rsize;
50 unsigned int wsize; 50 unsigned int wsize;
51 unsigned long actimeo; /* attribute cache timeout (jiffies) */
51 atomic_t active; 52 atomic_t active;
52 uid_t mnt_uid; 53 uid_t mnt_uid;
53 gid_t mnt_gid; 54 gid_t mnt_gid;
diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c
index c6ebea088ac7..a437ec391a01 100644
--- a/fs/cifs/cifsacl.c
+++ b/fs/cifs/cifsacl.c
@@ -30,8 +30,6 @@
30#include "cifs_debug.h" 30#include "cifs_debug.h"
31 31
32 32
33#ifdef CONFIG_CIFS_EXPERIMENTAL
34
35static struct cifs_wksid wksidarr[NUM_WK_SIDS] = { 33static struct cifs_wksid wksidarr[NUM_WK_SIDS] = {
36 {{1, 0, {0, 0, 0, 0, 0, 0}, {0, 0, 0, 0, 0} }, "null user"}, 34 {{1, 0, {0, 0, 0, 0, 0, 0}, {0, 0, 0, 0, 0} }, "null user"},
37 {{1, 1, {0, 0, 0, 0, 0, 1}, {0, 0, 0, 0, 0} }, "nobody"}, 35 {{1, 1, {0, 0, 0, 0, 0, 1}, {0, 0, 0, 0, 0} }, "nobody"},
@@ -774,4 +772,3 @@ int mode_to_cifs_acl(struct inode *inode, const char *path, __u64 nmode)
774 772
775 return rc; 773 return rc;
776} 774}
777#endif /* CONFIG_CIFS_EXPERIMENTAL */
diff --git a/fs/cifs/cifsacl.h b/fs/cifs/cifsacl.h
index 6c8096cf5155..c4ae7d036563 100644
--- a/fs/cifs/cifsacl.h
+++ b/fs/cifs/cifsacl.h
@@ -74,11 +74,7 @@ struct cifs_wksid {
74 char sidname[SIDNAMELENGTH]; 74 char sidname[SIDNAMELENGTH];
75} __attribute__((packed)); 75} __attribute__((packed));
76 76
77#ifdef CONFIG_CIFS_EXPERIMENTAL
78
79extern int match_sid(struct cifs_sid *); 77extern int match_sid(struct cifs_sid *);
80extern int compare_sids(const struct cifs_sid *, const struct cifs_sid *); 78extern int compare_sids(const struct cifs_sid *, const struct cifs_sid *);
81 79
82#endif /* CONFIG_CIFS_EXPERIMENTAL */
83
84#endif /* _CIFSACL_H */ 80#endif /* _CIFSACL_H */
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 76c8a906a63e..3936aa7f2c22 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -463,6 +463,8 @@ cifs_show_options(struct seq_file *s, struct vfsmount *m)
463 463
464 seq_printf(s, ",rsize=%d", cifs_sb->rsize); 464 seq_printf(s, ",rsize=%d", cifs_sb->rsize);
465 seq_printf(s, ",wsize=%d", cifs_sb->wsize); 465 seq_printf(s, ",wsize=%d", cifs_sb->wsize);
466 /* convert actimeo and display it in seconds */
467 seq_printf(s, ",actimeo=%lu", cifs_sb->actimeo / HZ);
466 468
467 return 0; 469 return 0;
468} 470}
@@ -935,7 +937,6 @@ init_cifs(void)
935 GlobalCurrentXid = 0; 937 GlobalCurrentXid = 0;
936 GlobalTotalActiveXid = 0; 938 GlobalTotalActiveXid = 0;
937 GlobalMaxActiveXid = 0; 939 GlobalMaxActiveXid = 0;
938 memset(Local_System_Name, 0, 15);
939 spin_lock_init(&cifs_tcp_ses_lock); 940 spin_lock_init(&cifs_tcp_ses_lock);
940 spin_lock_init(&cifs_file_list_lock); 941 spin_lock_init(&cifs_file_list_lock);
941 spin_lock_init(&GlobalMid_Lock); 942 spin_lock_init(&GlobalMid_Lock);
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index b577bf0a1bb3..7136c0c3e2f9 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -45,6 +45,16 @@
45#define CIFS_MIN_RCV_POOL 4 45#define CIFS_MIN_RCV_POOL 4
46 46
47/* 47/*
48 * default attribute cache timeout (jiffies)
49 */
50#define CIFS_DEF_ACTIMEO (1 * HZ)
51
52/*
53 * max attribute cache timeout (jiffies) - 2^30
54 */
55#define CIFS_MAX_ACTIMEO (1 << 30)
56
57/*
48 * MAX_REQ is the maximum number of requests that WE will send 58 * MAX_REQ is the maximum number of requests that WE will send
49 * on one socket concurrently. It also matches the most common 59 * on one socket concurrently. It also matches the most common
50 * value of max multiplex returned by servers. We may 60 * value of max multiplex returned by servers. We may
@@ -746,8 +756,6 @@ GLOBAL_EXTERN unsigned int GlobalTotalActiveXid; /* prot by GlobalMid_Sem */
746GLOBAL_EXTERN unsigned int GlobalMaxActiveXid; /* prot by GlobalMid_Sem */ 756GLOBAL_EXTERN unsigned int GlobalMaxActiveXid; /* prot by GlobalMid_Sem */
747GLOBAL_EXTERN spinlock_t GlobalMid_Lock; /* protects above & list operations */ 757GLOBAL_EXTERN spinlock_t GlobalMid_Lock; /* protects above & list operations */
748 /* on midQ entries */ 758 /* on midQ entries */
749GLOBAL_EXTERN char Local_System_Name[15];
750
751/* 759/*
752 * Global counters, updated atomically 760 * Global counters, updated atomically
753 */ 761 */
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index db961dc4fd3d..e6d1481b16c1 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -54,7 +54,8 @@ do { \
54 __func__, curr_xid, (int)rc); \ 54 __func__, curr_xid, (int)rc); \
55} while (0) 55} while (0)
56extern char *build_path_from_dentry(struct dentry *); 56extern char *build_path_from_dentry(struct dentry *);
57extern char *cifs_build_path_to_root(struct cifs_sb_info *cifs_sb); 57extern char *cifs_build_path_to_root(struct cifs_sb_info *cifs_sb,
58 struct cifsTconInfo *tcon);
58extern char *build_wildcard_path_from_dentry(struct dentry *direntry); 59extern char *build_wildcard_path_from_dentry(struct dentry *direntry);
59extern char *cifs_compose_mount_options(const char *sb_mountdata, 60extern char *cifs_compose_mount_options(const char *sb_mountdata,
60 const char *fullpath, const struct dfs_info3_param *ref, 61 const char *fullpath, const struct dfs_info3_param *ref,
@@ -79,9 +80,7 @@ extern bool is_valid_oplock_break(struct smb_hdr *smb,
79 struct TCP_Server_Info *); 80 struct TCP_Server_Info *);
80extern bool is_size_safe_to_change(struct cifsInodeInfo *, __u64 eof); 81extern bool is_size_safe_to_change(struct cifsInodeInfo *, __u64 eof);
81extern struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *, bool); 82extern struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *, bool);
82#ifdef CONFIG_CIFS_EXPERIMENTAL
83extern struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *, bool); 83extern struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *, bool);
84#endif
85extern unsigned int smbCalcSize(struct smb_hdr *ptr); 84extern unsigned int smbCalcSize(struct smb_hdr *ptr);
86extern unsigned int smbCalcSize_LE(struct smb_hdr *ptr); 85extern unsigned int smbCalcSize_LE(struct smb_hdr *ptr);
87extern int decode_negTokenInit(unsigned char *security_blob, int length, 86extern int decode_negTokenInit(unsigned char *security_blob, int length,
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 2f2632b6df5a..67acfb3acad2 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -2478,95 +2478,6 @@ querySymLinkRetry:
2478} 2478}
2479 2479
2480#ifdef CONFIG_CIFS_EXPERIMENTAL 2480#ifdef CONFIG_CIFS_EXPERIMENTAL
2481/* Initialize NT TRANSACT SMB into small smb request buffer.
2482 This assumes that all NT TRANSACTS that we init here have
2483 total parm and data under about 400 bytes (to fit in small cifs
2484 buffer size), which is the case so far, it easily fits. NB:
2485 Setup words themselves and ByteCount
2486 MaxSetupCount (size of returned setup area) and
2487 MaxParameterCount (returned parms size) must be set by caller */
2488static int
2489smb_init_nttransact(const __u16 sub_command, const int setup_count,
2490 const int parm_len, struct cifsTconInfo *tcon,
2491 void **ret_buf)
2492{
2493 int rc;
2494 __u32 temp_offset;
2495 struct smb_com_ntransact_req *pSMB;
2496
2497 rc = small_smb_init(SMB_COM_NT_TRANSACT, 19 + setup_count, tcon,
2498 (void **)&pSMB);
2499 if (rc)
2500 return rc;
2501 *ret_buf = (void *)pSMB;
2502 pSMB->Reserved = 0;
2503 pSMB->TotalParameterCount = cpu_to_le32(parm_len);
2504 pSMB->TotalDataCount = 0;
2505 pSMB->MaxDataCount = cpu_to_le32((tcon->ses->server->maxBuf -
2506 MAX_CIFS_HDR_SIZE) & 0xFFFFFF00);
2507 pSMB->ParameterCount = pSMB->TotalParameterCount;
2508 pSMB->DataCount = pSMB->TotalDataCount;
2509 temp_offset = offsetof(struct smb_com_ntransact_req, Parms) +
2510 (setup_count * 2) - 4 /* for rfc1001 length itself */;
2511 pSMB->ParameterOffset = cpu_to_le32(temp_offset);
2512 pSMB->DataOffset = cpu_to_le32(temp_offset + parm_len);
2513 pSMB->SetupCount = setup_count; /* no need to le convert byte fields */
2514 pSMB->SubCommand = cpu_to_le16(sub_command);
2515 return 0;
2516}
2517
2518static int
2519validate_ntransact(char *buf, char **ppparm, char **ppdata,
2520 __u32 *pparmlen, __u32 *pdatalen)
2521{
2522 char *end_of_smb;
2523 __u32 data_count, data_offset, parm_count, parm_offset;
2524 struct smb_com_ntransact_rsp *pSMBr;
2525
2526 *pdatalen = 0;
2527 *pparmlen = 0;
2528
2529 if (buf == NULL)
2530 return -EINVAL;
2531
2532 pSMBr = (struct smb_com_ntransact_rsp *)buf;
2533
2534 /* ByteCount was converted from little endian in SendReceive */
2535 end_of_smb = 2 /* sizeof byte count */ + pSMBr->ByteCount +
2536 (char *)&pSMBr->ByteCount;
2537
2538 data_offset = le32_to_cpu(pSMBr->DataOffset);
2539 data_count = le32_to_cpu(pSMBr->DataCount);
2540 parm_offset = le32_to_cpu(pSMBr->ParameterOffset);
2541 parm_count = le32_to_cpu(pSMBr->ParameterCount);
2542
2543 *ppparm = (char *)&pSMBr->hdr.Protocol + parm_offset;
2544 *ppdata = (char *)&pSMBr->hdr.Protocol + data_offset;
2545
2546 /* should we also check that parm and data areas do not overlap? */
2547 if (*ppparm > end_of_smb) {
2548 cFYI(1, "parms start after end of smb");
2549 return -EINVAL;
2550 } else if (parm_count + *ppparm > end_of_smb) {
2551 cFYI(1, "parm end after end of smb");
2552 return -EINVAL;
2553 } else if (*ppdata > end_of_smb) {
2554 cFYI(1, "data starts after end of smb");
2555 return -EINVAL;
2556 } else if (data_count + *ppdata > end_of_smb) {
2557 cFYI(1, "data %p + count %d (%p) past smb end %p start %p",
2558 *ppdata, data_count, (data_count + *ppdata),
2559 end_of_smb, pSMBr);
2560 return -EINVAL;
2561 } else if (parm_count + data_count > pSMBr->ByteCount) {
2562 cFYI(1, "parm count and data count larger than SMB");
2563 return -EINVAL;
2564 }
2565 *pdatalen = data_count;
2566 *pparmlen = parm_count;
2567 return 0;
2568}
2569
2570int 2481int
2571CIFSSMBQueryReparseLinkInfo(const int xid, struct cifsTconInfo *tcon, 2482CIFSSMBQueryReparseLinkInfo(const int xid, struct cifsTconInfo *tcon,
2572 const unsigned char *searchName, 2483 const unsigned char *searchName,
@@ -3056,7 +2967,97 @@ GetExtAttrOut:
3056 2967
3057#endif /* CONFIG_POSIX */ 2968#endif /* CONFIG_POSIX */
3058 2969
3059#ifdef CONFIG_CIFS_EXPERIMENTAL 2970#ifdef CONFIG_CIFS_ACL
2971/*
2972 * Initialize NT TRANSACT SMB into small smb request buffer. This assumes that
2973 * all NT TRANSACTS that we init here have total parm and data under about 400
2974 * bytes (to fit in small cifs buffer size), which is the case so far, it
2975 * easily fits. NB: Setup words themselves and ByteCount MaxSetupCount (size of
2976 * returned setup area) and MaxParameterCount (returned parms size) must be set
2977 * by caller
2978 */
2979static int
2980smb_init_nttransact(const __u16 sub_command, const int setup_count,
2981 const int parm_len, struct cifsTconInfo *tcon,
2982 void **ret_buf)
2983{
2984 int rc;
2985 __u32 temp_offset;
2986 struct smb_com_ntransact_req *pSMB;
2987
2988 rc = small_smb_init(SMB_COM_NT_TRANSACT, 19 + setup_count, tcon,
2989 (void **)&pSMB);
2990 if (rc)
2991 return rc;
2992 *ret_buf = (void *)pSMB;
2993 pSMB->Reserved = 0;
2994 pSMB->TotalParameterCount = cpu_to_le32(parm_len);
2995 pSMB->TotalDataCount = 0;
2996 pSMB->MaxDataCount = cpu_to_le32((tcon->ses->server->maxBuf -
2997 MAX_CIFS_HDR_SIZE) & 0xFFFFFF00);
2998 pSMB->ParameterCount = pSMB->TotalParameterCount;
2999 pSMB->DataCount = pSMB->TotalDataCount;
3000 temp_offset = offsetof(struct smb_com_ntransact_req, Parms) +
3001 (setup_count * 2) - 4 /* for rfc1001 length itself */;
3002 pSMB->ParameterOffset = cpu_to_le32(temp_offset);
3003 pSMB->DataOffset = cpu_to_le32(temp_offset + parm_len);
3004 pSMB->SetupCount = setup_count; /* no need to le convert byte fields */
3005 pSMB->SubCommand = cpu_to_le16(sub_command);
3006 return 0;
3007}
3008
3009static int
3010validate_ntransact(char *buf, char **ppparm, char **ppdata,
3011 __u32 *pparmlen, __u32 *pdatalen)
3012{
3013 char *end_of_smb;
3014 __u32 data_count, data_offset, parm_count, parm_offset;
3015 struct smb_com_ntransact_rsp *pSMBr;
3016
3017 *pdatalen = 0;
3018 *pparmlen = 0;
3019
3020 if (buf == NULL)
3021 return -EINVAL;
3022
3023 pSMBr = (struct smb_com_ntransact_rsp *)buf;
3024
3025 /* ByteCount was converted from little endian in SendReceive */
3026 end_of_smb = 2 /* sizeof byte count */ + pSMBr->ByteCount +
3027 (char *)&pSMBr->ByteCount;
3028
3029 data_offset = le32_to_cpu(pSMBr->DataOffset);
3030 data_count = le32_to_cpu(pSMBr->DataCount);
3031 parm_offset = le32_to_cpu(pSMBr->ParameterOffset);
3032 parm_count = le32_to_cpu(pSMBr->ParameterCount);
3033
3034 *ppparm = (char *)&pSMBr->hdr.Protocol + parm_offset;
3035 *ppdata = (char *)&pSMBr->hdr.Protocol + data_offset;
3036
3037 /* should we also check that parm and data areas do not overlap? */
3038 if (*ppparm > end_of_smb) {
3039 cFYI(1, "parms start after end of smb");
3040 return -EINVAL;
3041 } else if (parm_count + *ppparm > end_of_smb) {
3042 cFYI(1, "parm end after end of smb");
3043 return -EINVAL;
3044 } else if (*ppdata > end_of_smb) {
3045 cFYI(1, "data starts after end of smb");
3046 return -EINVAL;
3047 } else if (data_count + *ppdata > end_of_smb) {
3048 cFYI(1, "data %p + count %d (%p) past smb end %p start %p",
3049 *ppdata, data_count, (data_count + *ppdata),
3050 end_of_smb, pSMBr);
3051 return -EINVAL;
3052 } else if (parm_count + data_count > pSMBr->ByteCount) {
3053 cFYI(1, "parm count and data count larger than SMB");
3054 return -EINVAL;
3055 }
3056 *pdatalen = data_count;
3057 *pparmlen = parm_count;
3058 return 0;
3059}
3060
3060/* Get Security Descriptor (by handle) from remote server for a file or dir */ 3061/* Get Security Descriptor (by handle) from remote server for a file or dir */
3061int 3062int
3062CIFSSMBGetCIFSACL(const int xid, struct cifsTconInfo *tcon, __u16 fid, 3063CIFSSMBGetCIFSACL(const int xid, struct cifsTconInfo *tcon, __u16 fid,
@@ -3214,7 +3215,7 @@ setCifsAclRetry:
3214 return (rc); 3215 return (rc);
3215} 3216}
3216 3217
3217#endif /* CONFIG_CIFS_EXPERIMENTAL */ 3218#endif /* CONFIG_CIFS_ACL */
3218 3219
3219/* Legacy Query Path Information call for lookup to old servers such 3220/* Legacy Query Path Information call for lookup to old servers such
3220 as Win9x/WinME */ 3221 as Win9x/WinME */
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 32fa4d9b5dbc..cc1a8604a790 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -105,6 +105,7 @@ struct smb_vol {
105 unsigned int wsize; 105 unsigned int wsize;
106 bool sockopt_tcp_nodelay:1; 106 bool sockopt_tcp_nodelay:1;
107 unsigned short int port; 107 unsigned short int port;
108 unsigned long actimeo; /* attribute cache timeout (jiffies) */
108 char *prepath; 109 char *prepath;
109 struct sockaddr_storage srcaddr; /* allow binding to a local IP */ 110 struct sockaddr_storage srcaddr; /* allow binding to a local IP */
110 struct nls_table *local_nls; 111 struct nls_table *local_nls;
@@ -806,23 +807,20 @@ cifs_parse_mount_options(char *options, const char *devname,
806 short int override_gid = -1; 807 short int override_gid = -1;
807 bool uid_specified = false; 808 bool uid_specified = false;
808 bool gid_specified = false; 809 bool gid_specified = false;
810 char *nodename = utsname()->nodename;
809 811
810 separator[0] = ','; 812 separator[0] = ',';
811 separator[1] = 0; 813 separator[1] = 0;
812 814
813 if (Local_System_Name[0] != 0) 815 /*
814 memcpy(vol->source_rfc1001_name, Local_System_Name, 15); 816 * does not have to be perfect mapping since field is
815 else { 817 * informational, only used for servers that do not support
816 char *nodename = utsname()->nodename; 818 * port 445 and it can be overridden at mount time
817 int n = strnlen(nodename, 15); 819 */
818 memset(vol->source_rfc1001_name, 0x20, 15); 820 memset(vol->source_rfc1001_name, 0x20, 15);
819 for (i = 0; i < n; i++) { 821 for (i = 0; i < strnlen(nodename, 15); i++)
820 /* does not have to be perfect mapping since field is 822 vol->source_rfc1001_name[i] = toupper(nodename[i]);
821 informational, only used for servers that do not support 823
822 port 445 and it can be overridden at mount time */
823 vol->source_rfc1001_name[i] = toupper(nodename[i]);
824 }
825 }
826 vol->source_rfc1001_name[15] = 0; 824 vol->source_rfc1001_name[15] = 0;
827 /* null target name indicates to use *SMBSERVR default called name 825 /* null target name indicates to use *SMBSERVR default called name
828 if we end up sending RFC1001 session initialize */ 826 if we end up sending RFC1001 session initialize */
@@ -840,6 +838,8 @@ cifs_parse_mount_options(char *options, const char *devname,
840 /* default to using server inode numbers where available */ 838 /* default to using server inode numbers where available */
841 vol->server_ino = 1; 839 vol->server_ino = 1;
842 840
841 vol->actimeo = CIFS_DEF_ACTIMEO;
842
843 if (!options) 843 if (!options)
844 return 1; 844 return 1;
845 845
@@ -1214,6 +1214,16 @@ cifs_parse_mount_options(char *options, const char *devname,
1214 printk(KERN_WARNING "CIFS: server net" 1214 printk(KERN_WARNING "CIFS: server net"
1215 "biosname longer than 15 truncated.\n"); 1215 "biosname longer than 15 truncated.\n");
1216 } 1216 }
1217 } else if (strnicmp(data, "actimeo", 7) == 0) {
1218 if (value && *value) {
1219 vol->actimeo = HZ * simple_strtoul(value,
1220 &value, 0);
1221 if (vol->actimeo > CIFS_MAX_ACTIMEO) {
1222 cERROR(1, "CIFS: attribute cache"
1223 "timeout too large");
1224 return 1;
1225 }
1226 }
1217 } else if (strnicmp(data, "credentials", 4) == 0) { 1227 } else if (strnicmp(data, "credentials", 4) == 0) {
1218 /* ignore */ 1228 /* ignore */
1219 } else if (strnicmp(data, "version", 3) == 0) { 1229 } else if (strnicmp(data, "version", 3) == 0) {
@@ -2571,6 +2581,8 @@ static void setup_cifs_sb(struct smb_vol *pvolume_info,
2571 cFYI(1, "file mode: 0x%x dir mode: 0x%x", 2581 cFYI(1, "file mode: 0x%x dir mode: 0x%x",
2572 cifs_sb->mnt_file_mode, cifs_sb->mnt_dir_mode); 2582 cifs_sb->mnt_file_mode, cifs_sb->mnt_dir_mode);
2573 2583
2584 cifs_sb->actimeo = pvolume_info->actimeo;
2585
2574 if (pvolume_info->noperm) 2586 if (pvolume_info->noperm)
2575 cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_NO_PERM; 2587 cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_NO_PERM;
2576 if (pvolume_info->setuids) 2588 if (pvolume_info->setuids)
@@ -2821,13 +2833,13 @@ remote_path_check:
2821 /* check if a whole path (including prepath) is not remote */ 2833 /* check if a whole path (including prepath) is not remote */
2822 if (!rc && cifs_sb->prepathlen && tcon) { 2834 if (!rc && cifs_sb->prepathlen && tcon) {
2823 /* build_path_to_root works only when we have a valid tcon */ 2835 /* build_path_to_root works only when we have a valid tcon */
2824 full_path = cifs_build_path_to_root(cifs_sb); 2836 full_path = cifs_build_path_to_root(cifs_sb, tcon);
2825 if (full_path == NULL) { 2837 if (full_path == NULL) {
2826 rc = -ENOMEM; 2838 rc = -ENOMEM;
2827 goto mount_fail_check; 2839 goto mount_fail_check;
2828 } 2840 }
2829 rc = is_path_accessible(xid, tcon, cifs_sb, full_path); 2841 rc = is_path_accessible(xid, tcon, cifs_sb, full_path);
2830 if (rc != -EREMOTE) { 2842 if (rc != 0 && rc != -EREMOTE) {
2831 kfree(full_path); 2843 kfree(full_path);
2832 goto mount_fail_check; 2844 goto mount_fail_check;
2833 } 2845 }
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index b857ce5db775..5a28660ca2b5 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -1108,7 +1108,6 @@ static ssize_t cifs_write(struct cifsFileInfo *open_file,
1108 return total_written; 1108 return total_written;
1109} 1109}
1110 1110
1111#ifdef CONFIG_CIFS_EXPERIMENTAL
1112struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode, 1111struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
1113 bool fsuid_only) 1112 bool fsuid_only)
1114{ 1113{
@@ -1142,7 +1141,6 @@ struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
1142 spin_unlock(&cifs_file_list_lock); 1141 spin_unlock(&cifs_file_list_lock);
1143 return NULL; 1142 return NULL;
1144} 1143}
1145#endif
1146 1144
1147struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode, 1145struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode,
1148 bool fsuid_only) 1146 bool fsuid_only)
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 28cb6e735943..589f3e3f6e00 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -686,7 +686,7 @@ int cifs_get_inode_info(struct inode **pinode,
686 cFYI(1, "cifs_sfu_type failed: %d", tmprc); 686 cFYI(1, "cifs_sfu_type failed: %d", tmprc);
687 } 687 }
688 688
689#ifdef CONFIG_CIFS_EXPERIMENTAL 689#ifdef CONFIG_CIFS_ACL
690 /* fill in 0777 bits from ACL */ 690 /* fill in 0777 bits from ACL */
691 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) { 691 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) {
692 rc = cifs_acl_to_fattr(cifs_sb, &fattr, *pinode, full_path, 692 rc = cifs_acl_to_fattr(cifs_sb, &fattr, *pinode, full_path,
@@ -697,7 +697,7 @@ int cifs_get_inode_info(struct inode **pinode,
697 goto cgii_exit; 697 goto cgii_exit;
698 } 698 }
699 } 699 }
700#endif 700#endif /* CONFIG_CIFS_ACL */
701 701
702 /* fill in remaining high mode bits e.g. SUID, VTX */ 702 /* fill in remaining high mode bits e.g. SUID, VTX */
703 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL) 703 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL)
@@ -728,12 +728,12 @@ static const struct inode_operations cifs_ipc_inode_ops = {
728 .lookup = cifs_lookup, 728 .lookup = cifs_lookup,
729}; 729};
730 730
731char *cifs_build_path_to_root(struct cifs_sb_info *cifs_sb) 731char *cifs_build_path_to_root(struct cifs_sb_info *cifs_sb,
732 struct cifsTconInfo *tcon)
732{ 733{
733 int pplen = cifs_sb->prepathlen; 734 int pplen = cifs_sb->prepathlen;
734 int dfsplen; 735 int dfsplen;
735 char *full_path = NULL; 736 char *full_path = NULL;
736 struct cifsTconInfo *tcon = cifs_sb_master_tcon(cifs_sb);
737 737
738 /* if no prefix path, simply set path to the root of share to "" */ 738 /* if no prefix path, simply set path to the root of share to "" */
739 if (pplen == 0) { 739 if (pplen == 0) {
@@ -875,7 +875,7 @@ struct inode *cifs_root_iget(struct super_block *sb, unsigned long ino)
875 char *full_path; 875 char *full_path;
876 struct cifsTconInfo *tcon = cifs_sb_master_tcon(cifs_sb); 876 struct cifsTconInfo *tcon = cifs_sb_master_tcon(cifs_sb);
877 877
878 full_path = cifs_build_path_to_root(cifs_sb); 878 full_path = cifs_build_path_to_root(cifs_sb, tcon);
879 if (full_path == NULL) 879 if (full_path == NULL)
880 return ERR_PTR(-ENOMEM); 880 return ERR_PTR(-ENOMEM);
881 881
@@ -1653,6 +1653,7 @@ static bool
1653cifs_inode_needs_reval(struct inode *inode) 1653cifs_inode_needs_reval(struct inode *inode)
1654{ 1654{
1655 struct cifsInodeInfo *cifs_i = CIFS_I(inode); 1655 struct cifsInodeInfo *cifs_i = CIFS_I(inode);
1656 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
1656 1657
1657 if (cifs_i->clientCanCacheRead) 1658 if (cifs_i->clientCanCacheRead)
1658 return false; 1659 return false;
@@ -1663,12 +1664,12 @@ cifs_inode_needs_reval(struct inode *inode)
1663 if (cifs_i->time == 0) 1664 if (cifs_i->time == 0)
1664 return true; 1665 return true;
1665 1666
1666 /* FIXME: the actimeo should be tunable */ 1667 if (!time_in_range(jiffies, cifs_i->time,
1667 if (time_after_eq(jiffies, cifs_i->time + HZ)) 1668 cifs_i->time + cifs_sb->actimeo))
1668 return true; 1669 return true;
1669 1670
1670 /* hardlinked files w/ noserverino get "special" treatment */ 1671 /* hardlinked files w/ noserverino get "special" treatment */
1671 if (!(CIFS_SB(inode->i_sb)->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) && 1672 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) &&
1672 S_ISREG(inode->i_mode) && inode->i_nlink != 1) 1673 S_ISREG(inode->i_mode) && inode->i_nlink != 1)
1673 return true; 1674 return true;
1674 1675
@@ -2121,7 +2122,7 @@ cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs)
2121 2122
2122 if (attrs->ia_valid & ATTR_MODE) { 2123 if (attrs->ia_valid & ATTR_MODE) {
2123 rc = 0; 2124 rc = 0;
2124#ifdef CONFIG_CIFS_EXPERIMENTAL 2125#ifdef CONFIG_CIFS_ACL
2125 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) { 2126 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) {
2126 rc = mode_to_cifs_acl(inode, full_path, mode); 2127 rc = mode_to_cifs_acl(inode, full_path, mode);
2127 if (rc) { 2128 if (rc) {
@@ -2130,7 +2131,7 @@ cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs)
2130 goto cifs_setattr_exit; 2131 goto cifs_setattr_exit;
2131 } 2132 }
2132 } else 2133 } else
2133#endif 2134#endif /* CONFIG_CIFS_ACL */
2134 if (((mode & S_IWUGO) == 0) && 2135 if (((mode & S_IWUGO) == 0) &&
2135 (cifsInode->cifsAttrs & ATTR_READONLY) == 0) { 2136 (cifsInode->cifsAttrs & ATTR_READONLY) == 0) {
2136 2137
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index 32d300e8f20e..a73eb9f4bdaf 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -759,18 +759,6 @@ static int cifs_filldir(char *pfindEntry, struct file *file, filldir_t filldir,
759 rc = filldir(direntry, qstring.name, qstring.len, file->f_pos, 759 rc = filldir(direntry, qstring.name, qstring.len, file->f_pos,
760 ino, fattr.cf_dtype); 760 ino, fattr.cf_dtype);
761 761
762 /*
763 * we can not return filldir errors to the caller since they are
764 * "normal" when the stat blocksize is too small - we return remapped
765 * error instead
766 *
767 * FIXME: This looks bogus. filldir returns -EOVERFLOW in the above
768 * case already. Why should we be clobbering other errors from it?
769 */
770 if (rc) {
771 cFYI(1, "filldir rc = %d", rc);
772 rc = -EOVERFLOW;
773 }
774 dput(tmp_dentry); 762 dput(tmp_dentry);
775 return rc; 763 return rc;
776} 764}
diff --git a/fs/exec.c b/fs/exec.c
index d68c378a3137..c62efcb959c7 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -275,6 +275,11 @@ static int __bprm_mm_init(struct linux_binprm *bprm)
275 vma->vm_flags = VM_STACK_FLAGS | VM_STACK_INCOMPLETE_SETUP; 275 vma->vm_flags = VM_STACK_FLAGS | VM_STACK_INCOMPLETE_SETUP;
276 vma->vm_page_prot = vm_get_page_prot(vma->vm_flags); 276 vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
277 INIT_LIST_HEAD(&vma->anon_vma_chain); 277 INIT_LIST_HEAD(&vma->anon_vma_chain);
278
279 err = security_file_mmap(NULL, 0, 0, 0, vma->vm_start, 1);
280 if (err)
281 goto err;
282
278 err = insert_vm_struct(mm, vma); 283 err = insert_vm_struct(mm, vma);
279 if (err) 284 if (err)
280 goto err; 285 goto err;
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 6a5edea2d70b..94ce3d7a1c4b 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -910,6 +910,7 @@ struct ext4_inode_info {
910#define EXT4_MOUNT_JOURNAL_CHECKSUM 0x800000 /* Journal checksums */ 910#define EXT4_MOUNT_JOURNAL_CHECKSUM 0x800000 /* Journal checksums */
911#define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT 0x1000000 /* Journal Async Commit */ 911#define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT 0x1000000 /* Journal Async Commit */
912#define EXT4_MOUNT_I_VERSION 0x2000000 /* i_version support */ 912#define EXT4_MOUNT_I_VERSION 0x2000000 /* i_version support */
913#define EXT4_MOUNT_MBLK_IO_SUBMIT 0x4000000 /* multi-block io submits */
913#define EXT4_MOUNT_DELALLOC 0x8000000 /* Delalloc support */ 914#define EXT4_MOUNT_DELALLOC 0x8000000 /* Delalloc support */
914#define EXT4_MOUNT_DATA_ERR_ABORT 0x10000000 /* Abort on file data write */ 915#define EXT4_MOUNT_DATA_ERR_ABORT 0x10000000 /* Abort on file data write */
915#define EXT4_MOUNT_BLOCK_VALIDITY 0x20000000 /* Block validity checking */ 916#define EXT4_MOUNT_BLOCK_VALIDITY 0x20000000 /* Block validity checking */
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index bdbe69902207..e659597b690b 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -2125,9 +2125,12 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd,
2125 */ 2125 */
2126 if (unlikely(journal_data && PageChecked(page))) 2126 if (unlikely(journal_data && PageChecked(page)))
2127 err = __ext4_journalled_writepage(page, len); 2127 err = __ext4_journalled_writepage(page, len);
2128 else 2128 else if (test_opt(inode->i_sb, MBLK_IO_SUBMIT))
2129 err = ext4_bio_write_page(&io_submit, page, 2129 err = ext4_bio_write_page(&io_submit, page,
2130 len, mpd->wbc); 2130 len, mpd->wbc);
2131 else
2132 err = block_write_full_page(page,
2133 noalloc_get_block_write, mpd->wbc);
2131 2134
2132 if (!err) 2135 if (!err)
2133 mpd->pages_written++; 2136 mpd->pages_written++;
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 92203b8a099f..dc40e75cba88 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -872,7 +872,7 @@ static struct buffer_head * ext4_find_entry (struct inode *dir,
872 if (namelen > EXT4_NAME_LEN) 872 if (namelen > EXT4_NAME_LEN)
873 return NULL; 873 return NULL;
874 if ((namelen <= 2) && (name[0] == '.') && 874 if ((namelen <= 2) && (name[0] == '.') &&
875 (name[1] == '.' || name[1] == '0')) { 875 (name[1] == '.' || name[1] == '\0')) {
876 /* 876 /*
877 * "." or ".." will only be in the first block 877 * "." or ".." will only be in the first block
878 * NFS may look up ".."; "." should be handled by the VFS 878 * NFS may look up ".."; "." should be handled by the VFS
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index dc963929de65..981c8477adab 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -232,6 +232,8 @@ static int setup_new_group_blocks(struct super_block *sb,
232 GFP_NOFS); 232 GFP_NOFS);
233 if (err) 233 if (err)
234 goto exit_bh; 234 goto exit_bh;
235 for (i = 0, bit = gdblocks + 1; i < reserved_gdb; i++, bit++)
236 ext4_set_bit(bit, bh->b_data);
235 237
236 ext4_debug("mark block bitmap %#04llx (+%llu)\n", input->block_bitmap, 238 ext4_debug("mark block bitmap %#04llx (+%llu)\n", input->block_bitmap,
237 input->block_bitmap - start); 239 input->block_bitmap - start);
@@ -247,6 +249,9 @@ static int setup_new_group_blocks(struct super_block *sb,
247 err = sb_issue_zeroout(sb, block, sbi->s_itb_per_group, GFP_NOFS); 249 err = sb_issue_zeroout(sb, block, sbi->s_itb_per_group, GFP_NOFS);
248 if (err) 250 if (err)
249 goto exit_bh; 251 goto exit_bh;
252 for (i = 0, bit = input->inode_table - start;
253 i < sbi->s_itb_per_group; i++, bit++)
254 ext4_set_bit(bit, bh->b_data);
250 255
251 if ((err = extend_or_restart_transaction(handle, 2, bh))) 256 if ((err = extend_or_restart_transaction(handle, 2, bh)))
252 goto exit_bh; 257 goto exit_bh;
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index e32195d6aac3..fb15c9c0be74 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1026,6 +1026,8 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs)
1026 !(def_mount_opts & EXT4_DEFM_NODELALLOC)) 1026 !(def_mount_opts & EXT4_DEFM_NODELALLOC))
1027 seq_puts(seq, ",nodelalloc"); 1027 seq_puts(seq, ",nodelalloc");
1028 1028
1029 if (test_opt(sb, MBLK_IO_SUBMIT))
1030 seq_puts(seq, ",mblk_io_submit");
1029 if (sbi->s_stripe) 1031 if (sbi->s_stripe)
1030 seq_printf(seq, ",stripe=%lu", sbi->s_stripe); 1032 seq_printf(seq, ",stripe=%lu", sbi->s_stripe);
1031 /* 1033 /*
@@ -1239,8 +1241,8 @@ enum {
1239 Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_jqfmt_vfsv1, Opt_quota, 1241 Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_jqfmt_vfsv1, Opt_quota,
1240 Opt_noquota, Opt_ignore, Opt_barrier, Opt_nobarrier, Opt_err, 1242 Opt_noquota, Opt_ignore, Opt_barrier, Opt_nobarrier, Opt_err,
1241 Opt_resize, Opt_usrquota, Opt_grpquota, Opt_i_version, 1243 Opt_resize, Opt_usrquota, Opt_grpquota, Opt_i_version,
1242 Opt_stripe, Opt_delalloc, Opt_nodelalloc, 1244 Opt_stripe, Opt_delalloc, Opt_nodelalloc, Opt_mblk_io_submit,
1243 Opt_block_validity, Opt_noblock_validity, 1245 Opt_nomblk_io_submit, Opt_block_validity, Opt_noblock_validity,
1244 Opt_inode_readahead_blks, Opt_journal_ioprio, 1246 Opt_inode_readahead_blks, Opt_journal_ioprio,
1245 Opt_dioread_nolock, Opt_dioread_lock, 1247 Opt_dioread_nolock, Opt_dioread_lock,
1246 Opt_discard, Opt_nodiscard, 1248 Opt_discard, Opt_nodiscard,
@@ -1304,6 +1306,8 @@ static const match_table_t tokens = {
1304 {Opt_resize, "resize"}, 1306 {Opt_resize, "resize"},
1305 {Opt_delalloc, "delalloc"}, 1307 {Opt_delalloc, "delalloc"},
1306 {Opt_nodelalloc, "nodelalloc"}, 1308 {Opt_nodelalloc, "nodelalloc"},
1309 {Opt_mblk_io_submit, "mblk_io_submit"},
1310 {Opt_nomblk_io_submit, "nomblk_io_submit"},
1307 {Opt_block_validity, "block_validity"}, 1311 {Opt_block_validity, "block_validity"},
1308 {Opt_noblock_validity, "noblock_validity"}, 1312 {Opt_noblock_validity, "noblock_validity"},
1309 {Opt_inode_readahead_blks, "inode_readahead_blks=%u"}, 1313 {Opt_inode_readahead_blks, "inode_readahead_blks=%u"},
@@ -1725,6 +1729,12 @@ set_qf_format:
1725 case Opt_nodelalloc: 1729 case Opt_nodelalloc:
1726 clear_opt(sbi->s_mount_opt, DELALLOC); 1730 clear_opt(sbi->s_mount_opt, DELALLOC);
1727 break; 1731 break;
1732 case Opt_mblk_io_submit:
1733 set_opt(sbi->s_mount_opt, MBLK_IO_SUBMIT);
1734 break;
1735 case Opt_nomblk_io_submit:
1736 clear_opt(sbi->s_mount_opt, MBLK_IO_SUBMIT);
1737 break;
1728 case Opt_stripe: 1738 case Opt_stripe:
1729 if (match_int(&args[0], &option)) 1739 if (match_int(&args[0], &option))
1730 return 0; 1740 return 0;
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 9242d294fe90..8b984a2cebbd 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -13,6 +13,7 @@
13#include <linux/kernel.h> 13#include <linux/kernel.h>
14#include <linux/sched.h> 14#include <linux/sched.h>
15#include <linux/module.h> 15#include <linux/module.h>
16#include <linux/compat.h>
16 17
17static const struct file_operations fuse_direct_io_file_operations; 18static const struct file_operations fuse_direct_io_file_operations;
18 19
@@ -1628,6 +1629,58 @@ static int fuse_ioctl_copy_user(struct page **pages, struct iovec *iov,
1628} 1629}
1629 1630
1630/* 1631/*
1632 * CUSE servers compiled on 32bit broke on 64bit kernels because the
1633 * ABI was defined to be 'struct iovec' which is different on 32bit
1634 * and 64bit. Fortunately we can determine which structure the server
1635 * used from the size of the reply.
1636 */
1637static int fuse_copy_ioctl_iovec(struct iovec *dst, void *src,
1638 size_t transferred, unsigned count,
1639 bool is_compat)
1640{
1641#ifdef CONFIG_COMPAT
1642 if (count * sizeof(struct compat_iovec) == transferred) {
1643 struct compat_iovec *ciov = src;
1644 unsigned i;
1645
1646 /*
1647 * With this interface a 32bit server cannot support
1648 * non-compat (i.e. ones coming from 64bit apps) ioctl
1649 * requests
1650 */
1651 if (!is_compat)
1652 return -EINVAL;
1653
1654 for (i = 0; i < count; i++) {
1655 dst[i].iov_base = compat_ptr(ciov[i].iov_base);
1656 dst[i].iov_len = ciov[i].iov_len;
1657 }
1658 return 0;
1659 }
1660#endif
1661
1662 if (count * sizeof(struct iovec) != transferred)
1663 return -EIO;
1664
1665 memcpy(dst, src, transferred);
1666 return 0;
1667}
1668
1669/* Make sure iov_length() won't overflow */
1670static int fuse_verify_ioctl_iov(struct iovec *iov, size_t count)
1671{
1672 size_t n;
1673 u32 max = FUSE_MAX_PAGES_PER_REQ << PAGE_SHIFT;
1674
1675 for (n = 0; n < count; n++) {
1676 if (iov->iov_len > (size_t) max)
1677 return -ENOMEM;
1678 max -= iov->iov_len;
1679 }
1680 return 0;
1681}
1682
1683/*
1631 * For ioctls, there is no generic way to determine how much memory 1684 * For ioctls, there is no generic way to determine how much memory
1632 * needs to be read and/or written. Furthermore, ioctls are allowed 1685 * needs to be read and/or written. Furthermore, ioctls are allowed
1633 * to dereference the passed pointer, so the parameter requires deep 1686 * to dereference the passed pointer, so the parameter requires deep
@@ -1808,18 +1861,25 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
1808 in_iovs + out_iovs > FUSE_IOCTL_MAX_IOV) 1861 in_iovs + out_iovs > FUSE_IOCTL_MAX_IOV)
1809 goto out; 1862 goto out;
1810 1863
1811 err = -EIO;
1812 if ((in_iovs + out_iovs) * sizeof(struct iovec) != transferred)
1813 goto out;
1814
1815 /* okay, copy in iovs and retry */
1816 vaddr = kmap_atomic(pages[0], KM_USER0); 1864 vaddr = kmap_atomic(pages[0], KM_USER0);
1817 memcpy(page_address(iov_page), vaddr, transferred); 1865 err = fuse_copy_ioctl_iovec(page_address(iov_page), vaddr,
1866 transferred, in_iovs + out_iovs,
1867 (flags & FUSE_IOCTL_COMPAT) != 0);
1818 kunmap_atomic(vaddr, KM_USER0); 1868 kunmap_atomic(vaddr, KM_USER0);
1869 if (err)
1870 goto out;
1819 1871
1820 in_iov = page_address(iov_page); 1872 in_iov = page_address(iov_page);
1821 out_iov = in_iov + in_iovs; 1873 out_iov = in_iov + in_iovs;
1822 1874
1875 err = fuse_verify_ioctl_iov(in_iov, in_iovs);
1876 if (err)
1877 goto out;
1878
1879 err = fuse_verify_ioctl_iov(out_iov, out_iovs);
1880 if (err)
1881 goto out;
1882
1823 goto retry; 1883 goto retry;
1824 } 1884 }
1825 1885
diff --git a/fs/logfs/journal.c b/fs/logfs/journal.c
index f46ee8b0e135..9da29706f91c 100644
--- a/fs/logfs/journal.c
+++ b/fs/logfs/journal.c
@@ -828,7 +828,7 @@ void do_logfs_journal_wl_pass(struct super_block *sb)
828 super->s_journal_seg[i] = segno; 828 super->s_journal_seg[i] = segno;
829 super->s_journal_ec[i] = ec; 829 super->s_journal_ec[i] = ec;
830 logfs_set_segment_reserved(sb, segno); 830 logfs_set_segment_reserved(sb, segno);
831 err = btree_insert32(head, segno, (void *)1, GFP_KERNEL); 831 err = btree_insert32(head, segno, (void *)1, GFP_NOFS);
832 BUG_ON(err); /* mempool should prevent this */ 832 BUG_ON(err); /* mempool should prevent this */
833 err = logfs_erase_segment(sb, segno, 1); 833 err = logfs_erase_segment(sb, segno, 1);
834 BUG_ON(err); /* FIXME: remount-ro would be nicer */ 834 BUG_ON(err); /* FIXME: remount-ro would be nicer */
diff --git a/fs/logfs/readwrite.c b/fs/logfs/readwrite.c
index 6127baf0e188..ee99a9f5dfd3 100644
--- a/fs/logfs/readwrite.c
+++ b/fs/logfs/readwrite.c
@@ -1994,6 +1994,9 @@ static int do_write_inode(struct inode *inode)
1994 1994
1995 /* FIXME: transaction is part of logfs_block now. Is that enough? */ 1995 /* FIXME: transaction is part of logfs_block now. Is that enough? */
1996 err = logfs_write_buf(master_inode, page, 0); 1996 err = logfs_write_buf(master_inode, page, 0);
1997 if (err)
1998 move_page_to_inode(inode, page);
1999
1997 logfs_put_write_page(page); 2000 logfs_put_write_page(page);
1998 return err; 2001 return err;
1999} 2002}
diff --git a/fs/namei.c b/fs/namei.c
index 5362af9b7372..4ff7ca530533 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1748,6 +1748,9 @@ struct file *do_filp_open(int dfd, const char *pathname,
1748 if (!(open_flag & O_CREAT)) 1748 if (!(open_flag & O_CREAT))
1749 mode = 0; 1749 mode = 0;
1750 1750
1751 /* Must never be set by userspace */
1752 open_flag &= ~FMODE_NONOTIFY;
1753
1751 /* 1754 /*
1752 * O_SYNC is implemented as __O_SYNC|O_DSYNC. As many places only 1755 * O_SYNC is implemented as __O_SYNC|O_DSYNC. As many places only
1753 * check for O_DSYNC if the need any syncing at all we enforce it's 1756 * check for O_DSYNC if the need any syncing at all we enforce it's
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index f0a384e2ae63..996dd8989a91 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -57,7 +57,7 @@ static int nfs_rename(struct inode *, struct dentry *,
57 struct inode *, struct dentry *); 57 struct inode *, struct dentry *);
58static int nfs_fsync_dir(struct file *, int); 58static int nfs_fsync_dir(struct file *, int);
59static loff_t nfs_llseek_dir(struct file *, loff_t, int); 59static loff_t nfs_llseek_dir(struct file *, loff_t, int);
60static int nfs_readdir_clear_array(struct page*, gfp_t); 60static void nfs_readdir_clear_array(struct page*);
61 61
62const struct file_operations nfs_dir_operations = { 62const struct file_operations nfs_dir_operations = {
63 .llseek = nfs_llseek_dir, 63 .llseek = nfs_llseek_dir,
@@ -83,8 +83,8 @@ const struct inode_operations nfs_dir_inode_operations = {
83 .setattr = nfs_setattr, 83 .setattr = nfs_setattr,
84}; 84};
85 85
86const struct address_space_operations nfs_dir_addr_space_ops = { 86const struct address_space_operations nfs_dir_aops = {
87 .releasepage = nfs_readdir_clear_array, 87 .freepage = nfs_readdir_clear_array,
88}; 88};
89 89
90#ifdef CONFIG_NFS_V3 90#ifdef CONFIG_NFS_V3
@@ -178,6 +178,7 @@ typedef struct {
178 struct page *page; 178 struct page *page;
179 unsigned long page_index; 179 unsigned long page_index;
180 u64 *dir_cookie; 180 u64 *dir_cookie;
181 u64 last_cookie;
181 loff_t current_index; 182 loff_t current_index;
182 decode_dirent_t decode; 183 decode_dirent_t decode;
183 184
@@ -213,17 +214,15 @@ void nfs_readdir_release_array(struct page *page)
213 * we are freeing strings created by nfs_add_to_readdir_array() 214 * we are freeing strings created by nfs_add_to_readdir_array()
214 */ 215 */
215static 216static
216int nfs_readdir_clear_array(struct page *page, gfp_t mask) 217void nfs_readdir_clear_array(struct page *page)
217{ 218{
218 struct nfs_cache_array *array = nfs_readdir_get_array(page); 219 struct nfs_cache_array *array;
219 int i; 220 int i;
220 221
221 if (IS_ERR(array)) 222 array = kmap_atomic(page, KM_USER0);
222 return PTR_ERR(array);
223 for (i = 0; i < array->size; i++) 223 for (i = 0; i < array->size; i++)
224 kfree(array->array[i].string.name); 224 kfree(array->array[i].string.name);
225 nfs_readdir_release_array(page); 225 kunmap_atomic(array, KM_USER0);
226 return 0;
227} 226}
228 227
229/* 228/*
@@ -272,7 +271,7 @@ int nfs_readdir_add_to_array(struct nfs_entry *entry, struct page *page)
272 goto out; 271 goto out;
273 array->last_cookie = entry->cookie; 272 array->last_cookie = entry->cookie;
274 array->size++; 273 array->size++;
275 if (entry->eof == 1) 274 if (entry->eof != 0)
276 array->eof_index = array->size; 275 array->eof_index = array->size;
277out: 276out:
278 nfs_readdir_release_array(page); 277 nfs_readdir_release_array(page);
@@ -312,15 +311,14 @@ int nfs_readdir_search_for_cookie(struct nfs_cache_array *array, nfs_readdir_des
312 for (i = 0; i < array->size; i++) { 311 for (i = 0; i < array->size; i++) {
313 if (array->array[i].cookie == *desc->dir_cookie) { 312 if (array->array[i].cookie == *desc->dir_cookie) {
314 desc->cache_entry_index = i; 313 desc->cache_entry_index = i;
315 status = 0; 314 return 0;
316 goto out;
317 } 315 }
318 } 316 }
319 if (i == array->eof_index) { 317 if (array->eof_index >= 0) {
320 desc->eof = 1;
321 status = -EBADCOOKIE; 318 status = -EBADCOOKIE;
319 if (*desc->dir_cookie == array->last_cookie)
320 desc->eof = 1;
322 } 321 }
323out:
324 return status; 322 return status;
325} 323}
326 324
@@ -328,10 +326,7 @@ static
328int nfs_readdir_search_array(nfs_readdir_descriptor_t *desc) 326int nfs_readdir_search_array(nfs_readdir_descriptor_t *desc)
329{ 327{
330 struct nfs_cache_array *array; 328 struct nfs_cache_array *array;
331 int status = -EBADCOOKIE; 329 int status;
332
333 if (desc->dir_cookie == NULL)
334 goto out;
335 330
336 array = nfs_readdir_get_array(desc->page); 331 array = nfs_readdir_get_array(desc->page);
337 if (IS_ERR(array)) { 332 if (IS_ERR(array)) {
@@ -344,6 +339,10 @@ int nfs_readdir_search_array(nfs_readdir_descriptor_t *desc)
344 else 339 else
345 status = nfs_readdir_search_for_cookie(array, desc); 340 status = nfs_readdir_search_for_cookie(array, desc);
346 341
342 if (status == -EAGAIN) {
343 desc->last_cookie = array->last_cookie;
344 desc->page_index++;
345 }
347 nfs_readdir_release_array(desc->page); 346 nfs_readdir_release_array(desc->page);
348out: 347out:
349 return status; 348 return status;
@@ -490,7 +489,7 @@ int nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *en
490 489
491 count++; 490 count++;
492 491
493 if (desc->plus == 1) 492 if (desc->plus != 0)
494 nfs_prime_dcache(desc->file->f_path.dentry, entry); 493 nfs_prime_dcache(desc->file->f_path.dentry, entry);
495 494
496 status = nfs_readdir_add_to_array(entry, page); 495 status = nfs_readdir_add_to_array(entry, page);
@@ -498,7 +497,7 @@ int nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *en
498 break; 497 break;
499 } while (!entry->eof); 498 } while (!entry->eof);
500 499
501 if (count == 0 || (status == -EBADCOOKIE && entry->eof == 1)) { 500 if (count == 0 || (status == -EBADCOOKIE && entry->eof != 0)) {
502 array = nfs_readdir_get_array(page); 501 array = nfs_readdir_get_array(page);
503 if (!IS_ERR(array)) { 502 if (!IS_ERR(array)) {
504 array->eof_index = array->size; 503 array->eof_index = array->size;
@@ -563,7 +562,7 @@ int nfs_readdir_xdr_to_array(nfs_readdir_descriptor_t *desc, struct page *page,
563 unsigned int array_size = ARRAY_SIZE(pages); 562 unsigned int array_size = ARRAY_SIZE(pages);
564 563
565 entry.prev_cookie = 0; 564 entry.prev_cookie = 0;
566 entry.cookie = *desc->dir_cookie; 565 entry.cookie = desc->last_cookie;
567 entry.eof = 0; 566 entry.eof = 0;
568 entry.fh = nfs_alloc_fhandle(); 567 entry.fh = nfs_alloc_fhandle();
569 entry.fattr = nfs_alloc_fattr(); 568 entry.fattr = nfs_alloc_fattr();
@@ -636,6 +635,8 @@ int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page* page)
636static 635static
637void cache_page_release(nfs_readdir_descriptor_t *desc) 636void cache_page_release(nfs_readdir_descriptor_t *desc)
638{ 637{
638 if (!desc->page->mapping)
639 nfs_readdir_clear_array(desc->page);
639 page_cache_release(desc->page); 640 page_cache_release(desc->page);
640 desc->page = NULL; 641 desc->page = NULL;
641} 642}
@@ -660,9 +661,8 @@ int find_cache_page(nfs_readdir_descriptor_t *desc)
660 return PTR_ERR(desc->page); 661 return PTR_ERR(desc->page);
661 662
662 res = nfs_readdir_search_array(desc); 663 res = nfs_readdir_search_array(desc);
663 if (res == 0) 664 if (res != 0)
664 return 0; 665 cache_page_release(desc);
665 cache_page_release(desc);
666 return res; 666 return res;
667} 667}
668 668
@@ -672,22 +672,16 @@ int readdir_search_pagecache(nfs_readdir_descriptor_t *desc)
672{ 672{
673 int res; 673 int res;
674 674
675 if (desc->page_index == 0) 675 if (desc->page_index == 0) {
676 desc->current_index = 0; 676 desc->current_index = 0;
677 while (1) { 677 desc->last_cookie = 0;
678 res = find_cache_page(desc);
679 if (res != -EAGAIN)
680 break;
681 desc->page_index++;
682 } 678 }
679 do {
680 res = find_cache_page(desc);
681 } while (res == -EAGAIN);
683 return res; 682 return res;
684} 683}
685 684
686static inline unsigned int dt_type(struct inode *inode)
687{
688 return (inode->i_mode >> 12) & 15;
689}
690
691/* 685/*
692 * Once we've found the start of the dirent within a page: fill 'er up... 686 * Once we've found the start of the dirent within a page: fill 'er up...
693 */ 687 */
@@ -717,13 +711,12 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc, void *dirent,
717 break; 711 break;
718 } 712 }
719 file->f_pos++; 713 file->f_pos++;
720 desc->cache_entry_index = i;
721 if (i < (array->size-1)) 714 if (i < (array->size-1))
722 *desc->dir_cookie = array->array[i+1].cookie; 715 *desc->dir_cookie = array->array[i+1].cookie;
723 else 716 else
724 *desc->dir_cookie = array->last_cookie; 717 *desc->dir_cookie = array->last_cookie;
725 } 718 }
726 if (i == array->eof_index) 719 if (array->eof_index >= 0)
727 desc->eof = 1; 720 desc->eof = 1;
728 721
729 nfs_readdir_release_array(desc->page); 722 nfs_readdir_release_array(desc->page);
@@ -764,6 +757,7 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent,
764 } 757 }
765 758
766 desc->page_index = 0; 759 desc->page_index = 0;
760 desc->last_cookie = *desc->dir_cookie;
767 desc->page = page; 761 desc->page = page;
768 762
769 status = nfs_readdir_xdr_to_array(desc, page, inode); 763 status = nfs_readdir_xdr_to_array(desc, page, inode);
@@ -791,7 +785,7 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
791 struct inode *inode = dentry->d_inode; 785 struct inode *inode = dentry->d_inode;
792 nfs_readdir_descriptor_t my_desc, 786 nfs_readdir_descriptor_t my_desc,
793 *desc = &my_desc; 787 *desc = &my_desc;
794 int res = -ENOMEM; 788 int res;
795 789
796 dfprintk(FILE, "NFS: readdir(%s/%s) starting at cookie %llu\n", 790 dfprintk(FILE, "NFS: readdir(%s/%s) starting at cookie %llu\n",
797 dentry->d_parent->d_name.name, dentry->d_name.name, 791 dentry->d_parent->d_name.name, dentry->d_name.name,
@@ -816,7 +810,7 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
816 if (res < 0) 810 if (res < 0)
817 goto out; 811 goto out;
818 812
819 while (desc->eof != 1) { 813 do {
820 res = readdir_search_pagecache(desc); 814 res = readdir_search_pagecache(desc);
821 815
822 if (res == -EBADCOOKIE) { 816 if (res == -EBADCOOKIE) {
@@ -844,7 +838,7 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
844 res = nfs_do_filldir(desc, dirent, filldir); 838 res = nfs_do_filldir(desc, dirent, filldir);
845 if (res < 0) 839 if (res < 0)
846 break; 840 break;
847 } 841 } while (!desc->eof);
848out: 842out:
849 nfs_unblock_sillyrename(dentry); 843 nfs_unblock_sillyrename(dentry);
850 if (res > 0) 844 if (res > 0)
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 60677f9f1311..7bf029ef4084 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -693,6 +693,7 @@ do_getlk(struct file *filp, int cmd, struct file_lock *fl, int is_local)
693{ 693{
694 struct inode *inode = filp->f_mapping->host; 694 struct inode *inode = filp->f_mapping->host;
695 int status = 0; 695 int status = 0;
696 unsigned int saved_type = fl->fl_type;
696 697
697 /* Try local locking first */ 698 /* Try local locking first */
698 posix_test_lock(filp, fl); 699 posix_test_lock(filp, fl);
@@ -700,6 +701,7 @@ do_getlk(struct file *filp, int cmd, struct file_lock *fl, int is_local)
700 /* found a conflict */ 701 /* found a conflict */
701 goto out; 702 goto out;
702 } 703 }
704 fl->fl_type = saved_type;
703 705
704 if (nfs_have_delegation(inode, FMODE_READ)) 706 if (nfs_have_delegation(inode, FMODE_READ))
705 goto out_noconflict; 707 goto out_noconflict;
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 314f57164602..e67e31c73416 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -289,6 +289,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
289 } else if (S_ISDIR(inode->i_mode)) { 289 } else if (S_ISDIR(inode->i_mode)) {
290 inode->i_op = NFS_SB(sb)->nfs_client->rpc_ops->dir_inode_ops; 290 inode->i_op = NFS_SB(sb)->nfs_client->rpc_ops->dir_inode_ops;
291 inode->i_fop = &nfs_dir_operations; 291 inode->i_fop = &nfs_dir_operations;
292 inode->i_data.a_ops = &nfs_dir_aops;
292 if (nfs_server_capable(inode, NFS_CAP_READDIRPLUS)) 293 if (nfs_server_capable(inode, NFS_CAP_READDIRPLUS))
293 set_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(inode)->flags); 294 set_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(inode)->flags);
294 /* Deal with crossing mountpoints */ 295 /* Deal with crossing mountpoints */
diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c
index eceafe74f473..4f981f1f6689 100644
--- a/fs/nfs/mount_clnt.c
+++ b/fs/nfs/mount_clnt.c
@@ -505,13 +505,13 @@ static struct rpc_procinfo mnt3_procedures[] = {
505 505
506static struct rpc_version mnt_version1 = { 506static struct rpc_version mnt_version1 = {
507 .number = 1, 507 .number = 1,
508 .nrprocs = 2, 508 .nrprocs = ARRAY_SIZE(mnt_procedures),
509 .procs = mnt_procedures, 509 .procs = mnt_procedures,
510}; 510};
511 511
512static struct rpc_version mnt_version3 = { 512static struct rpc_version mnt_version3 = {
513 .number = 3, 513 .number = 3,
514 .nrprocs = 2, 514 .nrprocs = ARRAY_SIZE(mnt3_procedures),
515 .procs = mnt3_procedures, 515 .procs = mnt3_procedures,
516}; 516};
517 517
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 6a653ffd8e4e..4435e5e1f904 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -3361,6 +3361,8 @@ static ssize_t nfs4_proc_get_acl(struct inode *inode, void *buf, size_t buflen)
3361 ret = nfs_revalidate_inode(server, inode); 3361 ret = nfs_revalidate_inode(server, inode);
3362 if (ret < 0) 3362 if (ret < 0)
3363 return ret; 3363 return ret;
3364 if (NFS_I(inode)->cache_validity & NFS_INO_INVALID_ACL)
3365 nfs_zap_acl_cache(inode);
3364 ret = nfs4_read_cached_acl(inode, buf, buflen); 3366 ret = nfs4_read_cached_acl(inode, buf, buflen);
3365 if (ret != -ENOENT) 3367 if (ret != -ENOENT)
3366 return ret; 3368 return ret;
@@ -3389,6 +3391,13 @@ static int __nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t bufl
3389 nfs_inode_return_delegation(inode); 3391 nfs_inode_return_delegation(inode);
3390 buf_to_pages(buf, buflen, arg.acl_pages, &arg.acl_pgbase); 3392 buf_to_pages(buf, buflen, arg.acl_pages, &arg.acl_pgbase);
3391 ret = nfs4_call_sync(server, &msg, &arg, &res, 1); 3393 ret = nfs4_call_sync(server, &msg, &arg, &res, 1);
3394 /*
3395 * Acl update can result in inode attribute update.
3396 * so mark the attribute cache invalid.
3397 */
3398 spin_lock(&inode->i_lock);
3399 NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ATTR;
3400 spin_unlock(&inode->i_lock);
3392 nfs_access_zap_cache(inode); 3401 nfs_access_zap_cache(inode);
3393 nfs_zap_acl_cache(inode); 3402 nfs_zap_acl_cache(inode);
3394 return ret; 3403 return ret;
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 137b549e63db..b68536cc9046 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -115,7 +115,7 @@ int nfs_set_page_tag_locked(struct nfs_page *req)
115{ 115{
116 if (!nfs_lock_request_dontget(req)) 116 if (!nfs_lock_request_dontget(req))
117 return 0; 117 return 0;
118 if (req->wb_page != NULL) 118 if (test_bit(PG_MAPPED, &req->wb_flags))
119 radix_tree_tag_set(&NFS_I(req->wb_context->path.dentry->d_inode)->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED); 119 radix_tree_tag_set(&NFS_I(req->wb_context->path.dentry->d_inode)->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED);
120 return 1; 120 return 1;
121} 121}
@@ -125,7 +125,7 @@ int nfs_set_page_tag_locked(struct nfs_page *req)
125 */ 125 */
126void nfs_clear_page_tag_locked(struct nfs_page *req) 126void nfs_clear_page_tag_locked(struct nfs_page *req)
127{ 127{
128 if (req->wb_page != NULL) { 128 if (test_bit(PG_MAPPED, &req->wb_flags)) {
129 struct inode *inode = req->wb_context->path.dentry->d_inode; 129 struct inode *inode = req->wb_context->path.dentry->d_inode;
130 struct nfs_inode *nfsi = NFS_I(inode); 130 struct nfs_inode *nfsi = NFS_I(inode);
131 131
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index e4b62c6f5a6e..aedcaa7f291f 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -152,7 +152,6 @@ static void nfs_readpage_release(struct nfs_page *req)
152 (long long)NFS_FILEID(req->wb_context->path.dentry->d_inode), 152 (long long)NFS_FILEID(req->wb_context->path.dentry->d_inode),
153 req->wb_bytes, 153 req->wb_bytes,
154 (long long)req_offset(req)); 154 (long long)req_offset(req));
155 nfs_clear_request(req);
156 nfs_release_request(req); 155 nfs_release_request(req);
157} 156}
158 157
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 3c045044fca2..4100630c9a5b 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -1069,12 +1069,10 @@ static int nfs_parse_mount_options(char *raw,
1069 mnt->flags |= NFS_MOUNT_VER3; 1069 mnt->flags |= NFS_MOUNT_VER3;
1070 mnt->version = 3; 1070 mnt->version = 3;
1071 break; 1071 break;
1072#ifdef CONFIG_NFS_V4
1073 case Opt_v4: 1072 case Opt_v4:
1074 mnt->flags &= ~NFS_MOUNT_VER3; 1073 mnt->flags &= ~NFS_MOUNT_VER3;
1075 mnt->version = 4; 1074 mnt->version = 4;
1076 break; 1075 break;
1077#endif
1078 case Opt_udp: 1076 case Opt_udp:
1079 mnt->flags &= ~NFS_MOUNT_TCP; 1077 mnt->flags &= ~NFS_MOUNT_TCP;
1080 mnt->nfs_server.protocol = XPRT_TRANSPORT_UDP; 1078 mnt->nfs_server.protocol = XPRT_TRANSPORT_UDP;
@@ -1286,12 +1284,10 @@ static int nfs_parse_mount_options(char *raw,
1286 mnt->flags |= NFS_MOUNT_VER3; 1284 mnt->flags |= NFS_MOUNT_VER3;
1287 mnt->version = 3; 1285 mnt->version = 3;
1288 break; 1286 break;
1289#ifdef CONFIG_NFS_V4
1290 case NFS4_VERSION: 1287 case NFS4_VERSION:
1291 mnt->flags &= ~NFS_MOUNT_VER3; 1288 mnt->flags &= ~NFS_MOUNT_VER3;
1292 mnt->version = 4; 1289 mnt->version = 4;
1293 break; 1290 break;
1294#endif
1295 default: 1291 default:
1296 goto out_invalid_value; 1292 goto out_invalid_value;
1297 } 1293 }
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 4c14c17a5276..10d648ea128b 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -390,6 +390,7 @@ static int nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
390 if (nfs_have_delegation(inode, FMODE_WRITE)) 390 if (nfs_have_delegation(inode, FMODE_WRITE))
391 nfsi->change_attr++; 391 nfsi->change_attr++;
392 } 392 }
393 set_bit(PG_MAPPED, &req->wb_flags);
393 SetPagePrivate(req->wb_page); 394 SetPagePrivate(req->wb_page);
394 set_page_private(req->wb_page, (unsigned long)req); 395 set_page_private(req->wb_page, (unsigned long)req);
395 nfsi->npages++; 396 nfsi->npages++;
@@ -415,6 +416,7 @@ static void nfs_inode_remove_request(struct nfs_page *req)
415 spin_lock(&inode->i_lock); 416 spin_lock(&inode->i_lock);
416 set_page_private(req->wb_page, 0); 417 set_page_private(req->wb_page, 0);
417 ClearPagePrivate(req->wb_page); 418 ClearPagePrivate(req->wb_page);
419 clear_bit(PG_MAPPED, &req->wb_flags);
418 radix_tree_delete(&nfsi->nfs_page_tree, req->wb_index); 420 radix_tree_delete(&nfsi->nfs_page_tree, req->wb_index);
419 nfsi->npages--; 421 nfsi->npages--;
420 if (!nfsi->npages) { 422 if (!nfsi->npages) {
@@ -422,7 +424,6 @@ static void nfs_inode_remove_request(struct nfs_page *req)
422 iput(inode); 424 iput(inode);
423 } else 425 } else
424 spin_unlock(&inode->i_lock); 426 spin_unlock(&inode->i_lock);
425 nfs_clear_request(req);
426 nfs_release_request(req); 427 nfs_release_request(req);
427} 428}
428 429
diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
index 2a533a0af2a9..7e84a852cdae 100644
--- a/fs/nfsd/nfs3xdr.c
+++ b/fs/nfsd/nfs3xdr.c
@@ -260,9 +260,11 @@ void fill_post_wcc(struct svc_fh *fhp)
260 err = vfs_getattr(fhp->fh_export->ex_path.mnt, fhp->fh_dentry, 260 err = vfs_getattr(fhp->fh_export->ex_path.mnt, fhp->fh_dentry,
261 &fhp->fh_post_attr); 261 &fhp->fh_post_attr);
262 fhp->fh_post_change = fhp->fh_dentry->d_inode->i_version; 262 fhp->fh_post_change = fhp->fh_dentry->d_inode->i_version;
263 if (err) 263 if (err) {
264 fhp->fh_post_saved = 0; 264 fhp->fh_post_saved = 0;
265 else 265 /* Grab the ctime anyway - set_change_info might use it */
266 fhp->fh_post_attr.ctime = fhp->fh_dentry->d_inode->i_ctime;
267 } else
266 fhp->fh_post_saved = 1; 268 fhp->fh_post_saved = 1;
267} 269}
268 270
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index 4d476ff08ae6..60fce3dc5cb5 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -484,18 +484,17 @@ static inline bool nfsd4_not_cached(struct nfsd4_compoundres *resp)
484static inline void 484static inline void
485set_change_info(struct nfsd4_change_info *cinfo, struct svc_fh *fhp) 485set_change_info(struct nfsd4_change_info *cinfo, struct svc_fh *fhp)
486{ 486{
487 BUG_ON(!fhp->fh_pre_saved || !fhp->fh_post_saved); 487 BUG_ON(!fhp->fh_pre_saved);
488 cinfo->atomic = 1; 488 cinfo->atomic = fhp->fh_post_saved;
489 cinfo->change_supported = IS_I_VERSION(fhp->fh_dentry->d_inode); 489 cinfo->change_supported = IS_I_VERSION(fhp->fh_dentry->d_inode);
490 if (cinfo->change_supported) { 490
491 cinfo->before_change = fhp->fh_pre_change; 491 cinfo->before_change = fhp->fh_pre_change;
492 cinfo->after_change = fhp->fh_post_change; 492 cinfo->after_change = fhp->fh_post_change;
493 } else { 493 cinfo->before_ctime_sec = fhp->fh_pre_ctime.tv_sec;
494 cinfo->before_ctime_sec = fhp->fh_pre_ctime.tv_sec; 494 cinfo->before_ctime_nsec = fhp->fh_pre_ctime.tv_nsec;
495 cinfo->before_ctime_nsec = fhp->fh_pre_ctime.tv_nsec; 495 cinfo->after_ctime_sec = fhp->fh_post_attr.ctime.tv_sec;
496 cinfo->after_ctime_sec = fhp->fh_post_attr.ctime.tv_sec; 496 cinfo->after_ctime_nsec = fhp->fh_post_attr.ctime.tv_nsec;
497 cinfo->after_ctime_nsec = fhp->fh_post_attr.ctime.tv_nsec; 497
498 }
499} 498}
500 499
501int nfs4svc_encode_voidres(struct svc_rqst *, __be32 *, void *); 500int nfs4svc_encode_voidres(struct svc_rqst *, __be32 *, void *);
diff --git a/fs/nilfs2/gcinode.c b/fs/nilfs2/gcinode.c
index 33ad25ddd5c4..caf9a6a3fb54 100644
--- a/fs/nilfs2/gcinode.c
+++ b/fs/nilfs2/gcinode.c
@@ -176,7 +176,6 @@ int nilfs_gccache_wait_and_mark_dirty(struct buffer_head *bh)
176int nilfs_init_gcinode(struct inode *inode) 176int nilfs_init_gcinode(struct inode *inode)
177{ 177{
178 struct nilfs_inode_info *ii = NILFS_I(inode); 178 struct nilfs_inode_info *ii = NILFS_I(inode);
179 struct the_nilfs *nilfs = NILFS_SB(inode->i_sb)->s_nilfs;
180 179
181 inode->i_mode = S_IFREG; 180 inode->i_mode = S_IFREG;
182 mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS); 181 mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS);
@@ -186,14 +185,6 @@ int nilfs_init_gcinode(struct inode *inode)
186 ii->i_flags = 0; 185 ii->i_flags = 0;
187 nilfs_bmap_init_gc(ii->i_bmap); 186 nilfs_bmap_init_gc(ii->i_bmap);
188 187
189 /*
190 * Add the inode to GC inode list. Garbage Collection
191 * is serialized and no two processes manipulate the
192 * list simultaneously.
193 */
194 igrab(inode);
195 list_add(&NILFS_I(inode)->i_dirty, &nilfs->ns_gc_inodes);
196
197 return 0; 188 return 0;
198} 189}
199 190
diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c
index e00d9457c256..b185e937a335 100644
--- a/fs/nilfs2/ioctl.c
+++ b/fs/nilfs2/ioctl.c
@@ -337,6 +337,7 @@ static int nilfs_ioctl_move_blocks(struct super_block *sb,
337 struct nilfs_argv *argv, void *buf) 337 struct nilfs_argv *argv, void *buf)
338{ 338{
339 size_t nmembs = argv->v_nmembs; 339 size_t nmembs = argv->v_nmembs;
340 struct the_nilfs *nilfs = NILFS_SB(sb)->s_nilfs;
340 struct inode *inode; 341 struct inode *inode;
341 struct nilfs_vdesc *vdesc; 342 struct nilfs_vdesc *vdesc;
342 struct buffer_head *bh, *n; 343 struct buffer_head *bh, *n;
@@ -353,6 +354,17 @@ static int nilfs_ioctl_move_blocks(struct super_block *sb,
353 ret = PTR_ERR(inode); 354 ret = PTR_ERR(inode);
354 goto failed; 355 goto failed;
355 } 356 }
357 if (list_empty(&NILFS_I(inode)->i_dirty)) {
358 /*
359 * Add the inode to GC inode list. Garbage Collection
360 * is serialized and no two processes manipulate the
361 * list simultaneously.
362 */
363 igrab(inode);
364 list_add(&NILFS_I(inode)->i_dirty,
365 &nilfs->ns_gc_inodes);
366 }
367
356 do { 368 do {
357 ret = nilfs_ioctl_move_inode_block(inode, vdesc, 369 ret = nilfs_ioctl_move_inode_block(inode, vdesc,
358 &buffers); 370 &buffers);
diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
index b04f88eed09e..f35794b97e8e 100644
--- a/fs/notify/fanotify/fanotify.c
+++ b/fs/notify/fanotify/fanotify.c
@@ -92,7 +92,11 @@ static int fanotify_get_response_from_access(struct fsnotify_group *group,
92 92
93 pr_debug("%s: group=%p event=%p\n", __func__, group, event); 93 pr_debug("%s: group=%p event=%p\n", __func__, group, event);
94 94
95 wait_event(group->fanotify_data.access_waitq, event->response); 95 wait_event(group->fanotify_data.access_waitq, event->response ||
96 atomic_read(&group->fanotify_data.bypass_perm));
97
98 if (!event->response) /* bypass_perm set */
99 return 0;
96 100
97 /* userspace responded, convert to something usable */ 101 /* userspace responded, convert to something usable */
98 spin_lock(&event->lock); 102 spin_lock(&event->lock);
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index 063224812b7e..8b61220cffc5 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -106,20 +106,29 @@ static int create_fd(struct fsnotify_group *group, struct fsnotify_event *event)
106 return client_fd; 106 return client_fd;
107} 107}
108 108
109static ssize_t fill_event_metadata(struct fsnotify_group *group, 109static int fill_event_metadata(struct fsnotify_group *group,
110 struct fanotify_event_metadata *metadata, 110 struct fanotify_event_metadata *metadata,
111 struct fsnotify_event *event) 111 struct fsnotify_event *event)
112{ 112{
113 int ret = 0;
114
113 pr_debug("%s: group=%p metadata=%p event=%p\n", __func__, 115 pr_debug("%s: group=%p metadata=%p event=%p\n", __func__,
114 group, metadata, event); 116 group, metadata, event);
115 117
116 metadata->event_len = FAN_EVENT_METADATA_LEN; 118 metadata->event_len = FAN_EVENT_METADATA_LEN;
119 metadata->metadata_len = FAN_EVENT_METADATA_LEN;
117 metadata->vers = FANOTIFY_METADATA_VERSION; 120 metadata->vers = FANOTIFY_METADATA_VERSION;
118 metadata->mask = event->mask & FAN_ALL_OUTGOING_EVENTS; 121 metadata->mask = event->mask & FAN_ALL_OUTGOING_EVENTS;
119 metadata->pid = pid_vnr(event->tgid); 122 metadata->pid = pid_vnr(event->tgid);
120 metadata->fd = create_fd(group, event); 123 if (unlikely(event->mask & FAN_Q_OVERFLOW))
124 metadata->fd = FAN_NOFD;
125 else {
126 metadata->fd = create_fd(group, event);
127 if (metadata->fd < 0)
128 ret = metadata->fd;
129 }
121 130
122 return metadata->fd; 131 return ret;
123} 132}
124 133
125#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS 134#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
@@ -200,7 +209,7 @@ static int prepare_for_access_response(struct fsnotify_group *group,
200 209
201 mutex_lock(&group->fanotify_data.access_mutex); 210 mutex_lock(&group->fanotify_data.access_mutex);
202 211
203 if (group->fanotify_data.bypass_perm) { 212 if (atomic_read(&group->fanotify_data.bypass_perm)) {
204 mutex_unlock(&group->fanotify_data.access_mutex); 213 mutex_unlock(&group->fanotify_data.access_mutex);
205 kmem_cache_free(fanotify_response_event_cache, re); 214 kmem_cache_free(fanotify_response_event_cache, re);
206 event->response = FAN_ALLOW; 215 event->response = FAN_ALLOW;
@@ -257,24 +266,34 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group,
257 266
258 pr_debug("%s: group=%p event=%p\n", __func__, group, event); 267 pr_debug("%s: group=%p event=%p\n", __func__, group, event);
259 268
260 fd = fill_event_metadata(group, &fanotify_event_metadata, event); 269 ret = fill_event_metadata(group, &fanotify_event_metadata, event);
261 if (fd < 0) 270 if (ret < 0)
262 return fd; 271 goto out;
263 272
273 fd = fanotify_event_metadata.fd;
264 ret = prepare_for_access_response(group, event, fd); 274 ret = prepare_for_access_response(group, event, fd);
265 if (ret) 275 if (ret)
266 goto out_close_fd; 276 goto out_close_fd;
267 277
268 ret = -EFAULT; 278 ret = -EFAULT;
269 if (copy_to_user(buf, &fanotify_event_metadata, FAN_EVENT_METADATA_LEN)) 279 if (copy_to_user(buf, &fanotify_event_metadata,
280 fanotify_event_metadata.event_len))
270 goto out_kill_access_response; 281 goto out_kill_access_response;
271 282
272 return FAN_EVENT_METADATA_LEN; 283 return fanotify_event_metadata.event_len;
273 284
274out_kill_access_response: 285out_kill_access_response:
275 remove_access_response(group, event, fd); 286 remove_access_response(group, event, fd);
276out_close_fd: 287out_close_fd:
277 sys_close(fd); 288 if (fd != FAN_NOFD)
289 sys_close(fd);
290out:
291#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
292 if (event->mask & FAN_ALL_PERM_EVENTS) {
293 event->response = FAN_DENY;
294 wake_up(&group->fanotify_data.access_waitq);
295 }
296#endif
278 return ret; 297 return ret;
279} 298}
280 299
@@ -382,7 +401,7 @@ static int fanotify_release(struct inode *ignored, struct file *file)
382 401
383 mutex_lock(&group->fanotify_data.access_mutex); 402 mutex_lock(&group->fanotify_data.access_mutex);
384 403
385 group->fanotify_data.bypass_perm = true; 404 atomic_inc(&group->fanotify_data.bypass_perm);
386 405
387 list_for_each_entry_safe(re, lre, &group->fanotify_data.access_list, list) { 406 list_for_each_entry_safe(re, lre, &group->fanotify_data.access_list, list) {
388 pr_debug("%s: found group=%p re=%p event=%p\n", __func__, group, 407 pr_debug("%s: found group=%p re=%p event=%p\n", __func__, group,
@@ -586,11 +605,10 @@ static int fanotify_add_vfsmount_mark(struct fsnotify_group *group,
586{ 605{
587 struct fsnotify_mark *fsn_mark; 606 struct fsnotify_mark *fsn_mark;
588 __u32 added; 607 __u32 added;
608 int ret = 0;
589 609
590 fsn_mark = fsnotify_find_vfsmount_mark(group, mnt); 610 fsn_mark = fsnotify_find_vfsmount_mark(group, mnt);
591 if (!fsn_mark) { 611 if (!fsn_mark) {
592 int ret;
593
594 if (atomic_read(&group->num_marks) > group->fanotify_data.max_marks) 612 if (atomic_read(&group->num_marks) > group->fanotify_data.max_marks)
595 return -ENOSPC; 613 return -ENOSPC;
596 614
@@ -600,17 +618,16 @@ static int fanotify_add_vfsmount_mark(struct fsnotify_group *group,
600 618
601 fsnotify_init_mark(fsn_mark, fanotify_free_mark); 619 fsnotify_init_mark(fsn_mark, fanotify_free_mark);
602 ret = fsnotify_add_mark(fsn_mark, group, NULL, mnt, 0); 620 ret = fsnotify_add_mark(fsn_mark, group, NULL, mnt, 0);
603 if (ret) { 621 if (ret)
604 fanotify_free_mark(fsn_mark); 622 goto err;
605 return ret;
606 }
607 } 623 }
608 added = fanotify_mark_add_to_mask(fsn_mark, mask, flags); 624 added = fanotify_mark_add_to_mask(fsn_mark, mask, flags);
609 fsnotify_put_mark(fsn_mark); 625
610 if (added & ~mnt->mnt_fsnotify_mask) 626 if (added & ~mnt->mnt_fsnotify_mask)
611 fsnotify_recalc_vfsmount_mask(mnt); 627 fsnotify_recalc_vfsmount_mask(mnt);
612 628err:
613 return 0; 629 fsnotify_put_mark(fsn_mark);
630 return ret;
614} 631}
615 632
616static int fanotify_add_inode_mark(struct fsnotify_group *group, 633static int fanotify_add_inode_mark(struct fsnotify_group *group,
@@ -619,6 +636,7 @@ static int fanotify_add_inode_mark(struct fsnotify_group *group,
619{ 636{
620 struct fsnotify_mark *fsn_mark; 637 struct fsnotify_mark *fsn_mark;
621 __u32 added; 638 __u32 added;
639 int ret = 0;
622 640
623 pr_debug("%s: group=%p inode=%p\n", __func__, group, inode); 641 pr_debug("%s: group=%p inode=%p\n", __func__, group, inode);
624 642
@@ -634,8 +652,6 @@ static int fanotify_add_inode_mark(struct fsnotify_group *group,
634 652
635 fsn_mark = fsnotify_find_inode_mark(group, inode); 653 fsn_mark = fsnotify_find_inode_mark(group, inode);
636 if (!fsn_mark) { 654 if (!fsn_mark) {
637 int ret;
638
639 if (atomic_read(&group->num_marks) > group->fanotify_data.max_marks) 655 if (atomic_read(&group->num_marks) > group->fanotify_data.max_marks)
640 return -ENOSPC; 656 return -ENOSPC;
641 657
@@ -645,16 +661,16 @@ static int fanotify_add_inode_mark(struct fsnotify_group *group,
645 661
646 fsnotify_init_mark(fsn_mark, fanotify_free_mark); 662 fsnotify_init_mark(fsn_mark, fanotify_free_mark);
647 ret = fsnotify_add_mark(fsn_mark, group, inode, NULL, 0); 663 ret = fsnotify_add_mark(fsn_mark, group, inode, NULL, 0);
648 if (ret) { 664 if (ret)
649 fanotify_free_mark(fsn_mark); 665 goto err;
650 return ret;
651 }
652 } 666 }
653 added = fanotify_mark_add_to_mask(fsn_mark, mask, flags); 667 added = fanotify_mark_add_to_mask(fsn_mark, mask, flags);
654 fsnotify_put_mark(fsn_mark); 668
655 if (added & ~inode->i_fsnotify_mask) 669 if (added & ~inode->i_fsnotify_mask)
656 fsnotify_recalc_inode_mask(inode); 670 fsnotify_recalc_inode_mask(inode);
657 return 0; 671err:
672 fsnotify_put_mark(fsn_mark);
673 return ret;
658} 674}
659 675
660/* fanotify syscalls */ 676/* fanotify syscalls */
@@ -687,8 +703,10 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
687 703
688 /* fsnotify_alloc_group takes a ref. Dropped in fanotify_release */ 704 /* fsnotify_alloc_group takes a ref. Dropped in fanotify_release */
689 group = fsnotify_alloc_group(&fanotify_fsnotify_ops); 705 group = fsnotify_alloc_group(&fanotify_fsnotify_ops);
690 if (IS_ERR(group)) 706 if (IS_ERR(group)) {
707 free_uid(user);
691 return PTR_ERR(group); 708 return PTR_ERR(group);
709 }
692 710
693 group->fanotify_data.user = user; 711 group->fanotify_data.user = user;
694 atomic_inc(&user->fanotify_listeners); 712 atomic_inc(&user->fanotify_listeners);
@@ -698,6 +716,7 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
698 mutex_init(&group->fanotify_data.access_mutex); 716 mutex_init(&group->fanotify_data.access_mutex);
699 init_waitqueue_head(&group->fanotify_data.access_waitq); 717 init_waitqueue_head(&group->fanotify_data.access_waitq);
700 INIT_LIST_HEAD(&group->fanotify_data.access_list); 718 INIT_LIST_HEAD(&group->fanotify_data.access_list);
719 atomic_set(&group->fanotify_data.bypass_perm, 0);
701#endif 720#endif
702 switch (flags & FAN_ALL_CLASS_BITS) { 721 switch (flags & FAN_ALL_CLASS_BITS) {
703 case FAN_CLASS_NOTIF: 722 case FAN_CLASS_NOTIF:
@@ -764,8 +783,10 @@ SYSCALL_DEFINE(fanotify_mark)(int fanotify_fd, unsigned int flags,
764 if (flags & ~FAN_ALL_MARK_FLAGS) 783 if (flags & ~FAN_ALL_MARK_FLAGS)
765 return -EINVAL; 784 return -EINVAL;
766 switch (flags & (FAN_MARK_ADD | FAN_MARK_REMOVE | FAN_MARK_FLUSH)) { 785 switch (flags & (FAN_MARK_ADD | FAN_MARK_REMOVE | FAN_MARK_FLUSH)) {
767 case FAN_MARK_ADD: 786 case FAN_MARK_ADD: /* fallthrough */
768 case FAN_MARK_REMOVE: 787 case FAN_MARK_REMOVE:
788 if (!mask)
789 return -EINVAL;
769 case FAN_MARK_FLUSH: 790 case FAN_MARK_FLUSH:
770 break; 791 break;
771 default: 792 default:
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index 444c305a468c..4cd5d5d78f9f 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -752,6 +752,7 @@ SYSCALL_DEFINE1(inotify_init1, int, flags)
752 if (ret >= 0) 752 if (ret >= 0)
753 return ret; 753 return ret;
754 754
755 fsnotify_put_group(group);
755 atomic_dec(&user->inotify_devs); 756 atomic_dec(&user->inotify_devs);
756out_free_uid: 757out_free_uid:
757 free_uid(user); 758 free_uid(user);
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index f1e962cb3b73..0d7c5540ad66 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -573,11 +573,14 @@ static void ocfs2_dio_end_io(struct kiocb *iocb,
573 /* this io's submitter should not have unlocked this before we could */ 573 /* this io's submitter should not have unlocked this before we could */
574 BUG_ON(!ocfs2_iocb_is_rw_locked(iocb)); 574 BUG_ON(!ocfs2_iocb_is_rw_locked(iocb));
575 575
576 if (ocfs2_iocb_is_sem_locked(iocb)) {
577 up_read(&inode->i_alloc_sem);
578 ocfs2_iocb_clear_sem_locked(iocb);
579 }
580
576 ocfs2_iocb_clear_rw_locked(iocb); 581 ocfs2_iocb_clear_rw_locked(iocb);
577 582
578 level = ocfs2_iocb_rw_locked_level(iocb); 583 level = ocfs2_iocb_rw_locked_level(iocb);
579 if (!level)
580 up_read(&inode->i_alloc_sem);
581 ocfs2_rw_unlock(inode, level); 584 ocfs2_rw_unlock(inode, level);
582 585
583 if (is_async) 586 if (is_async)
diff --git a/fs/ocfs2/aops.h b/fs/ocfs2/aops.h
index 76bfdfda691a..eceb456037c1 100644
--- a/fs/ocfs2/aops.h
+++ b/fs/ocfs2/aops.h
@@ -68,8 +68,27 @@ static inline void ocfs2_iocb_set_rw_locked(struct kiocb *iocb, int level)
68 else 68 else
69 clear_bit(1, (unsigned long *)&iocb->private); 69 clear_bit(1, (unsigned long *)&iocb->private);
70} 70}
71
72/*
73 * Using a named enum representing lock types in terms of #N bit stored in
74 * iocb->private, which is going to be used for communication bewteen
75 * ocfs2_dio_end_io() and ocfs2_file_aio_write/read().
76 */
77enum ocfs2_iocb_lock_bits {
78 OCFS2_IOCB_RW_LOCK = 0,
79 OCFS2_IOCB_RW_LOCK_LEVEL,
80 OCFS2_IOCB_SEM,
81 OCFS2_IOCB_NUM_LOCKS
82};
83
71#define ocfs2_iocb_clear_rw_locked(iocb) \ 84#define ocfs2_iocb_clear_rw_locked(iocb) \
72 clear_bit(0, (unsigned long *)&iocb->private) 85 clear_bit(OCFS2_IOCB_RW_LOCK, (unsigned long *)&iocb->private)
73#define ocfs2_iocb_rw_locked_level(iocb) \ 86#define ocfs2_iocb_rw_locked_level(iocb) \
74 test_bit(1, (unsigned long *)&iocb->private) 87 test_bit(OCFS2_IOCB_RW_LOCK_LEVEL, (unsigned long *)&iocb->private)
88#define ocfs2_iocb_set_sem_locked(iocb) \
89 set_bit(OCFS2_IOCB_SEM, (unsigned long *)&iocb->private)
90#define ocfs2_iocb_clear_sem_locked(iocb) \
91 clear_bit(OCFS2_IOCB_SEM, (unsigned long *)&iocb->private)
92#define ocfs2_iocb_is_sem_locked(iocb) \
93 test_bit(OCFS2_IOCB_SEM, (unsigned long *)&iocb->private)
75#endif /* OCFS2_FILE_H */ 94#endif /* OCFS2_FILE_H */
diff --git a/fs/ocfs2/cluster/masklog.c b/fs/ocfs2/cluster/masklog.c
index c7fba396392d..6c61771469af 100644
--- a/fs/ocfs2/cluster/masklog.c
+++ b/fs/ocfs2/cluster/masklog.c
@@ -113,10 +113,11 @@ static struct mlog_attribute mlog_attrs[MLOG_MAX_BITS] = {
113 define_mask(QUOTA), 113 define_mask(QUOTA),
114 define_mask(REFCOUNT), 114 define_mask(REFCOUNT),
115 define_mask(BASTS), 115 define_mask(BASTS),
116 define_mask(RESERVATIONS),
117 define_mask(CLUSTER),
116 define_mask(ERROR), 118 define_mask(ERROR),
117 define_mask(NOTICE), 119 define_mask(NOTICE),
118 define_mask(KTHREAD), 120 define_mask(KTHREAD),
119 define_mask(RESERVATIONS),
120}; 121};
121 122
122static struct attribute *mlog_attr_ptrs[MLOG_MAX_BITS] = {NULL, }; 123static struct attribute *mlog_attr_ptrs[MLOG_MAX_BITS] = {NULL, };
diff --git a/fs/ocfs2/cluster/masklog.h b/fs/ocfs2/cluster/masklog.h
index ea2ed9f56c94..34d6544357d9 100644
--- a/fs/ocfs2/cluster/masklog.h
+++ b/fs/ocfs2/cluster/masklog.h
@@ -81,7 +81,7 @@
81#include <linux/sched.h> 81#include <linux/sched.h>
82 82
83/* bits that are frequently given and infrequently matched in the low word */ 83/* bits that are frequently given and infrequently matched in the low word */
84/* NOTE: If you add a flag, you need to also update mlog.c! */ 84/* NOTE: If you add a flag, you need to also update masklog.c! */
85#define ML_ENTRY 0x0000000000000001ULL /* func call entry */ 85#define ML_ENTRY 0x0000000000000001ULL /* func call entry */
86#define ML_EXIT 0x0000000000000002ULL /* func call exit */ 86#define ML_EXIT 0x0000000000000002ULL /* func call exit */
87#define ML_TCP 0x0000000000000004ULL /* net cluster/tcp.c */ 87#define ML_TCP 0x0000000000000004ULL /* net cluster/tcp.c */
@@ -114,13 +114,14 @@
114#define ML_XATTR 0x0000000020000000ULL /* ocfs2 extended attributes */ 114#define ML_XATTR 0x0000000020000000ULL /* ocfs2 extended attributes */
115#define ML_QUOTA 0x0000000040000000ULL /* ocfs2 quota operations */ 115#define ML_QUOTA 0x0000000040000000ULL /* ocfs2 quota operations */
116#define ML_REFCOUNT 0x0000000080000000ULL /* refcount tree operations */ 116#define ML_REFCOUNT 0x0000000080000000ULL /* refcount tree operations */
117#define ML_BASTS 0x0000001000000000ULL /* dlmglue asts and basts */ 117#define ML_BASTS 0x0000000100000000ULL /* dlmglue asts and basts */
118#define ML_RESERVATIONS 0x0000000200000000ULL /* ocfs2 alloc reservations */
119#define ML_CLUSTER 0x0000000400000000ULL /* cluster stack */
120
118/* bits that are infrequently given and frequently matched in the high word */ 121/* bits that are infrequently given and frequently matched in the high word */
119#define ML_ERROR 0x0000000100000000ULL /* sent to KERN_ERR */ 122#define ML_ERROR 0x1000000000000000ULL /* sent to KERN_ERR */
120#define ML_NOTICE 0x0000000200000000ULL /* setn to KERN_NOTICE */ 123#define ML_NOTICE 0x2000000000000000ULL /* setn to KERN_NOTICE */
121#define ML_KTHREAD 0x0000000400000000ULL /* kernel thread activity */ 124#define ML_KTHREAD 0x4000000000000000ULL /* kernel thread activity */
122#define ML_RESERVATIONS 0x0000000800000000ULL /* ocfs2 alloc reservations */
123#define ML_CLUSTER 0x0000001000000000ULL /* cluster stack */
124 125
125#define MLOG_INITIAL_AND_MASK (ML_ERROR|ML_NOTICE) 126#define MLOG_INITIAL_AND_MASK (ML_ERROR|ML_NOTICE)
126#define MLOG_INITIAL_NOT_MASK (ML_ENTRY|ML_EXIT) 127#define MLOG_INITIAL_NOT_MASK (ML_ENTRY|ML_EXIT)
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index c49f6de0e7ab..d417b3f9b0c7 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -2461,8 +2461,10 @@ static int ocfs2_dx_dir_attach_index(struct ocfs2_super *osb,
2461 2461
2462 di->i_dx_root = cpu_to_le64(dr_blkno); 2462 di->i_dx_root = cpu_to_le64(dr_blkno);
2463 2463
2464 spin_lock(&OCFS2_I(dir)->ip_lock);
2464 OCFS2_I(dir)->ip_dyn_features |= OCFS2_INDEXED_DIR_FL; 2465 OCFS2_I(dir)->ip_dyn_features |= OCFS2_INDEXED_DIR_FL;
2465 di->i_dyn_features = cpu_to_le16(OCFS2_I(dir)->ip_dyn_features); 2466 di->i_dyn_features = cpu_to_le16(OCFS2_I(dir)->ip_dyn_features);
2467 spin_unlock(&OCFS2_I(dir)->ip_lock);
2466 2468
2467 ocfs2_journal_dirty(handle, di_bh); 2469 ocfs2_journal_dirty(handle, di_bh);
2468 2470
@@ -4466,8 +4468,10 @@ static int ocfs2_dx_dir_remove_index(struct inode *dir,
4466 goto out_commit; 4468 goto out_commit;
4467 } 4469 }
4468 4470
4471 spin_lock(&OCFS2_I(dir)->ip_lock);
4469 OCFS2_I(dir)->ip_dyn_features &= ~OCFS2_INDEXED_DIR_FL; 4472 OCFS2_I(dir)->ip_dyn_features &= ~OCFS2_INDEXED_DIR_FL;
4470 di->i_dyn_features = cpu_to_le16(OCFS2_I(dir)->ip_dyn_features); 4473 di->i_dyn_features = cpu_to_le16(OCFS2_I(dir)->ip_dyn_features);
4474 spin_unlock(&OCFS2_I(dir)->ip_lock);
4471 di->i_dx_root = cpu_to_le64(0ULL); 4475 di->i_dx_root = cpu_to_le64(0ULL);
4472 4476
4473 ocfs2_journal_dirty(handle, di_bh); 4477 ocfs2_journal_dirty(handle, di_bh);
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index f564b0e5f80d..59f0f6bdfc62 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -2346,7 +2346,8 @@ static void dlm_deref_lockres_worker(struct dlm_work_item *item, void *data)
2346 */ 2346 */
2347static int dlm_is_lockres_migrateable(struct dlm_ctxt *dlm, 2347static int dlm_is_lockres_migrateable(struct dlm_ctxt *dlm,
2348 struct dlm_lock_resource *res, 2348 struct dlm_lock_resource *res,
2349 int *numlocks) 2349 int *numlocks,
2350 int *hasrefs)
2350{ 2351{
2351 int ret; 2352 int ret;
2352 int i; 2353 int i;
@@ -2356,6 +2357,9 @@ static int dlm_is_lockres_migrateable(struct dlm_ctxt *dlm,
2356 2357
2357 assert_spin_locked(&res->spinlock); 2358 assert_spin_locked(&res->spinlock);
2358 2359
2360 *numlocks = 0;
2361 *hasrefs = 0;
2362
2359 ret = -EINVAL; 2363 ret = -EINVAL;
2360 if (res->owner == DLM_LOCK_RES_OWNER_UNKNOWN) { 2364 if (res->owner == DLM_LOCK_RES_OWNER_UNKNOWN) {
2361 mlog(0, "cannot migrate lockres with unknown owner!\n"); 2365 mlog(0, "cannot migrate lockres with unknown owner!\n");
@@ -2386,7 +2390,13 @@ static int dlm_is_lockres_migrateable(struct dlm_ctxt *dlm,
2386 } 2390 }
2387 2391
2388 *numlocks = count; 2392 *numlocks = count;
2389 mlog(0, "migrateable lockres having %d locks\n", *numlocks); 2393
2394 count = find_next_bit(res->refmap, O2NM_MAX_NODES, 0);
2395 if (count < O2NM_MAX_NODES)
2396 *hasrefs = 1;
2397
2398 mlog(0, "%s: res %.*s, Migrateable, locks %d, refs %d\n", dlm->name,
2399 res->lockname.len, res->lockname.name, *numlocks, *hasrefs);
2390 2400
2391leave: 2401leave:
2392 return ret; 2402 return ret;
@@ -2408,7 +2418,7 @@ static int dlm_migrate_lockres(struct dlm_ctxt *dlm,
2408 const char *name; 2418 const char *name;
2409 unsigned int namelen; 2419 unsigned int namelen;
2410 int mle_added = 0; 2420 int mle_added = 0;
2411 int numlocks; 2421 int numlocks, hasrefs;
2412 int wake = 0; 2422 int wake = 0;
2413 2423
2414 if (!dlm_grab(dlm)) 2424 if (!dlm_grab(dlm))
@@ -2417,13 +2427,13 @@ static int dlm_migrate_lockres(struct dlm_ctxt *dlm,
2417 name = res->lockname.name; 2427 name = res->lockname.name;
2418 namelen = res->lockname.len; 2428 namelen = res->lockname.len;
2419 2429
2420 mlog(0, "migrating %.*s to %u\n", namelen, name, target); 2430 mlog(0, "%s: Migrating %.*s to %u\n", dlm->name, namelen, name, target);
2421 2431
2422 /* 2432 /*
2423 * ensure this lockres is a proper candidate for migration 2433 * ensure this lockres is a proper candidate for migration
2424 */ 2434 */
2425 spin_lock(&res->spinlock); 2435 spin_lock(&res->spinlock);
2426 ret = dlm_is_lockres_migrateable(dlm, res, &numlocks); 2436 ret = dlm_is_lockres_migrateable(dlm, res, &numlocks, &hasrefs);
2427 if (ret < 0) { 2437 if (ret < 0) {
2428 spin_unlock(&res->spinlock); 2438 spin_unlock(&res->spinlock);
2429 goto leave; 2439 goto leave;
@@ -2431,10 +2441,8 @@ static int dlm_migrate_lockres(struct dlm_ctxt *dlm,
2431 spin_unlock(&res->spinlock); 2441 spin_unlock(&res->spinlock);
2432 2442
2433 /* no work to do */ 2443 /* no work to do */
2434 if (numlocks == 0) { 2444 if (numlocks == 0 && !hasrefs)
2435 mlog(0, "no locks were found on this lockres! done!\n");
2436 goto leave; 2445 goto leave;
2437 }
2438 2446
2439 /* 2447 /*
2440 * preallocate up front 2448 * preallocate up front
@@ -2459,14 +2467,14 @@ static int dlm_migrate_lockres(struct dlm_ctxt *dlm,
2459 * find a node to migrate the lockres to 2467 * find a node to migrate the lockres to
2460 */ 2468 */
2461 2469
2462 mlog(0, "picking a migration node\n");
2463 spin_lock(&dlm->spinlock); 2470 spin_lock(&dlm->spinlock);
2464 /* pick a new node */ 2471 /* pick a new node */
2465 if (!test_bit(target, dlm->domain_map) || 2472 if (!test_bit(target, dlm->domain_map) ||
2466 target >= O2NM_MAX_NODES) { 2473 target >= O2NM_MAX_NODES) {
2467 target = dlm_pick_migration_target(dlm, res); 2474 target = dlm_pick_migration_target(dlm, res);
2468 } 2475 }
2469 mlog(0, "node %u chosen for migration\n", target); 2476 mlog(0, "%s: res %.*s, Node %u chosen for migration\n", dlm->name,
2477 namelen, name, target);
2470 2478
2471 if (target >= O2NM_MAX_NODES || 2479 if (target >= O2NM_MAX_NODES ||
2472 !test_bit(target, dlm->domain_map)) { 2480 !test_bit(target, dlm->domain_map)) {
@@ -2667,7 +2675,7 @@ int dlm_empty_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res)
2667{ 2675{
2668 int ret; 2676 int ret;
2669 int lock_dropped = 0; 2677 int lock_dropped = 0;
2670 int numlocks; 2678 int numlocks, hasrefs;
2671 2679
2672 spin_lock(&res->spinlock); 2680 spin_lock(&res->spinlock);
2673 if (res->owner != dlm->node_num) { 2681 if (res->owner != dlm->node_num) {
@@ -2681,8 +2689,8 @@ int dlm_empty_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res)
2681 } 2689 }
2682 2690
2683 /* No need to migrate a lockres having no locks */ 2691 /* No need to migrate a lockres having no locks */
2684 ret = dlm_is_lockres_migrateable(dlm, res, &numlocks); 2692 ret = dlm_is_lockres_migrateable(dlm, res, &numlocks, &hasrefs);
2685 if (ret >= 0 && numlocks == 0) { 2693 if (ret >= 0 && numlocks == 0 && !hasrefs) {
2686 spin_unlock(&res->spinlock); 2694 spin_unlock(&res->spinlock);
2687 goto leave; 2695 goto leave;
2688 } 2696 }
@@ -2915,6 +2923,12 @@ static u8 dlm_pick_migration_target(struct dlm_ctxt *dlm,
2915 } 2923 }
2916 queue++; 2924 queue++;
2917 } 2925 }
2926
2927 nodenum = find_next_bit(res->refmap, O2NM_MAX_NODES, 0);
2928 if (nodenum < O2NM_MAX_NODES) {
2929 spin_unlock(&res->spinlock);
2930 return nodenum;
2931 }
2918 spin_unlock(&res->spinlock); 2932 spin_unlock(&res->spinlock);
2919 mlog(0, "have not found a suitable target yet! checking domain map\n"); 2933 mlog(0, "have not found a suitable target yet! checking domain map\n");
2920 2934
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 77b4c04a2809..f6cba566429d 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -2241,11 +2241,15 @@ static ssize_t ocfs2_file_aio_write(struct kiocb *iocb,
2241 2241
2242 mutex_lock(&inode->i_mutex); 2242 mutex_lock(&inode->i_mutex);
2243 2243
2244 ocfs2_iocb_clear_sem_locked(iocb);
2245
2244relock: 2246relock:
2245 /* to match setattr's i_mutex -> i_alloc_sem -> rw_lock ordering */ 2247 /* to match setattr's i_mutex -> i_alloc_sem -> rw_lock ordering */
2246 if (direct_io) { 2248 if (direct_io) {
2247 down_read(&inode->i_alloc_sem); 2249 down_read(&inode->i_alloc_sem);
2248 have_alloc_sem = 1; 2250 have_alloc_sem = 1;
2251 /* communicate with ocfs2_dio_end_io */
2252 ocfs2_iocb_set_sem_locked(iocb);
2249 } 2253 }
2250 2254
2251 /* 2255 /*
@@ -2382,8 +2386,10 @@ out:
2382 ocfs2_rw_unlock(inode, rw_level); 2386 ocfs2_rw_unlock(inode, rw_level);
2383 2387
2384out_sems: 2388out_sems:
2385 if (have_alloc_sem) 2389 if (have_alloc_sem) {
2386 up_read(&inode->i_alloc_sem); 2390 up_read(&inode->i_alloc_sem);
2391 ocfs2_iocb_clear_sem_locked(iocb);
2392 }
2387 2393
2388 mutex_unlock(&inode->i_mutex); 2394 mutex_unlock(&inode->i_mutex);
2389 2395
@@ -2527,6 +2533,8 @@ static ssize_t ocfs2_file_aio_read(struct kiocb *iocb,
2527 goto bail; 2533 goto bail;
2528 } 2534 }
2529 2535
2536 ocfs2_iocb_clear_sem_locked(iocb);
2537
2530 /* 2538 /*
2531 * buffered reads protect themselves in ->readpage(). O_DIRECT reads 2539 * buffered reads protect themselves in ->readpage(). O_DIRECT reads
2532 * need locks to protect pending reads from racing with truncate. 2540 * need locks to protect pending reads from racing with truncate.
@@ -2534,6 +2542,7 @@ static ssize_t ocfs2_file_aio_read(struct kiocb *iocb,
2534 if (filp->f_flags & O_DIRECT) { 2542 if (filp->f_flags & O_DIRECT) {
2535 down_read(&inode->i_alloc_sem); 2543 down_read(&inode->i_alloc_sem);
2536 have_alloc_sem = 1; 2544 have_alloc_sem = 1;
2545 ocfs2_iocb_set_sem_locked(iocb);
2537 2546
2538 ret = ocfs2_rw_lock(inode, 0); 2547 ret = ocfs2_rw_lock(inode, 0);
2539 if (ret < 0) { 2548 if (ret < 0) {
@@ -2575,8 +2584,10 @@ static ssize_t ocfs2_file_aio_read(struct kiocb *iocb,
2575 } 2584 }
2576 2585
2577bail: 2586bail:
2578 if (have_alloc_sem) 2587 if (have_alloc_sem) {
2579 up_read(&inode->i_alloc_sem); 2588 up_read(&inode->i_alloc_sem);
2589 ocfs2_iocb_clear_sem_locked(iocb);
2590 }
2580 if (rw_level != -1) 2591 if (rw_level != -1)
2581 ocfs2_rw_unlock(inode, rw_level); 2592 ocfs2_rw_unlock(inode, rw_level);
2582 mlog_exit(ret); 2593 mlog_exit(ret);
diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h
index c2e4f8222e2f..bf2e7764920e 100644
--- a/fs/ocfs2/ocfs2_fs.h
+++ b/fs/ocfs2/ocfs2_fs.h
@@ -350,7 +350,7 @@ enum {
350#define OCFS2_LAST_LOCAL_SYSTEM_INODE LOCAL_GROUP_QUOTA_SYSTEM_INODE 350#define OCFS2_LAST_LOCAL_SYSTEM_INODE LOCAL_GROUP_QUOTA_SYSTEM_INODE
351 NUM_SYSTEM_INODES 351 NUM_SYSTEM_INODES
352}; 352};
353#define NUM_GLOBAL_SYSTEM_INODES OCFS2_LAST_GLOBAL_SYSTEM_INODE 353#define NUM_GLOBAL_SYSTEM_INODES OCFS2_FIRST_LOCAL_SYSTEM_INODE
354#define NUM_LOCAL_SYSTEM_INODES \ 354#define NUM_LOCAL_SYSTEM_INODES \
355 (NUM_SYSTEM_INODES - OCFS2_FIRST_LOCAL_SYSTEM_INODE) 355 (NUM_SYSTEM_INODES - OCFS2_FIRST_LOCAL_SYSTEM_INODE)
356 356
diff --git a/fs/xfs/xfs_rename.c b/fs/xfs/xfs_rename.c
index d2af0a8381a6..77a59891734e 100644
--- a/fs/xfs/xfs_rename.c
+++ b/fs/xfs/xfs_rename.c
@@ -297,6 +297,7 @@ xfs_rename(
297 * it and some incremental backup programs won't work without it. 297 * it and some incremental backup programs won't work without it.
298 */ 298 */
299 xfs_trans_ichgtime(tp, src_ip, XFS_ICHGTIME_CHG); 299 xfs_trans_ichgtime(tp, src_ip, XFS_ICHGTIME_CHG);
300 xfs_trans_log_inode(tp, src_ip, XFS_ILOG_CORE);
300 301
301 /* 302 /*
302 * Adjust the link count on src_dp. This is necessary when 303 * Adjust the link count on src_dp. This is necessary when