Diffstat (limited to 'fs')
152 files changed, 2489 insertions, 1548 deletions
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index d5c1401f0031..d34896cfb19f 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -980,19 +980,11 @@ static int autofs4_root_ioctl_unlocked(struct inode *inode, struct file *filp, | |||
980 | } | 980 | } |
981 | } | 981 | } |
982 | 982 | ||
983 | static DEFINE_MUTEX(autofs4_ioctl_mutex); | ||
984 | |||
985 | static long autofs4_root_ioctl(struct file *filp, | 983 | static long autofs4_root_ioctl(struct file *filp, |
986 | unsigned int cmd, unsigned long arg) | 984 | unsigned int cmd, unsigned long arg) |
987 | { | 985 | { |
988 | long ret; | ||
989 | struct inode *inode = filp->f_dentry->d_inode; | 986 | struct inode *inode = filp->f_dentry->d_inode; |
990 | 987 | return autofs4_root_ioctl_unlocked(inode, filp, cmd, arg); | |
991 | mutex_lock(&autofs4_ioctl_mutex); | ||
992 | ret = autofs4_root_ioctl_unlocked(inode, filp, cmd, arg); | ||
993 | mutex_unlock(&autofs4_ioctl_mutex); | ||
994 | |||
995 | return ret; | ||
996 | } | 988 | } |
997 | 989 | ||
998 | #ifdef CONFIG_COMPAT | 990 | #ifdef CONFIG_COMPAT |
@@ -1002,13 +994,11 @@ static long autofs4_root_compat_ioctl(struct file *filp, | |||
1002 | struct inode *inode = filp->f_path.dentry->d_inode; | 994 | struct inode *inode = filp->f_path.dentry->d_inode; |
1003 | int ret; | 995 | int ret; |
1004 | 996 | ||
1005 | mutex_lock(&autofs4_ioctl_mutex); | ||
1006 | if (cmd == AUTOFS_IOC_READY || cmd == AUTOFS_IOC_FAIL) | 997 | if (cmd == AUTOFS_IOC_READY || cmd == AUTOFS_IOC_FAIL) |
1007 | ret = autofs4_root_ioctl_unlocked(inode, filp, cmd, arg); | 998 | ret = autofs4_root_ioctl_unlocked(inode, filp, cmd, arg); |
1008 | else | 999 | else |
1009 | ret = autofs4_root_ioctl_unlocked(inode, filp, cmd, | 1000 | ret = autofs4_root_ioctl_unlocked(inode, filp, cmd, |
1010 | (unsigned long)compat_ptr(arg)); | 1001 | (unsigned long)compat_ptr(arg)); |
1011 | mutex_unlock(&autofs4_ioctl_mutex); | ||
1012 | 1002 | ||
1013 | return ret; | 1003 | return ret; |
1014 | } | 1004 | } |
diff --git a/fs/bio.c b/fs/bio.c
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -370,6 +370,9 @@ struct bio *bio_kmalloc(gfp_t gfp_mask, int nr_iovecs) | |||
370 | { | 370 | { |
371 | struct bio *bio; | 371 | struct bio *bio; |
372 | 372 | ||
373 | if (nr_iovecs > UIO_MAXIOV) | ||
374 | return NULL; | ||
375 | |||
373 | bio = kmalloc(sizeof(struct bio) + nr_iovecs * sizeof(struct bio_vec), | 376 | bio = kmalloc(sizeof(struct bio) + nr_iovecs * sizeof(struct bio_vec), |
374 | gfp_mask); | 377 | gfp_mask); |
375 | if (unlikely(!bio)) | 378 | if (unlikely(!bio)) |
@@ -697,8 +700,12 @@ static void bio_free_map_data(struct bio_map_data *bmd) | |||
697 | static struct bio_map_data *bio_alloc_map_data(int nr_segs, int iov_count, | 700 | static struct bio_map_data *bio_alloc_map_data(int nr_segs, int iov_count, |
698 | gfp_t gfp_mask) | 701 | gfp_t gfp_mask) |
699 | { | 702 | { |
700 | struct bio_map_data *bmd = kmalloc(sizeof(*bmd), gfp_mask); | 703 | struct bio_map_data *bmd; |
701 | 704 | ||
705 | if (iov_count > UIO_MAXIOV) | ||
706 | return NULL; | ||
707 | |||
708 | bmd = kmalloc(sizeof(*bmd), gfp_mask); | ||
702 | if (!bmd) | 709 | if (!bmd) |
703 | return NULL; | 710 | return NULL; |
704 | 711 | ||
@@ -827,6 +834,12 @@ struct bio *bio_copy_user_iov(struct request_queue *q, | |||
827 | end = (uaddr + iov[i].iov_len + PAGE_SIZE - 1) >> PAGE_SHIFT; | 834 | end = (uaddr + iov[i].iov_len + PAGE_SIZE - 1) >> PAGE_SHIFT; |
828 | start = uaddr >> PAGE_SHIFT; | 835 | start = uaddr >> PAGE_SHIFT; |
829 | 836 | ||
837 | /* | ||
838 | * Overflow, abort | ||
839 | */ | ||
840 | if (end < start) | ||
841 | return ERR_PTR(-EINVAL); | ||
842 | |||
830 | nr_pages += end - start; | 843 | nr_pages += end - start; |
831 | len += iov[i].iov_len; | 844 | len += iov[i].iov_len; |
832 | } | 845 | } |
@@ -955,6 +968,12 @@ static struct bio *__bio_map_user_iov(struct request_queue *q, | |||
955 | unsigned long end = (uaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT; | 968 | unsigned long end = (uaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT; |
956 | unsigned long start = uaddr >> PAGE_SHIFT; | 969 | unsigned long start = uaddr >> PAGE_SHIFT; |
957 | 970 | ||
971 | /* | ||
972 | * Overflow, abort | ||
973 | */ | ||
974 | if (end < start) | ||
975 | return ERR_PTR(-EINVAL); | ||
976 | |||
958 | nr_pages += end - start; | 977 | nr_pages += end - start; |
959 | /* | 978 | /* |
960 | * buffer must be aligned to at least hardsector size for now | 979 | * buffer must be aligned to at least hardsector size for now |
@@ -982,7 +1001,7 @@ static struct bio *__bio_map_user_iov(struct request_queue *q, | |||
982 | unsigned long start = uaddr >> PAGE_SHIFT; | 1001 | unsigned long start = uaddr >> PAGE_SHIFT; |
983 | const int local_nr_pages = end - start; | 1002 | const int local_nr_pages = end - start; |
984 | const int page_limit = cur_page + local_nr_pages; | 1003 | const int page_limit = cur_page + local_nr_pages; |
985 | 1004 | ||
986 | ret = get_user_pages_fast(uaddr, local_nr_pages, | 1005 | ret = get_user_pages_fast(uaddr, local_nr_pages, |
987 | write_to_vm, &pages[cur_page]); | 1006 | write_to_vm, &pages[cur_page]); |
988 | if (ret < local_nr_pages) { | 1007 | if (ret < local_nr_pages) { |
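The fs/bio.c hunks above cap the number of iovec segments at UIO_MAXIOV in bio_kmalloc() and bio_alloc_map_data(), and abort when the page-rounding arithmetic on a user segment wraps around. A minimal user-space sketch of that overflow check follows; the helper name and types are illustrative, not the kernel API.

#include <stdint.h>

#define PAGE_SHIFT 12

/* Illustrative stand-in for the check added to bio_copy_user_iov() and
 * __bio_map_user_iov(): round the user range up to whole pages and bail
 * out if the addition wrapped around the top of the address space. */
static int account_user_range(uint64_t uaddr, uint64_t len, uint64_t *nr_pages)
{
	uint64_t end   = (uaddr + len + (1ULL << PAGE_SHIFT) - 1) >> PAGE_SHIFT;
	uint64_t start = uaddr >> PAGE_SHIFT;

	if (end < start)	/* overflow, abort (the kernel returns ERR_PTR(-EINVAL)) */
		return -1;

	*nr_pages += end - start;
	return 0;
}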
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 06e8ff12b97c..4230252fd689 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -11,7 +11,6 @@ | |||
11 | #include <linux/slab.h> | 11 | #include <linux/slab.h> |
12 | #include <linux/kmod.h> | 12 | #include <linux/kmod.h> |
13 | #include <linux/major.h> | 13 | #include <linux/major.h> |
14 | #include <linux/smp_lock.h> | ||
15 | #include <linux/device_cgroup.h> | 14 | #include <linux/device_cgroup.h> |
16 | #include <linux/highmem.h> | 15 | #include <linux/highmem.h> |
17 | #include <linux/blkdev.h> | 16 | #include <linux/blkdev.h> |
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 7845d1f7d1d9..b50bc4bd5c56 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -91,23 +91,10 @@ static inline int compressed_bio_size(struct btrfs_root *root, | |||
91 | static struct bio *compressed_bio_alloc(struct block_device *bdev, | 91 | static struct bio *compressed_bio_alloc(struct block_device *bdev, |
92 | u64 first_byte, gfp_t gfp_flags) | 92 | u64 first_byte, gfp_t gfp_flags) |
93 | { | 93 | { |
94 | struct bio *bio; | ||
95 | int nr_vecs; | 94 | int nr_vecs; |
96 | 95 | ||
97 | nr_vecs = bio_get_nr_vecs(bdev); | 96 | nr_vecs = bio_get_nr_vecs(bdev); |
98 | bio = bio_alloc(gfp_flags, nr_vecs); | 97 | return btrfs_bio_alloc(bdev, first_byte >> 9, nr_vecs, gfp_flags); |
99 | |||
100 | if (bio == NULL && (current->flags & PF_MEMALLOC)) { | ||
101 | while (!bio && (nr_vecs /= 2)) | ||
102 | bio = bio_alloc(gfp_flags, nr_vecs); | ||
103 | } | ||
104 | |||
105 | if (bio) { | ||
106 | bio->bi_size = 0; | ||
107 | bio->bi_bdev = bdev; | ||
108 | bio->bi_sector = first_byte >> 9; | ||
109 | } | ||
110 | return bio; | ||
111 | } | 98 | } |
112 | 99 | ||
113 | static int check_compressed_csum(struct inode *inode, | 100 | static int check_compressed_csum(struct inode *inode, |
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 8db9234f6b41..af52f6d7a4d8 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -808,9 +808,9 @@ struct btrfs_block_group_cache { | |||
808 | int extents_thresh; | 808 | int extents_thresh; |
809 | int free_extents; | 809 | int free_extents; |
810 | int total_bitmaps; | 810 | int total_bitmaps; |
811 | int ro:1; | 811 | unsigned int ro:1; |
812 | int dirty:1; | 812 | unsigned int dirty:1; |
813 | int iref:1; | 813 | unsigned int iref:1; |
814 | 814 | ||
815 | int disk_cache_state; | 815 | int disk_cache_state; |
816 | 816 | ||
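The ctree.h hunk turns the ro/dirty/iref flags into unsigned bitfields. On compilers where a plain int bitfield is signed (gcc among them), a one-bit signed field can only hold 0 or -1, so assigning 1 stores -1 and a later ro == 1 test quietly fails. A standalone illustration, not btrfs code:

#include <stdio.h>

struct flags_signed   { int ro:1; };		/* old declaration style */
struct flags_unsigned { unsigned int ro:1; };	/* new declaration style */

int main(void)
{
	struct flags_signed   s = { .ro = 1 };	/* gcc warns here -- which is the point */
	struct flags_unsigned u = { .ro = 1 };

	printf("signed ro   = %d, ro == 1 is %s\n", s.ro,
	       s.ro == 1 ? "true" : "false");		/* -1, false */
	printf("unsigned ro = %u, ro == 1 is %s\n", (unsigned)u.ro,
	       u.ro == 1 ? "true" : "false");		/* 1, true */
	return 0;
}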
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index fb827d0d7181..51d2e4de34eb 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -28,6 +28,7 @@ | |||
28 | #include <linux/freezer.h> | 28 | #include <linux/freezer.h> |
29 | #include <linux/crc32c.h> | 29 | #include <linux/crc32c.h> |
30 | #include <linux/slab.h> | 30 | #include <linux/slab.h> |
31 | #include <linux/migrate.h> | ||
31 | #include "compat.h" | 32 | #include "compat.h" |
32 | #include "ctree.h" | 33 | #include "ctree.h" |
33 | #include "disk-io.h" | 34 | #include "disk-io.h" |
@@ -355,6 +356,8 @@ static int csum_dirty_buffer(struct btrfs_root *root, struct page *page) | |||
355 | ret = btree_read_extent_buffer_pages(root, eb, start + PAGE_CACHE_SIZE, | 356 | ret = btree_read_extent_buffer_pages(root, eb, start + PAGE_CACHE_SIZE, |
356 | btrfs_header_generation(eb)); | 357 | btrfs_header_generation(eb)); |
357 | BUG_ON(ret); | 358 | BUG_ON(ret); |
359 | WARN_ON(!btrfs_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN)); | ||
360 | |||
358 | found_start = btrfs_header_bytenr(eb); | 361 | found_start = btrfs_header_bytenr(eb); |
359 | if (found_start != start) { | 362 | if (found_start != start) { |
360 | WARN_ON(1); | 363 | WARN_ON(1); |
@@ -693,6 +696,27 @@ static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, | |||
693 | __btree_submit_bio_done); | 696 | __btree_submit_bio_done); |
694 | } | 697 | } |
695 | 698 | ||
699 | #ifdef CONFIG_MIGRATION | ||
700 | static int btree_migratepage(struct address_space *mapping, | ||
701 | struct page *newpage, struct page *page) | ||
702 | { | ||
703 | /* | ||
704 | * we can't safely write a btree page from here, | ||
705 | * we haven't done the locking hook | ||
706 | */ | ||
707 | if (PageDirty(page)) | ||
708 | return -EAGAIN; | ||
709 | /* | ||
710 | * Buffers may be managed in a filesystem specific way. | ||
711 | * We must have no buffers or drop them. | ||
712 | */ | ||
713 | if (page_has_private(page) && | ||
714 | !try_to_release_page(page, GFP_KERNEL)) | ||
715 | return -EAGAIN; | ||
716 | return migrate_page(mapping, newpage, page); | ||
717 | } | ||
718 | #endif | ||
719 | |||
696 | static int btree_writepage(struct page *page, struct writeback_control *wbc) | 720 | static int btree_writepage(struct page *page, struct writeback_control *wbc) |
697 | { | 721 | { |
698 | struct extent_io_tree *tree; | 722 | struct extent_io_tree *tree; |
@@ -707,8 +731,7 @@ static int btree_writepage(struct page *page, struct writeback_control *wbc) | |||
707 | } | 731 | } |
708 | 732 | ||
709 | redirty_page_for_writepage(wbc, page); | 733 | redirty_page_for_writepage(wbc, page); |
710 | eb = btrfs_find_tree_block(root, page_offset(page), | 734 | eb = btrfs_find_tree_block(root, page_offset(page), PAGE_CACHE_SIZE); |
711 | PAGE_CACHE_SIZE); | ||
712 | WARN_ON(!eb); | 735 | WARN_ON(!eb); |
713 | 736 | ||
714 | was_dirty = test_and_set_bit(EXTENT_BUFFER_DIRTY, &eb->bflags); | 737 | was_dirty = test_and_set_bit(EXTENT_BUFFER_DIRTY, &eb->bflags); |
@@ -799,6 +822,9 @@ static const struct address_space_operations btree_aops = { | |||
799 | .releasepage = btree_releasepage, | 822 | .releasepage = btree_releasepage, |
800 | .invalidatepage = btree_invalidatepage, | 823 | .invalidatepage = btree_invalidatepage, |
801 | .sync_page = block_sync_page, | 824 | .sync_page = block_sync_page, |
825 | #ifdef CONFIG_MIGRATION | ||
826 | .migratepage = btree_migratepage, | ||
827 | #endif | ||
802 | }; | 828 | }; |
803 | 829 | ||
804 | int readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize, | 830 | int readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize, |
@@ -981,7 +1007,10 @@ static int find_and_setup_root(struct btrfs_root *tree_root, | |||
981 | blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item)); | 1007 | blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item)); |
982 | root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item), | 1008 | root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item), |
983 | blocksize, generation); | 1009 | blocksize, generation); |
984 | BUG_ON(!root->node); | 1010 | if (!root->node || !btrfs_buffer_uptodate(root->node, generation)) { |
1011 | free_extent_buffer(root->node); | ||
1012 | return -EIO; | ||
1013 | } | ||
985 | root->commit_root = btrfs_root_node(root); | 1014 | root->commit_root = btrfs_root_node(root); |
986 | return 0; | 1015 | return 0; |
987 | } | 1016 | } |
@@ -1538,10 +1567,8 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1538 | GFP_NOFS); | 1567 | GFP_NOFS); |
1539 | struct btrfs_root *csum_root = kzalloc(sizeof(struct btrfs_root), | 1568 | struct btrfs_root *csum_root = kzalloc(sizeof(struct btrfs_root), |
1540 | GFP_NOFS); | 1569 | GFP_NOFS); |
1541 | struct btrfs_root *tree_root = kzalloc(sizeof(struct btrfs_root), | 1570 | struct btrfs_root *tree_root = btrfs_sb(sb); |
1542 | GFP_NOFS); | 1571 | struct btrfs_fs_info *fs_info = tree_root->fs_info; |
1543 | struct btrfs_fs_info *fs_info = kzalloc(sizeof(*fs_info), | ||
1544 | GFP_NOFS); | ||
1545 | struct btrfs_root *chunk_root = kzalloc(sizeof(struct btrfs_root), | 1572 | struct btrfs_root *chunk_root = kzalloc(sizeof(struct btrfs_root), |
1546 | GFP_NOFS); | 1573 | GFP_NOFS); |
1547 | struct btrfs_root *dev_root = kzalloc(sizeof(struct btrfs_root), | 1574 | struct btrfs_root *dev_root = kzalloc(sizeof(struct btrfs_root), |
diff --git a/fs/btrfs/export.c b/fs/btrfs/export.c
index 951ef09b82f4..659f532d26a0 100644
--- a/fs/btrfs/export.c
+++ b/fs/btrfs/export.c
@@ -166,7 +166,7 @@ static struct dentry *btrfs_fh_to_dentry(struct super_block *sb, struct fid *fh, | |||
166 | static struct dentry *btrfs_get_parent(struct dentry *child) | 166 | static struct dentry *btrfs_get_parent(struct dentry *child) |
167 | { | 167 | { |
168 | struct inode *dir = child->d_inode; | 168 | struct inode *dir = child->d_inode; |
169 | static struct dentry *dentry; | 169 | struct dentry *dentry; |
170 | struct btrfs_root *root = BTRFS_I(dir)->root; | 170 | struct btrfs_root *root = BTRFS_I(dir)->root; |
171 | struct btrfs_path *path; | 171 | struct btrfs_path *path; |
172 | struct extent_buffer *leaf; | 172 | struct extent_buffer *leaf; |
@@ -232,9 +232,85 @@ fail: | |||
232 | return ERR_PTR(ret); | 232 | return ERR_PTR(ret); |
233 | } | 233 | } |
234 | 234 | ||
235 | static int btrfs_get_name(struct dentry *parent, char *name, | ||
236 | struct dentry *child) | ||
237 | { | ||
238 | struct inode *inode = child->d_inode; | ||
239 | struct inode *dir = parent->d_inode; | ||
240 | struct btrfs_path *path; | ||
241 | struct btrfs_root *root = BTRFS_I(dir)->root; | ||
242 | struct btrfs_inode_ref *iref; | ||
243 | struct btrfs_root_ref *rref; | ||
244 | struct extent_buffer *leaf; | ||
245 | unsigned long name_ptr; | ||
246 | struct btrfs_key key; | ||
247 | int name_len; | ||
248 | int ret; | ||
249 | |||
250 | if (!dir || !inode) | ||
251 | return -EINVAL; | ||
252 | |||
253 | if (!S_ISDIR(dir->i_mode)) | ||
254 | return -EINVAL; | ||
255 | |||
256 | path = btrfs_alloc_path(); | ||
257 | if (!path) | ||
258 | return -ENOMEM; | ||
259 | path->leave_spinning = 1; | ||
260 | |||
261 | if (inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) { | ||
262 | key.objectid = BTRFS_I(inode)->root->root_key.objectid; | ||
263 | key.type = BTRFS_ROOT_BACKREF_KEY; | ||
264 | key.offset = (u64)-1; | ||
265 | root = root->fs_info->tree_root; | ||
266 | } else { | ||
267 | key.objectid = inode->i_ino; | ||
268 | key.offset = dir->i_ino; | ||
269 | key.type = BTRFS_INODE_REF_KEY; | ||
270 | } | ||
271 | |||
272 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); | ||
273 | if (ret < 0) { | ||
274 | btrfs_free_path(path); | ||
275 | return ret; | ||
276 | } else if (ret > 0) { | ||
277 | if (inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) { | ||
278 | path->slots[0]--; | ||
279 | } else { | ||
280 | btrfs_free_path(path); | ||
281 | return -ENOENT; | ||
282 | } | ||
283 | } | ||
284 | leaf = path->nodes[0]; | ||
285 | |||
286 | if (inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) { | ||
287 | rref = btrfs_item_ptr(leaf, path->slots[0], | ||
288 | struct btrfs_root_ref); | ||
289 | name_ptr = (unsigned long)(rref + 1); | ||
290 | name_len = btrfs_root_ref_name_len(leaf, rref); | ||
291 | } else { | ||
292 | iref = btrfs_item_ptr(leaf, path->slots[0], | ||
293 | struct btrfs_inode_ref); | ||
294 | name_ptr = (unsigned long)(iref + 1); | ||
295 | name_len = btrfs_inode_ref_name_len(leaf, iref); | ||
296 | } | ||
297 | |||
298 | read_extent_buffer(leaf, name, name_ptr, name_len); | ||
299 | btrfs_free_path(path); | ||
300 | |||
301 | /* | ||
302 | * have to add the null termination to make sure that reconnect_path | ||
303 | * gets the right len for strlen | ||
304 | */ | ||
305 | name[name_len] = '\0'; | ||
306 | |||
307 | return 0; | ||
308 | } | ||
309 | |||
235 | const struct export_operations btrfs_export_ops = { | 310 | const struct export_operations btrfs_export_ops = { |
236 | .encode_fh = btrfs_encode_fh, | 311 | .encode_fh = btrfs_encode_fh, |
237 | .fh_to_dentry = btrfs_fh_to_dentry, | 312 | .fh_to_dentry = btrfs_fh_to_dentry, |
238 | .fh_to_parent = btrfs_fh_to_parent, | 313 | .fh_to_parent = btrfs_fh_to_parent, |
239 | .get_parent = btrfs_get_parent, | 314 | .get_parent = btrfs_get_parent, |
315 | .get_name = btrfs_get_name, | ||
240 | }; | 316 | }; |
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 0c097f3aec41..227e5815d838 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -429,6 +429,7 @@ err: | |||
429 | 429 | ||
430 | static int cache_block_group(struct btrfs_block_group_cache *cache, | 430 | static int cache_block_group(struct btrfs_block_group_cache *cache, |
431 | struct btrfs_trans_handle *trans, | 431 | struct btrfs_trans_handle *trans, |
432 | struct btrfs_root *root, | ||
432 | int load_cache_only) | 433 | int load_cache_only) |
433 | { | 434 | { |
434 | struct btrfs_fs_info *fs_info = cache->fs_info; | 435 | struct btrfs_fs_info *fs_info = cache->fs_info; |
@@ -442,9 +443,12 @@ static int cache_block_group(struct btrfs_block_group_cache *cache, | |||
442 | 443 | ||
443 | /* | 444 | /* |
444 | * We can't do the read from on-disk cache during a commit since we need | 445 | * We can't do the read from on-disk cache during a commit since we need |
445 | * to have the normal tree locking. | 446 | * to have the normal tree locking. Also if we are currently trying to |
447 | * allocate blocks for the tree root we can't do the fast caching since | ||
448 | * we likely hold important locks. | ||
446 | */ | 449 | */ |
447 | if (!trans->transaction->in_commit) { | 450 | if (!trans->transaction->in_commit && |
451 | (root && root != root->fs_info->tree_root)) { | ||
448 | spin_lock(&cache->lock); | 452 | spin_lock(&cache->lock); |
449 | if (cache->cached != BTRFS_CACHE_NO) { | 453 | if (cache->cached != BTRFS_CACHE_NO) { |
450 | spin_unlock(&cache->lock); | 454 | spin_unlock(&cache->lock); |
@@ -2741,6 +2745,7 @@ static int cache_save_setup(struct btrfs_block_group_cache *block_group, | |||
2741 | struct btrfs_root *root = block_group->fs_info->tree_root; | 2745 | struct btrfs_root *root = block_group->fs_info->tree_root; |
2742 | struct inode *inode = NULL; | 2746 | struct inode *inode = NULL; |
2743 | u64 alloc_hint = 0; | 2747 | u64 alloc_hint = 0; |
2748 | int dcs = BTRFS_DC_ERROR; | ||
2744 | int num_pages = 0; | 2749 | int num_pages = 0; |
2745 | int retries = 0; | 2750 | int retries = 0; |
2746 | int ret = 0; | 2751 | int ret = 0; |
@@ -2795,6 +2800,8 @@ again: | |||
2795 | 2800 | ||
2796 | spin_lock(&block_group->lock); | 2801 | spin_lock(&block_group->lock); |
2797 | if (block_group->cached != BTRFS_CACHE_FINISHED) { | 2802 | if (block_group->cached != BTRFS_CACHE_FINISHED) { |
2803 | /* We're not cached, don't bother trying to write stuff out */ | ||
2804 | dcs = BTRFS_DC_WRITTEN; | ||
2798 | spin_unlock(&block_group->lock); | 2805 | spin_unlock(&block_group->lock); |
2799 | goto out_put; | 2806 | goto out_put; |
2800 | } | 2807 | } |
@@ -2821,6 +2828,8 @@ again: | |||
2821 | ret = btrfs_prealloc_file_range_trans(inode, trans, 0, 0, num_pages, | 2828 | ret = btrfs_prealloc_file_range_trans(inode, trans, 0, 0, num_pages, |
2822 | num_pages, num_pages, | 2829 | num_pages, num_pages, |
2823 | &alloc_hint); | 2830 | &alloc_hint); |
2831 | if (!ret) | ||
2832 | dcs = BTRFS_DC_SETUP; | ||
2824 | btrfs_free_reserved_data_space(inode, num_pages); | 2833 | btrfs_free_reserved_data_space(inode, num_pages); |
2825 | out_put: | 2834 | out_put: |
2826 | iput(inode); | 2835 | iput(inode); |
@@ -2828,10 +2837,7 @@ out_free: | |||
2828 | btrfs_release_path(root, path); | 2837 | btrfs_release_path(root, path); |
2829 | out: | 2838 | out: |
2830 | spin_lock(&block_group->lock); | 2839 | spin_lock(&block_group->lock); |
2831 | if (ret) | 2840 | block_group->disk_cache_state = dcs; |
2832 | block_group->disk_cache_state = BTRFS_DC_ERROR; | ||
2833 | else | ||
2834 | block_group->disk_cache_state = BTRFS_DC_SETUP; | ||
2835 | spin_unlock(&block_group->lock); | 2841 | spin_unlock(&block_group->lock); |
2836 | 2842 | ||
2837 | return ret; | 2843 | return ret; |
@@ -3037,7 +3043,13 @@ static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags) | |||
3037 | 3043 | ||
3038 | u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags) | 3044 | u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags) |
3039 | { | 3045 | { |
3040 | u64 num_devices = root->fs_info->fs_devices->rw_devices; | 3046 | /* |
3047 | * we add in the count of missing devices because we want | ||
3048 | * to make sure that any RAID levels on a degraded FS | ||
3049 | * continue to be honored. | ||
3050 | */ | ||
3051 | u64 num_devices = root->fs_info->fs_devices->rw_devices + | ||
3052 | root->fs_info->fs_devices->missing_devices; | ||
3041 | 3053 | ||
3042 | if (num_devices == 1) | 3054 | if (num_devices == 1) |
3043 | flags &= ~(BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID0); | 3055 | flags &= ~(BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID0); |
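Both this hunk and the matching one in update_block_group_flags() (the @@ -7878 hunk further down) add missing_devices into the device count so that, as the new comment says, RAID levels on a degraded filesystem continue to be honored. For example, a two-device RAID1 mounted with one device missing has rw_devices == 1; counting only rw devices would make the profile-reduction code strip the RAID1 bit. A small sketch with illustrative flag values, not the real BTRFS_BLOCK_GROUP_* masks:

#include <stdio.h>
#include <inttypes.h>

#define BLOCK_GROUP_RAID0 (1ULL << 3)	/* illustrative bits only */
#define BLOCK_GROUP_RAID1 (1ULL << 4)

static uint64_t reduce_profile(uint64_t flags, uint64_t rw, uint64_t missing)
{
	uint64_t num_devices = rw + missing;	/* the fix: count missing devices too */

	if (num_devices == 1)
		flags &= ~(BLOCK_GROUP_RAID1 | BLOCK_GROUP_RAID0);
	return flags;
}

int main(void)
{
	/* two-device RAID1 mounted degraded: one rw device, one missing */
	uint64_t fixed = reduce_profile(BLOCK_GROUP_RAID1, 1, 1);
	uint64_t old   = reduce_profile(BLOCK_GROUP_RAID1, 1, 0);

	printf("missing counted: RAID1 %s\n", fixed & BLOCK_GROUP_RAID1 ? "kept" : "dropped");
	printf("old behaviour:   RAID1 %s\n", old   & BLOCK_GROUP_RAID1 ? "kept" : "dropped");
	return 0;
}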
@@ -3412,7 +3424,7 @@ again: | |||
3412 | * our reservation. | 3424 | * our reservation. |
3413 | */ | 3425 | */ |
3414 | if (unused <= space_info->total_bytes) { | 3426 | if (unused <= space_info->total_bytes) { |
3415 | unused -= space_info->total_bytes; | 3427 | unused = space_info->total_bytes - unused; |
3416 | if (unused >= num_bytes) { | 3428 | if (unused >= num_bytes) { |
3417 | if (!reserved) | 3429 | if (!reserved) |
3418 | space_info->bytes_reserved += orig_bytes; | 3430 | space_info->bytes_reserved += orig_bytes; |
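The @@ -3412 hunk is a one-line arithmetic fix: at that point the variable named unused actually holds the bytes already charged against the space_info, so the old unused -= space_info->total_bytes underflowed (the branch is only taken when that value is <= total_bytes), while the new form yields the bytes genuinely still free. A worked example with made-up numbers:

#include <stdio.h>
#include <inttypes.h>

int main(void)
{
	uint64_t total_bytes = 100;	/* space_info->total_bytes (hypothetical) */
	uint64_t unused = 40;		/* bytes already in use at this point */

	uint64_t old_result   = unused - total_bytes;	/* 40 - 100 wraps to 2^64 - 60 */
	uint64_t fixed_result = total_bytes - unused;	/* 60 bytes actually free */

	printf("old:   %" PRIu64 "\n", old_result);
	printf("fixed: %" PRIu64 "\n", fixed_result);
	return 0;
}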
@@ -4080,7 +4092,7 @@ static int update_block_group(struct btrfs_trans_handle *trans, | |||
4080 | * space back to the block group, otherwise we will leak space. | 4092 | * space back to the block group, otherwise we will leak space. |
4081 | */ | 4093 | */ |
4082 | if (!alloc && cache->cached == BTRFS_CACHE_NO) | 4094 | if (!alloc && cache->cached == BTRFS_CACHE_NO) |
4083 | cache_block_group(cache, trans, 1); | 4095 | cache_block_group(cache, trans, NULL, 1); |
4084 | 4096 | ||
4085 | byte_in_group = bytenr - cache->key.objectid; | 4097 | byte_in_group = bytenr - cache->key.objectid; |
4086 | WARN_ON(byte_in_group > cache->key.offset); | 4098 | WARN_ON(byte_in_group > cache->key.offset); |
@@ -4930,11 +4942,31 @@ search: | |||
4930 | btrfs_get_block_group(block_group); | 4942 | btrfs_get_block_group(block_group); |
4931 | search_start = block_group->key.objectid; | 4943 | search_start = block_group->key.objectid; |
4932 | 4944 | ||
4945 | /* | ||
4946 | * this can happen if we end up cycling through all the | ||
4947 | * raid types, but we want to make sure we only allocate | ||
4948 | * for the proper type. | ||
4949 | */ | ||
4950 | if (!block_group_bits(block_group, data)) { | ||
4951 | u64 extra = BTRFS_BLOCK_GROUP_DUP | | ||
4952 | BTRFS_BLOCK_GROUP_RAID1 | | ||
4953 | BTRFS_BLOCK_GROUP_RAID10; | ||
4954 | |||
4955 | /* | ||
4956 | * if they asked for extra copies and this block group | ||
4957 | * doesn't provide them, bail. This does allow us to | ||
4958 | * fill raid0 from raid1. | ||
4959 | */ | ||
4960 | if ((data & extra) && !(block_group->flags & extra)) | ||
4961 | goto loop; | ||
4962 | } | ||
4963 | |||
4933 | have_block_group: | 4964 | have_block_group: |
4934 | if (unlikely(block_group->cached == BTRFS_CACHE_NO)) { | 4965 | if (unlikely(block_group->cached == BTRFS_CACHE_NO)) { |
4935 | u64 free_percent; | 4966 | u64 free_percent; |
4936 | 4967 | ||
4937 | ret = cache_block_group(block_group, trans, 1); | 4968 | ret = cache_block_group(block_group, trans, |
4969 | orig_root, 1); | ||
4938 | if (block_group->cached == BTRFS_CACHE_FINISHED) | 4970 | if (block_group->cached == BTRFS_CACHE_FINISHED) |
4939 | goto have_block_group; | 4971 | goto have_block_group; |
4940 | 4972 | ||
@@ -4958,7 +4990,8 @@ have_block_group: | |||
4958 | if (loop > LOOP_CACHING_NOWAIT || | 4990 | if (loop > LOOP_CACHING_NOWAIT || |
4959 | (loop > LOOP_FIND_IDEAL && | 4991 | (loop > LOOP_FIND_IDEAL && |
4960 | atomic_read(&space_info->caching_threads) < 2)) { | 4992 | atomic_read(&space_info->caching_threads) < 2)) { |
4961 | ret = cache_block_group(block_group, trans, 0); | 4993 | ret = cache_block_group(block_group, trans, |
4994 | orig_root, 0); | ||
4962 | BUG_ON(ret); | 4995 | BUG_ON(ret); |
4963 | } | 4996 | } |
4964 | found_uncached_bg = true; | 4997 | found_uncached_bg = true; |
@@ -5515,7 +5548,7 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans, | |||
5515 | u64 num_bytes = ins->offset; | 5548 | u64 num_bytes = ins->offset; |
5516 | 5549 | ||
5517 | block_group = btrfs_lookup_block_group(root->fs_info, ins->objectid); | 5550 | block_group = btrfs_lookup_block_group(root->fs_info, ins->objectid); |
5518 | cache_block_group(block_group, trans, 0); | 5551 | cache_block_group(block_group, trans, NULL, 0); |
5519 | caching_ctl = get_caching_control(block_group); | 5552 | caching_ctl = get_caching_control(block_group); |
5520 | 5553 | ||
5521 | if (!caching_ctl) { | 5554 | if (!caching_ctl) { |
@@ -6300,9 +6333,13 @@ int btrfs_drop_snapshot(struct btrfs_root *root, | |||
6300 | NULL, NULL); | 6333 | NULL, NULL); |
6301 | BUG_ON(ret < 0); | 6334 | BUG_ON(ret < 0); |
6302 | if (ret > 0) { | 6335 | if (ret > 0) { |
6303 | ret = btrfs_del_orphan_item(trans, tree_root, | 6336 | /* if we fail to delete the orphan item this time |
6304 | root->root_key.objectid); | 6337 | * around, it'll get picked up the next time. |
6305 | BUG_ON(ret); | 6338 | * |
6339 | * The most common failure here is just -ENOENT. | ||
6340 | */ | ||
6341 | btrfs_del_orphan_item(trans, tree_root, | ||
6342 | root->root_key.objectid); | ||
6306 | } | 6343 | } |
6307 | } | 6344 | } |
6308 | 6345 | ||
@@ -7878,7 +7915,14 @@ static u64 update_block_group_flags(struct btrfs_root *root, u64 flags) | |||
7878 | u64 stripped = BTRFS_BLOCK_GROUP_RAID0 | | 7915 | u64 stripped = BTRFS_BLOCK_GROUP_RAID0 | |
7879 | BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10; | 7916 | BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10; |
7880 | 7917 | ||
7881 | num_devices = root->fs_info->fs_devices->rw_devices; | 7918 | /* |
7919 | * we add in the count of missing devices because we want | ||
7920 | * to make sure that any RAID levels on a degraded FS | ||
7921 | * continue to be honored. | ||
7922 | */ | ||
7923 | num_devices = root->fs_info->fs_devices->rw_devices + | ||
7924 | root->fs_info->fs_devices->missing_devices; | ||
7925 | |||
7882 | if (num_devices == 1) { | 7926 | if (num_devices == 1) { |
7883 | stripped |= BTRFS_BLOCK_GROUP_DUP; | 7927 | stripped |= BTRFS_BLOCK_GROUP_DUP; |
7884 | stripped = flags & ~stripped; | 7928 | stripped = flags & ~stripped; |
@@ -8247,7 +8291,6 @@ int btrfs_read_block_groups(struct btrfs_root *root) | |||
8247 | break; | 8291 | break; |
8248 | if (ret != 0) | 8292 | if (ret != 0) |
8249 | goto error; | 8293 | goto error; |
8250 | |||
8251 | leaf = path->nodes[0]; | 8294 | leaf = path->nodes[0]; |
8252 | btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); | 8295 | btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); |
8253 | cache = kzalloc(sizeof(*cache), GFP_NOFS); | 8296 | cache = kzalloc(sizeof(*cache), GFP_NOFS); |
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index eac10e3260a9..3e86b9f36507 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -1828,9 +1828,9 @@ static void end_bio_extent_preparewrite(struct bio *bio, int err) | |||
1828 | bio_put(bio); | 1828 | bio_put(bio); |
1829 | } | 1829 | } |
1830 | 1830 | ||
1831 | static struct bio * | 1831 | struct bio * |
1832 | extent_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs, | 1832 | btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs, |
1833 | gfp_t gfp_flags) | 1833 | gfp_t gfp_flags) |
1834 | { | 1834 | { |
1835 | struct bio *bio; | 1835 | struct bio *bio; |
1836 | 1836 | ||
@@ -1919,7 +1919,7 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree, | |||
1919 | else | 1919 | else |
1920 | nr = bio_get_nr_vecs(bdev); | 1920 | nr = bio_get_nr_vecs(bdev); |
1921 | 1921 | ||
1922 | bio = extent_bio_alloc(bdev, sector, nr, GFP_NOFS | __GFP_HIGH); | 1922 | bio = btrfs_bio_alloc(bdev, sector, nr, GFP_NOFS | __GFP_HIGH); |
1923 | 1923 | ||
1924 | bio_add_page(bio, page, page_size, offset); | 1924 | bio_add_page(bio, page, page_size, offset); |
1925 | bio->bi_end_io = end_io_func; | 1925 | bio->bi_end_io = end_io_func; |
@@ -2901,21 +2901,53 @@ out: | |||
2901 | int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | 2901 | int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, |
2902 | __u64 start, __u64 len, get_extent_t *get_extent) | 2902 | __u64 start, __u64 len, get_extent_t *get_extent) |
2903 | { | 2903 | { |
2904 | int ret; | 2904 | int ret = 0; |
2905 | u64 off = start; | 2905 | u64 off = start; |
2906 | u64 max = start + len; | 2906 | u64 max = start + len; |
2907 | u32 flags = 0; | 2907 | u32 flags = 0; |
2908 | u32 found_type; | ||
2909 | u64 last; | ||
2908 | u64 disko = 0; | 2910 | u64 disko = 0; |
2911 | struct btrfs_key found_key; | ||
2909 | struct extent_map *em = NULL; | 2912 | struct extent_map *em = NULL; |
2910 | struct extent_state *cached_state = NULL; | 2913 | struct extent_state *cached_state = NULL; |
2914 | struct btrfs_path *path; | ||
2915 | struct btrfs_file_extent_item *item; | ||
2911 | int end = 0; | 2916 | int end = 0; |
2912 | u64 em_start = 0, em_len = 0; | 2917 | u64 em_start = 0, em_len = 0; |
2913 | unsigned long emflags; | 2918 | unsigned long emflags; |
2914 | ret = 0; | 2919 | int hole = 0; |
2915 | 2920 | ||
2916 | if (len == 0) | 2921 | if (len == 0) |
2917 | return -EINVAL; | 2922 | return -EINVAL; |
2918 | 2923 | ||
2924 | path = btrfs_alloc_path(); | ||
2925 | if (!path) | ||
2926 | return -ENOMEM; | ||
2927 | path->leave_spinning = 1; | ||
2928 | |||
2929 | ret = btrfs_lookup_file_extent(NULL, BTRFS_I(inode)->root, | ||
2930 | path, inode->i_ino, -1, 0); | ||
2931 | if (ret < 0) { | ||
2932 | btrfs_free_path(path); | ||
2933 | return ret; | ||
2934 | } | ||
2935 | WARN_ON(!ret); | ||
2936 | path->slots[0]--; | ||
2937 | item = btrfs_item_ptr(path->nodes[0], path->slots[0], | ||
2938 | struct btrfs_file_extent_item); | ||
2939 | btrfs_item_key_to_cpu(path->nodes[0], &found_key, path->slots[0]); | ||
2940 | found_type = btrfs_key_type(&found_key); | ||
2941 | |||
2942 | /* No extents, just return */ | ||
2943 | if (found_key.objectid != inode->i_ino || | ||
2944 | found_type != BTRFS_EXTENT_DATA_KEY) { | ||
2945 | btrfs_free_path(path); | ||
2946 | return 0; | ||
2947 | } | ||
2948 | last = found_key.offset; | ||
2949 | btrfs_free_path(path); | ||
2950 | |||
2919 | lock_extent_bits(&BTRFS_I(inode)->io_tree, start, start + len, 0, | 2951 | lock_extent_bits(&BTRFS_I(inode)->io_tree, start, start + len, 0, |
2920 | &cached_state, GFP_NOFS); | 2952 | &cached_state, GFP_NOFS); |
2921 | em = get_extent(inode, NULL, 0, off, max - off, 0); | 2953 | em = get_extent(inode, NULL, 0, off, max - off, 0); |
@@ -2925,11 +2957,18 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | |||
2925 | ret = PTR_ERR(em); | 2957 | ret = PTR_ERR(em); |
2926 | goto out; | 2958 | goto out; |
2927 | } | 2959 | } |
2960 | |||
2928 | while (!end) { | 2961 | while (!end) { |
2962 | hole = 0; | ||
2929 | off = em->start + em->len; | 2963 | off = em->start + em->len; |
2930 | if (off >= max) | 2964 | if (off >= max) |
2931 | end = 1; | 2965 | end = 1; |
2932 | 2966 | ||
2967 | if (em->block_start == EXTENT_MAP_HOLE) { | ||
2968 | hole = 1; | ||
2969 | goto next; | ||
2970 | } | ||
2971 | |||
2933 | em_start = em->start; | 2972 | em_start = em->start; |
2934 | em_len = em->len; | 2973 | em_len = em->len; |
2935 | 2974 | ||
@@ -2939,8 +2978,6 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | |||
2939 | if (em->block_start == EXTENT_MAP_LAST_BYTE) { | 2978 | if (em->block_start == EXTENT_MAP_LAST_BYTE) { |
2940 | end = 1; | 2979 | end = 1; |
2941 | flags |= FIEMAP_EXTENT_LAST; | 2980 | flags |= FIEMAP_EXTENT_LAST; |
2942 | } else if (em->block_start == EXTENT_MAP_HOLE) { | ||
2943 | flags |= FIEMAP_EXTENT_UNWRITTEN; | ||
2944 | } else if (em->block_start == EXTENT_MAP_INLINE) { | 2981 | } else if (em->block_start == EXTENT_MAP_INLINE) { |
2945 | flags |= (FIEMAP_EXTENT_DATA_INLINE | | 2982 | flags |= (FIEMAP_EXTENT_DATA_INLINE | |
2946 | FIEMAP_EXTENT_NOT_ALIGNED); | 2983 | FIEMAP_EXTENT_NOT_ALIGNED); |
@@ -2953,10 +2990,10 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | |||
2953 | if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) | 2990 | if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) |
2954 | flags |= FIEMAP_EXTENT_ENCODED; | 2991 | flags |= FIEMAP_EXTENT_ENCODED; |
2955 | 2992 | ||
2993 | next: | ||
2956 | emflags = em->flags; | 2994 | emflags = em->flags; |
2957 | free_extent_map(em); | 2995 | free_extent_map(em); |
2958 | em = NULL; | 2996 | em = NULL; |
2959 | |||
2960 | if (!end) { | 2997 | if (!end) { |
2961 | em = get_extent(inode, NULL, 0, off, max - off, 0); | 2998 | em = get_extent(inode, NULL, 0, off, max - off, 0); |
2962 | if (!em) | 2999 | if (!em) |
@@ -2967,15 +3004,23 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | |||
2967 | } | 3004 | } |
2968 | emflags = em->flags; | 3005 | emflags = em->flags; |
2969 | } | 3006 | } |
3007 | |||
2970 | if (test_bit(EXTENT_FLAG_VACANCY, &emflags)) { | 3008 | if (test_bit(EXTENT_FLAG_VACANCY, &emflags)) { |
2971 | flags |= FIEMAP_EXTENT_LAST; | 3009 | flags |= FIEMAP_EXTENT_LAST; |
2972 | end = 1; | 3010 | end = 1; |
2973 | } | 3011 | } |
2974 | 3012 | ||
2975 | ret = fiemap_fill_next_extent(fieinfo, em_start, disko, | 3013 | if (em_start == last) { |
2976 | em_len, flags); | 3014 | flags |= FIEMAP_EXTENT_LAST; |
2977 | if (ret) | 3015 | end = 1; |
2978 | goto out_free; | 3016 | } |
3017 | |||
3018 | if (!hole) { | ||
3019 | ret = fiemap_fill_next_extent(fieinfo, em_start, disko, | ||
3020 | em_len, flags); | ||
3021 | if (ret) | ||
3022 | goto out_free; | ||
3023 | } | ||
2979 | } | 3024 | } |
2980 | out_free: | 3025 | out_free: |
2981 | free_extent_map(em); | 3026 | free_extent_map(em); |
@@ -3836,8 +3881,10 @@ int try_release_extent_buffer(struct extent_io_tree *tree, struct page *page) | |||
3836 | 3881 | ||
3837 | spin_lock(&tree->buffer_lock); | 3882 | spin_lock(&tree->buffer_lock); |
3838 | eb = radix_tree_lookup(&tree->buffer, start >> PAGE_CACHE_SHIFT); | 3883 | eb = radix_tree_lookup(&tree->buffer, start >> PAGE_CACHE_SHIFT); |
3839 | if (!eb) | 3884 | if (!eb) { |
3840 | goto out; | 3885 | spin_unlock(&tree->buffer_lock); |
3886 | return ret; | ||
3887 | } | ||
3841 | 3888 | ||
3842 | if (test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)) { | 3889 | if (test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)) { |
3843 | ret = 0; | 3890 | ret = 0; |
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 1c6d4f342ef7..4183c8178f01 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -310,4 +310,7 @@ int extent_clear_unlock_delalloc(struct inode *inode, | |||
310 | struct extent_io_tree *tree, | 310 | struct extent_io_tree *tree, |
311 | u64 start, u64 end, struct page *locked_page, | 311 | u64 start, u64 end, struct page *locked_page, |
312 | unsigned long op); | 312 | unsigned long op); |
313 | struct bio * | ||
314 | btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs, | ||
315 | gfp_t gfp_flags); | ||
313 | #endif | 316 | #endif |
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index e354c33df082..66836d85763b 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -48,30 +48,34 @@ static noinline int btrfs_copy_from_user(loff_t pos, int num_pages, | |||
48 | struct page **prepared_pages, | 48 | struct page **prepared_pages, |
49 | struct iov_iter *i) | 49 | struct iov_iter *i) |
50 | { | 50 | { |
51 | size_t copied; | 51 | size_t copied = 0; |
52 | int pg = 0; | 52 | int pg = 0; |
53 | int offset = pos & (PAGE_CACHE_SIZE - 1); | 53 | int offset = pos & (PAGE_CACHE_SIZE - 1); |
54 | int total_copied = 0; | ||
54 | 55 | ||
55 | while (write_bytes > 0) { | 56 | while (write_bytes > 0) { |
56 | size_t count = min_t(size_t, | 57 | size_t count = min_t(size_t, |
57 | PAGE_CACHE_SIZE - offset, write_bytes); | 58 | PAGE_CACHE_SIZE - offset, write_bytes); |
58 | struct page *page = prepared_pages[pg]; | 59 | struct page *page = prepared_pages[pg]; |
59 | again: | 60 | /* |
60 | if (unlikely(iov_iter_fault_in_readable(i, count))) | 61 | * Copy data from userspace to the current page |
61 | return -EFAULT; | 62 | * |
62 | 63 | * Disable pagefault to avoid recursive lock since | |
63 | /* Copy data from userspace to the current page */ | 64 | * the pages are already locked |
64 | copied = iov_iter_copy_from_user(page, i, offset, count); | 65 | */ |
66 | pagefault_disable(); | ||
67 | copied = iov_iter_copy_from_user_atomic(page, i, offset, count); | ||
68 | pagefault_enable(); | ||
65 | 69 | ||
66 | /* Flush processor's dcache for this page */ | 70 | /* Flush processor's dcache for this page */ |
67 | flush_dcache_page(page); | 71 | flush_dcache_page(page); |
68 | iov_iter_advance(i, copied); | 72 | iov_iter_advance(i, copied); |
69 | write_bytes -= copied; | 73 | write_bytes -= copied; |
74 | total_copied += copied; | ||
70 | 75 | ||
76 | /* Return to btrfs_file_aio_write to fault page */ | ||
71 | if (unlikely(copied == 0)) { | 77 | if (unlikely(copied == 0)) { |
72 | count = min_t(size_t, PAGE_CACHE_SIZE - offset, | 78 | break; |
73 | iov_iter_single_seg_count(i)); | ||
74 | goto again; | ||
75 | } | 79 | } |
76 | 80 | ||
77 | if (unlikely(copied < PAGE_CACHE_SIZE - offset)) { | 81 | if (unlikely(copied < PAGE_CACHE_SIZE - offset)) { |
@@ -81,7 +85,7 @@ again: | |||
81 | offset = 0; | 85 | offset = 0; |
82 | } | 86 | } |
83 | } | 87 | } |
84 | return 0; | 88 | return total_copied; |
85 | } | 89 | } |
86 | 90 | ||
87 | /* | 91 | /* |
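This hunk, together with the @@ -970 hunk further down, moves the fault-in of the user buffer out of the copy loop: btrfs_file_aio_write now pre-faults the pages before prepare_pages() takes the page locks, and btrfs_copy_from_user copies with pagefaults disabled, returning however many bytes it managed instead of 0 or -EFAULT. A user-space control-flow sketch of the new loop; the names and the short-copy stand-in are illustrative only:

#include <stddef.h>
#include <string.h>
#include <stdio.h>

#define PAGE_SIZE 4096

/* Stand-in for iov_iter_copy_from_user_atomic(): in the kernel this runs
 * with pagefaults disabled and may therefore copy less than requested. */
static size_t copy_atomic(char *dst, const char **src, size_t count)
{
	size_t copied = count < 100 ? count : 100;	/* pretend a fault cut us short */

	memcpy(dst, *src, copied);
	*src += copied;
	return copied;
}

/* Control-flow sketch of the rewritten copy loop: a short copy is not
 * retried under the page locks; the bytes actually copied are returned. */
static size_t copy_from_user_sketch(char pages[][PAGE_SIZE], int num_pages,
				    const char *src, size_t write_bytes)
{
	size_t total_copied = 0, offset = 0;
	int pg = 0;

	while (write_bytes > 0 && pg < num_pages) {
		size_t count = PAGE_SIZE - offset;

		if (count > write_bytes)
			count = write_bytes;

		size_t copied = copy_atomic(pages[pg] + offset, &src, count);

		write_bytes -= copied;
		total_copied += copied;

		if (copied == 0)
			break;			/* caller faults pages in and retries */
		if (copied < count) {
			offset += copied;	/* partial copy: stay on this page */
		} else {
			pg++;
			offset = 0;
		}
	}
	return total_copied;
}

int main(void)
{
	static char pages[3][PAGE_SIZE];
	static char user_buf[2 * PAGE_SIZE + 123] = "hello";

	printf("copied %zu bytes\n",
	       copy_from_user_sketch(pages, 3, user_buf, sizeof(user_buf)));
	return 0;
}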
@@ -854,6 +858,8 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb, | |||
854 | unsigned long last_index; | 858 | unsigned long last_index; |
855 | int will_write; | 859 | int will_write; |
856 | int buffered = 0; | 860 | int buffered = 0; |
861 | int copied = 0; | ||
862 | int dirty_pages = 0; | ||
857 | 863 | ||
858 | will_write = ((file->f_flags & O_DSYNC) || IS_SYNC(inode) || | 864 | will_write = ((file->f_flags & O_DSYNC) || IS_SYNC(inode) || |
859 | (file->f_flags & O_DIRECT)); | 865 | (file->f_flags & O_DIRECT)); |
@@ -970,7 +976,17 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb, | |||
970 | WARN_ON(num_pages > nrptrs); | 976 | WARN_ON(num_pages > nrptrs); |
971 | memset(pages, 0, sizeof(struct page *) * nrptrs); | 977 | memset(pages, 0, sizeof(struct page *) * nrptrs); |
972 | 978 | ||
973 | ret = btrfs_delalloc_reserve_space(inode, write_bytes); | 979 | /* |
980 | * Fault pages before locking them in prepare_pages | ||
981 | * to avoid recursive lock | ||
982 | */ | ||
983 | if (unlikely(iov_iter_fault_in_readable(&i, write_bytes))) { | ||
984 | ret = -EFAULT; | ||
985 | goto out; | ||
986 | } | ||
987 | |||
988 | ret = btrfs_delalloc_reserve_space(inode, | ||
989 | num_pages << PAGE_CACHE_SHIFT); | ||
974 | if (ret) | 990 | if (ret) |
975 | goto out; | 991 | goto out; |
976 | 992 | ||
@@ -978,37 +994,49 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb, | |||
978 | pos, first_index, last_index, | 994 | pos, first_index, last_index, |
979 | write_bytes); | 995 | write_bytes); |
980 | if (ret) { | 996 | if (ret) { |
981 | btrfs_delalloc_release_space(inode, write_bytes); | 997 | btrfs_delalloc_release_space(inode, |
998 | num_pages << PAGE_CACHE_SHIFT); | ||
982 | goto out; | 999 | goto out; |
983 | } | 1000 | } |
984 | 1001 | ||
985 | ret = btrfs_copy_from_user(pos, num_pages, | 1002 | copied = btrfs_copy_from_user(pos, num_pages, |
986 | write_bytes, pages, &i); | 1003 | write_bytes, pages, &i); |
987 | if (ret == 0) { | 1004 | dirty_pages = (copied + PAGE_CACHE_SIZE - 1) >> |
1005 | PAGE_CACHE_SHIFT; | ||
1006 | |||
1007 | if (num_pages > dirty_pages) { | ||
1008 | if (copied > 0) | ||
1009 | atomic_inc( | ||
1010 | &BTRFS_I(inode)->outstanding_extents); | ||
1011 | btrfs_delalloc_release_space(inode, | ||
1012 | (num_pages - dirty_pages) << | ||
1013 | PAGE_CACHE_SHIFT); | ||
1014 | } | ||
1015 | |||
1016 | if (copied > 0) { | ||
988 | dirty_and_release_pages(NULL, root, file, pages, | 1017 | dirty_and_release_pages(NULL, root, file, pages, |
989 | num_pages, pos, write_bytes); | 1018 | dirty_pages, pos, copied); |
990 | } | 1019 | } |
991 | 1020 | ||
992 | btrfs_drop_pages(pages, num_pages); | 1021 | btrfs_drop_pages(pages, num_pages); |
993 | if (ret) { | ||
994 | btrfs_delalloc_release_space(inode, write_bytes); | ||
995 | goto out; | ||
996 | } | ||
997 | 1022 | ||
998 | if (will_write) { | 1023 | if (copied > 0) { |
999 | filemap_fdatawrite_range(inode->i_mapping, pos, | 1024 | if (will_write) { |
1000 | pos + write_bytes - 1); | 1025 | filemap_fdatawrite_range(inode->i_mapping, pos, |
1001 | } else { | 1026 | pos + copied - 1); |
1002 | balance_dirty_pages_ratelimited_nr(inode->i_mapping, | 1027 | } else { |
1003 | num_pages); | 1028 | balance_dirty_pages_ratelimited_nr( |
1004 | if (num_pages < | 1029 | inode->i_mapping, |
1005 | (root->leafsize >> PAGE_CACHE_SHIFT) + 1) | 1030 | dirty_pages); |
1006 | btrfs_btree_balance_dirty(root, 1); | 1031 | if (dirty_pages < |
1007 | btrfs_throttle(root); | 1032 | (root->leafsize >> PAGE_CACHE_SHIFT) + 1) |
1033 | btrfs_btree_balance_dirty(root, 1); | ||
1034 | btrfs_throttle(root); | ||
1035 | } | ||
1008 | } | 1036 | } |
1009 | 1037 | ||
1010 | pos += write_bytes; | 1038 | pos += copied; |
1011 | num_written += write_bytes; | 1039 | num_written += copied; |
1012 | 1040 | ||
1013 | cond_resched(); | 1041 | cond_resched(); |
1014 | } | 1042 | } |
@@ -1047,8 +1075,14 @@ out: | |||
1047 | 1075 | ||
1048 | if ((file->f_flags & O_DSYNC) || IS_SYNC(inode)) { | 1076 | if ((file->f_flags & O_DSYNC) || IS_SYNC(inode)) { |
1049 | trans = btrfs_start_transaction(root, 0); | 1077 | trans = btrfs_start_transaction(root, 0); |
1078 | if (IS_ERR(trans)) { | ||
1079 | num_written = PTR_ERR(trans); | ||
1080 | goto done; | ||
1081 | } | ||
1082 | mutex_lock(&inode->i_mutex); | ||
1050 | ret = btrfs_log_dentry_safe(trans, root, | 1083 | ret = btrfs_log_dentry_safe(trans, root, |
1051 | file->f_dentry); | 1084 | file->f_dentry); |
1085 | mutex_unlock(&inode->i_mutex); | ||
1052 | if (ret == 0) { | 1086 | if (ret == 0) { |
1053 | ret = btrfs_sync_log(trans, root); | 1087 | ret = btrfs_sync_log(trans, root); |
1054 | if (ret == 0) | 1088 | if (ret == 0) |
@@ -1067,6 +1101,7 @@ out: | |||
1067 | (start_pos + num_written - 1) >> PAGE_CACHE_SHIFT); | 1101 | (start_pos + num_written - 1) >> PAGE_CACHE_SHIFT); |
1068 | } | 1102 | } |
1069 | } | 1103 | } |
1104 | done: | ||
1070 | current->backing_dev_info = NULL; | 1105 | current->backing_dev_info = NULL; |
1071 | return num_written ? num_written : err; | 1106 | return num_written ? num_written : err; |
1072 | } | 1107 | } |
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 22ee0dc2e6b8..60d684266959 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -290,7 +290,7 @@ int load_free_space_cache(struct btrfs_fs_info *fs_info, | |||
290 | (unsigned long long)BTRFS_I(inode)->generation, | 290 | (unsigned long long)BTRFS_I(inode)->generation, |
291 | (unsigned long long)generation, | 291 | (unsigned long long)generation, |
292 | (unsigned long long)block_group->key.objectid); | 292 | (unsigned long long)block_group->key.objectid); |
293 | goto out; | 293 | goto free_cache; |
294 | } | 294 | } |
295 | 295 | ||
296 | if (!num_entries) | 296 | if (!num_entries) |
@@ -524,6 +524,12 @@ int btrfs_write_out_cache(struct btrfs_root *root, | |||
524 | return 0; | 524 | return 0; |
525 | } | 525 | } |
526 | 526 | ||
527 | node = rb_first(&block_group->free_space_offset); | ||
528 | if (!node) { | ||
529 | iput(inode); | ||
530 | return 0; | ||
531 | } | ||
532 | |||
527 | last_index = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT; | 533 | last_index = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT; |
528 | filemap_write_and_wait(inode->i_mapping); | 534 | filemap_write_and_wait(inode->i_mapping); |
529 | btrfs_wait_ordered_range(inode, inode->i_size & | 535 | btrfs_wait_ordered_range(inode, inode->i_size & |
@@ -543,10 +549,6 @@ int btrfs_write_out_cache(struct btrfs_root *root, | |||
543 | */ | 549 | */ |
544 | first_page_offset = (sizeof(u32) * num_checksums) + sizeof(u64); | 550 | first_page_offset = (sizeof(u32) * num_checksums) + sizeof(u64); |
545 | 551 | ||
546 | node = rb_first(&block_group->free_space_offset); | ||
547 | if (!node) | ||
548 | goto out_free; | ||
549 | |||
550 | /* | 552 | /* |
551 | * Lock all pages first so we can lock the extent safely. | 553 | * Lock all pages first so we can lock the extent safely. |
552 | * | 554 | * |
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 558cac2dfa54..72f31ecb5c90 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -495,7 +495,7 @@ again: | |||
495 | add_async_extent(async_cow, start, num_bytes, | 495 | add_async_extent(async_cow, start, num_bytes, |
496 | total_compressed, pages, nr_pages_ret); | 496 | total_compressed, pages, nr_pages_ret); |
497 | 497 | ||
498 | if (start + num_bytes < end && start + num_bytes < actual_end) { | 498 | if (start + num_bytes < end) { |
499 | start += num_bytes; | 499 | start += num_bytes; |
500 | pages = NULL; | 500 | pages = NULL; |
501 | cond_resched(); | 501 | cond_resched(); |
@@ -4501,6 +4501,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, | |||
4501 | BTRFS_I(inode)->index_cnt = 2; | 4501 | BTRFS_I(inode)->index_cnt = 2; |
4502 | BTRFS_I(inode)->root = root; | 4502 | BTRFS_I(inode)->root = root; |
4503 | BTRFS_I(inode)->generation = trans->transid; | 4503 | BTRFS_I(inode)->generation = trans->transid; |
4504 | inode->i_generation = BTRFS_I(inode)->generation; | ||
4504 | btrfs_set_inode_space_info(root, inode); | 4505 | btrfs_set_inode_space_info(root, inode); |
4505 | 4506 | ||
4506 | if (mode & S_IFDIR) | 4507 | if (mode & S_IFDIR) |
@@ -4622,12 +4623,12 @@ int btrfs_add_link(struct btrfs_trans_handle *trans, | |||
4622 | } | 4623 | } |
4623 | 4624 | ||
4624 | static int btrfs_add_nondir(struct btrfs_trans_handle *trans, | 4625 | static int btrfs_add_nondir(struct btrfs_trans_handle *trans, |
4625 | struct dentry *dentry, struct inode *inode, | 4626 | struct inode *dir, struct dentry *dentry, |
4626 | int backref, u64 index) | 4627 | struct inode *inode, int backref, u64 index) |
4627 | { | 4628 | { |
4628 | int err = btrfs_add_link(trans, dentry->d_parent->d_inode, | 4629 | int err = btrfs_add_link(trans, dir, inode, |
4629 | inode, dentry->d_name.name, | 4630 | dentry->d_name.name, dentry->d_name.len, |
4630 | dentry->d_name.len, backref, index); | 4631 | backref, index); |
4631 | if (!err) { | 4632 | if (!err) { |
4632 | d_instantiate(dentry, inode); | 4633 | d_instantiate(dentry, inode); |
4633 | return 0; | 4634 | return 0; |
@@ -4668,8 +4669,7 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry, | |||
4668 | btrfs_set_trans_block_group(trans, dir); | 4669 | btrfs_set_trans_block_group(trans, dir); |
4669 | 4670 | ||
4670 | inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, | 4671 | inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, |
4671 | dentry->d_name.len, | 4672 | dentry->d_name.len, dir->i_ino, objectid, |
4672 | dentry->d_parent->d_inode->i_ino, objectid, | ||
4673 | BTRFS_I(dir)->block_group, mode, &index); | 4673 | BTRFS_I(dir)->block_group, mode, &index); |
4674 | err = PTR_ERR(inode); | 4674 | err = PTR_ERR(inode); |
4675 | if (IS_ERR(inode)) | 4675 | if (IS_ERR(inode)) |
@@ -4682,7 +4682,7 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry, | |||
4682 | } | 4682 | } |
4683 | 4683 | ||
4684 | btrfs_set_trans_block_group(trans, inode); | 4684 | btrfs_set_trans_block_group(trans, inode); |
4685 | err = btrfs_add_nondir(trans, dentry, inode, 0, index); | 4685 | err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index); |
4686 | if (err) | 4686 | if (err) |
4687 | drop_inode = 1; | 4687 | drop_inode = 1; |
4688 | else { | 4688 | else { |
@@ -4730,10 +4730,8 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, | |||
4730 | btrfs_set_trans_block_group(trans, dir); | 4730 | btrfs_set_trans_block_group(trans, dir); |
4731 | 4731 | ||
4732 | inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, | 4732 | inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, |
4733 | dentry->d_name.len, | 4733 | dentry->d_name.len, dir->i_ino, objectid, |
4734 | dentry->d_parent->d_inode->i_ino, | 4734 | BTRFS_I(dir)->block_group, mode, &index); |
4735 | objectid, BTRFS_I(dir)->block_group, mode, | ||
4736 | &index); | ||
4737 | err = PTR_ERR(inode); | 4735 | err = PTR_ERR(inode); |
4738 | if (IS_ERR(inode)) | 4736 | if (IS_ERR(inode)) |
4739 | goto out_unlock; | 4737 | goto out_unlock; |
@@ -4745,7 +4743,7 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, | |||
4745 | } | 4743 | } |
4746 | 4744 | ||
4747 | btrfs_set_trans_block_group(trans, inode); | 4745 | btrfs_set_trans_block_group(trans, inode); |
4748 | err = btrfs_add_nondir(trans, dentry, inode, 0, index); | 4746 | err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index); |
4749 | if (err) | 4747 | if (err) |
4750 | drop_inode = 1; | 4748 | drop_inode = 1; |
4751 | else { | 4749 | else { |
@@ -4787,6 +4785,7 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, | |||
4787 | return -EPERM; | 4785 | return -EPERM; |
4788 | 4786 | ||
4789 | btrfs_inc_nlink(inode); | 4787 | btrfs_inc_nlink(inode); |
4788 | inode->i_ctime = CURRENT_TIME; | ||
4790 | 4789 | ||
4791 | err = btrfs_set_inode_index(dir, &index); | 4790 | err = btrfs_set_inode_index(dir, &index); |
4792 | if (err) | 4791 | if (err) |
@@ -4805,15 +4804,17 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, | |||
4805 | btrfs_set_trans_block_group(trans, dir); | 4804 | btrfs_set_trans_block_group(trans, dir); |
4806 | ihold(inode); | 4805 | ihold(inode); |
4807 | 4806 | ||
4808 | err = btrfs_add_nondir(trans, dentry, inode, 1, index); | 4807 | err = btrfs_add_nondir(trans, dir, dentry, inode, 1, index); |
4809 | 4808 | ||
4810 | if (err) { | 4809 | if (err) { |
4811 | drop_inode = 1; | 4810 | drop_inode = 1; |
4812 | } else { | 4811 | } else { |
4812 | struct dentry *parent = dget_parent(dentry); | ||
4813 | btrfs_update_inode_block_group(trans, dir); | 4813 | btrfs_update_inode_block_group(trans, dir); |
4814 | err = btrfs_update_inode(trans, root, inode); | 4814 | err = btrfs_update_inode(trans, root, inode); |
4815 | BUG_ON(err); | 4815 | BUG_ON(err); |
4816 | btrfs_log_new_name(trans, inode, NULL, dentry->d_parent); | 4816 | btrfs_log_new_name(trans, inode, NULL, parent); |
4817 | dput(parent); | ||
4817 | } | 4818 | } |
4818 | 4819 | ||
4819 | nr = trans->blocks_used; | 4820 | nr = trans->blocks_used; |
@@ -4853,8 +4854,7 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) | |||
4853 | btrfs_set_trans_block_group(trans, dir); | 4854 | btrfs_set_trans_block_group(trans, dir); |
4854 | 4855 | ||
4855 | inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, | 4856 | inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, |
4856 | dentry->d_name.len, | 4857 | dentry->d_name.len, dir->i_ino, objectid, |
4857 | dentry->d_parent->d_inode->i_ino, objectid, | ||
4858 | BTRFS_I(dir)->block_group, S_IFDIR | mode, | 4858 | BTRFS_I(dir)->block_group, S_IFDIR | mode, |
4859 | &index); | 4859 | &index); |
4860 | if (IS_ERR(inode)) { | 4860 | if (IS_ERR(inode)) { |
@@ -4877,9 +4877,8 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) | |||
4877 | if (err) | 4877 | if (err) |
4878 | goto out_fail; | 4878 | goto out_fail; |
4879 | 4879 | ||
4880 | err = btrfs_add_link(trans, dentry->d_parent->d_inode, | 4880 | err = btrfs_add_link(trans, dir, inode, dentry->d_name.name, |
4881 | inode, dentry->d_name.name, | 4881 | dentry->d_name.len, 0, index); |
4882 | dentry->d_name.len, 0, index); | ||
4883 | if (err) | 4882 | if (err) |
4884 | goto out_fail; | 4883 | goto out_fail; |
4885 | 4884 | ||
@@ -5535,13 +5534,21 @@ struct btrfs_dio_private { | |||
5535 | u64 bytes; | 5534 | u64 bytes; |
5536 | u32 *csums; | 5535 | u32 *csums; |
5537 | void *private; | 5536 | void *private; |
5537 | |||
5538 | /* number of bios pending for this dio */ | ||
5539 | atomic_t pending_bios; | ||
5540 | |||
5541 | /* IO errors */ | ||
5542 | int errors; | ||
5543 | |||
5544 | struct bio *orig_bio; | ||
5538 | }; | 5545 | }; |
5539 | 5546 | ||
5540 | static void btrfs_endio_direct_read(struct bio *bio, int err) | 5547 | static void btrfs_endio_direct_read(struct bio *bio, int err) |
5541 | { | 5548 | { |
5549 | struct btrfs_dio_private *dip = bio->bi_private; | ||
5542 | struct bio_vec *bvec_end = bio->bi_io_vec + bio->bi_vcnt - 1; | 5550 | struct bio_vec *bvec_end = bio->bi_io_vec + bio->bi_vcnt - 1; |
5543 | struct bio_vec *bvec = bio->bi_io_vec; | 5551 | struct bio_vec *bvec = bio->bi_io_vec; |
5544 | struct btrfs_dio_private *dip = bio->bi_private; | ||
5545 | struct inode *inode = dip->inode; | 5552 | struct inode *inode = dip->inode; |
5546 | struct btrfs_root *root = BTRFS_I(inode)->root; | 5553 | struct btrfs_root *root = BTRFS_I(inode)->root; |
5547 | u64 start; | 5554 | u64 start; |
@@ -5595,15 +5602,18 @@ static void btrfs_endio_direct_write(struct bio *bio, int err) | |||
5595 | struct btrfs_trans_handle *trans; | 5602 | struct btrfs_trans_handle *trans; |
5596 | struct btrfs_ordered_extent *ordered = NULL; | 5603 | struct btrfs_ordered_extent *ordered = NULL; |
5597 | struct extent_state *cached_state = NULL; | 5604 | struct extent_state *cached_state = NULL; |
5605 | u64 ordered_offset = dip->logical_offset; | ||
5606 | u64 ordered_bytes = dip->bytes; | ||
5598 | int ret; | 5607 | int ret; |
5599 | 5608 | ||
5600 | if (err) | 5609 | if (err) |
5601 | goto out_done; | 5610 | goto out_done; |
5602 | 5611 | again: | |
5603 | ret = btrfs_dec_test_ordered_pending(inode, &ordered, | 5612 | ret = btrfs_dec_test_first_ordered_pending(inode, &ordered, |
5604 | dip->logical_offset, dip->bytes); | 5613 | &ordered_offset, |
5614 | ordered_bytes); | ||
5605 | if (!ret) | 5615 | if (!ret) |
5606 | goto out_done; | 5616 | goto out_test; |
5607 | 5617 | ||
5608 | BUG_ON(!ordered); | 5618 | BUG_ON(!ordered); |
5609 | 5619 | ||
@@ -5663,8 +5673,20 @@ out_unlock: | |||
5663 | out: | 5673 | out: |
5664 | btrfs_delalloc_release_metadata(inode, ordered->len); | 5674 | btrfs_delalloc_release_metadata(inode, ordered->len); |
5665 | btrfs_end_transaction(trans, root); | 5675 | btrfs_end_transaction(trans, root); |
5676 | ordered_offset = ordered->file_offset + ordered->len; | ||
5666 | btrfs_put_ordered_extent(ordered); | 5677 | btrfs_put_ordered_extent(ordered); |
5667 | btrfs_put_ordered_extent(ordered); | 5678 | btrfs_put_ordered_extent(ordered); |
5679 | |||
5680 | out_test: | ||
5681 | /* | ||
5682 | * our bio might span multiple ordered extents. If we haven't | ||
5683 | * completed the accounting for the whole dio, go back and try again | ||
5684 | */ | ||
5685 | if (ordered_offset < dip->logical_offset + dip->bytes) { | ||
5686 | ordered_bytes = dip->logical_offset + dip->bytes - | ||
5687 | ordered_offset; | ||
5688 | goto again; | ||
5689 | } | ||
5668 | out_done: | 5690 | out_done: |
5669 | bio->bi_private = dip->private; | 5691 | bio->bi_private = dip->private; |
5670 | 5692 | ||
@@ -5684,6 +5706,176 @@ static int __btrfs_submit_bio_start_direct_io(struct inode *inode, int rw, | |||
5684 | return 0; | 5706 | return 0; |
5685 | } | 5707 | } |
5686 | 5708 | ||
5709 | static void btrfs_end_dio_bio(struct bio *bio, int err) | ||
5710 | { | ||
5711 | struct btrfs_dio_private *dip = bio->bi_private; | ||
5712 | |||
5713 | if (err) { | ||
5714 | printk(KERN_ERR "btrfs direct IO failed ino %lu rw %lu " | ||
5715 | "sector %#Lx len %u err no %d\n", | ||
5716 | dip->inode->i_ino, bio->bi_rw, | ||
5717 | (unsigned long long)bio->bi_sector, bio->bi_size, err); | ||
5718 | dip->errors = 1; | ||
5719 | |||
5720 | /* | ||
5721 | * before atomic variable goto zero, we must make sure | ||
5722 | * dip->errors is perceived to be set. | ||
5723 | */ | ||
5724 | smp_mb__before_atomic_dec(); | ||
5725 | } | ||
5726 | |||
5727 | /* if there are more bios still pending for this dio, just exit */ | ||
5728 | if (!atomic_dec_and_test(&dip->pending_bios)) | ||
5729 | goto out; | ||
5730 | |||
5731 | if (dip->errors) | ||
5732 | bio_io_error(dip->orig_bio); | ||
5733 | else { | ||
5734 | set_bit(BIO_UPTODATE, &dip->orig_bio->bi_flags); | ||
5735 | bio_endio(dip->orig_bio, 0); | ||
5736 | } | ||
5737 | out: | ||
5738 | bio_put(bio); | ||
5739 | } | ||
5740 | |||
5741 | static struct bio *btrfs_dio_bio_alloc(struct block_device *bdev, | ||
5742 | u64 first_sector, gfp_t gfp_flags) | ||
5743 | { | ||
5744 | int nr_vecs = bio_get_nr_vecs(bdev); | ||
5745 | return btrfs_bio_alloc(bdev, first_sector, nr_vecs, gfp_flags); | ||
5746 | } | ||
5747 | |||
5748 | static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode, | ||
5749 | int rw, u64 file_offset, int skip_sum, | ||
5750 | u32 *csums) | ||
5751 | { | ||
5752 | int write = rw & REQ_WRITE; | ||
5753 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
5754 | int ret; | ||
5755 | |||
5756 | bio_get(bio); | ||
5757 | ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0); | ||
5758 | if (ret) | ||
5759 | goto err; | ||
5760 | |||
5761 | if (write && !skip_sum) { | ||
5762 | ret = btrfs_wq_submit_bio(root->fs_info, | ||
5763 | inode, rw, bio, 0, 0, | ||
5764 | file_offset, | ||
5765 | __btrfs_submit_bio_start_direct_io, | ||
5766 | __btrfs_submit_bio_done); | ||
5767 | goto err; | ||
5768 | } else if (!skip_sum) | ||
5769 | btrfs_lookup_bio_sums_dio(root, inode, bio, | ||
5770 | file_offset, csums); | ||
5771 | |||
5772 | ret = btrfs_map_bio(root, rw, bio, 0, 1); | ||
5773 | err: | ||
5774 | bio_put(bio); | ||
5775 | return ret; | ||
5776 | } | ||
5777 | |||
5778 | static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip, | ||
5779 | int skip_sum) | ||
5780 | { | ||
5781 | struct inode *inode = dip->inode; | ||
5782 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
5783 | struct btrfs_mapping_tree *map_tree = &root->fs_info->mapping_tree; | ||
5784 | struct bio *bio; | ||
5785 | struct bio *orig_bio = dip->orig_bio; | ||
5786 | struct bio_vec *bvec = orig_bio->bi_io_vec; | ||
5787 | u64 start_sector = orig_bio->bi_sector; | ||
5788 | u64 file_offset = dip->logical_offset; | ||
5789 | u64 submit_len = 0; | ||
5790 | u64 map_length; | ||
5791 | int nr_pages = 0; | ||
5792 | u32 *csums = dip->csums; | ||
5793 | int ret = 0; | ||
5794 | |||
5795 | bio = btrfs_dio_bio_alloc(orig_bio->bi_bdev, start_sector, GFP_NOFS); | ||
5796 | if (!bio) | ||
5797 | return -ENOMEM; | ||
5798 | bio->bi_private = dip; | ||
5799 | bio->bi_end_io = btrfs_end_dio_bio; | ||
5800 | atomic_inc(&dip->pending_bios); | ||
5801 | |||
5802 | map_length = orig_bio->bi_size; | ||
5803 | ret = btrfs_map_block(map_tree, READ, start_sector << 9, | ||
5804 | &map_length, NULL, 0); | ||
5805 | if (ret) { | ||
5806 | bio_put(bio); | ||
5807 | return -EIO; | ||
5808 | } | ||
5809 | |||
5810 | while (bvec <= (orig_bio->bi_io_vec + orig_bio->bi_vcnt - 1)) { | ||
5811 | if (unlikely(map_length < submit_len + bvec->bv_len || | ||
5812 | bio_add_page(bio, bvec->bv_page, bvec->bv_len, | ||
5813 | bvec->bv_offset) < bvec->bv_len)) { | ||
5814 | /* | ||
5815 | * inc the count before we submit the bio so | ||
5816 | * we know the end IO handler won't happen before | ||
5817 | * we inc the count. Otherwise, the dip might get freed | ||
5818 | * before we're done setting it up | ||
5819 | */ | ||
5820 | atomic_inc(&dip->pending_bios); | ||
5821 | ret = __btrfs_submit_dio_bio(bio, inode, rw, | ||
5822 | file_offset, skip_sum, | ||
5823 | csums); | ||
5824 | if (ret) { | ||
5825 | bio_put(bio); | ||
5826 | atomic_dec(&dip->pending_bios); | ||
5827 | goto out_err; | ||
5828 | } | ||
5829 | |||
5830 | if (!skip_sum) | ||
5831 | csums = csums + nr_pages; | ||
5832 | start_sector += submit_len >> 9; | ||
5833 | file_offset += submit_len; | ||
5834 | |||
5835 | submit_len = 0; | ||
5836 | nr_pages = 0; | ||
5837 | |||
5838 | bio = btrfs_dio_bio_alloc(orig_bio->bi_bdev, | ||
5839 | start_sector, GFP_NOFS); | ||
5840 | if (!bio) | ||
5841 | goto out_err; | ||
5842 | bio->bi_private = dip; | ||
5843 | bio->bi_end_io = btrfs_end_dio_bio; | ||
5844 | |||
5845 | map_length = orig_bio->bi_size; | ||
5846 | ret = btrfs_map_block(map_tree, READ, start_sector << 9, | ||
5847 | &map_length, NULL, 0); | ||
5848 | if (ret) { | ||
5849 | bio_put(bio); | ||
5850 | goto out_err; | ||
5851 | } | ||
5852 | } else { | ||
5853 | submit_len += bvec->bv_len; | ||
5854 | nr_pages ++; | ||
5855 | bvec++; | ||
5856 | } | ||
5857 | } | ||
5858 | |||
5859 | ret = __btrfs_submit_dio_bio(bio, inode, rw, file_offset, skip_sum, | ||
5860 | csums); | ||
5861 | if (!ret) | ||
5862 | return 0; | ||
5863 | |||
5864 | bio_put(bio); | ||
5865 | out_err: | ||
5866 | dip->errors = 1; | ||
5867 | /* | ||
5868 | * before atomic variable goto zero, we must | ||
5869 | * make sure dip->errors is perceived to be set. | ||
5870 | */ | ||
5871 | smp_mb__before_atomic_dec(); | ||
5872 | if (atomic_dec_and_test(&dip->pending_bios)) | ||
5873 | bio_io_error(dip->orig_bio); | ||
5874 | |||
5875 | /* bio_end_io() will handle error, so we needn't return it */ | ||
5876 | return 0; | ||
5877 | } | ||
5878 | |||
5687 | static void btrfs_submit_direct(int rw, struct bio *bio, struct inode *inode, | 5879 | static void btrfs_submit_direct(int rw, struct bio *bio, struct inode *inode, |
5688 | loff_t file_offset) | 5880 | loff_t file_offset) |
5689 | { | 5881 | { |
@@ -5723,36 +5915,18 @@ static void btrfs_submit_direct(int rw, struct bio *bio, struct inode *inode, | |||
5723 | 5915 | ||
5724 | dip->disk_bytenr = (u64)bio->bi_sector << 9; | 5916 | dip->disk_bytenr = (u64)bio->bi_sector << 9; |
5725 | bio->bi_private = dip; | 5917 | bio->bi_private = dip; |
5918 | dip->errors = 0; | ||
5919 | dip->orig_bio = bio; | ||
5920 | atomic_set(&dip->pending_bios, 0); | ||
5726 | 5921 | ||
5727 | if (write) | 5922 | if (write) |
5728 | bio->bi_end_io = btrfs_endio_direct_write; | 5923 | bio->bi_end_io = btrfs_endio_direct_write; |
5729 | else | 5924 | else |
5730 | bio->bi_end_io = btrfs_endio_direct_read; | 5925 | bio->bi_end_io = btrfs_endio_direct_read; |
5731 | 5926 | ||
5732 | ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0); | 5927 | ret = btrfs_submit_direct_hook(rw, dip, skip_sum); |
5733 | if (ret) | 5928 | if (!ret) |
5734 | goto out_err; | ||
5735 | |||
5736 | if (write && !skip_sum) { | ||
5737 | ret = btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info, | ||
5738 | inode, rw, bio, 0, 0, | ||
5739 | dip->logical_offset, | ||
5740 | __btrfs_submit_bio_start_direct_io, | ||
5741 | __btrfs_submit_bio_done); | ||
5742 | if (ret) | ||
5743 | goto out_err; | ||
5744 | return; | 5929 | return; |
5745 | } else if (!skip_sum) | ||
5746 | btrfs_lookup_bio_sums_dio(root, inode, bio, | ||
5747 | dip->logical_offset, dip->csums); | ||
5748 | |||
5749 | ret = btrfs_map_bio(root, rw, bio, 0, 1); | ||
5750 | if (ret) | ||
5751 | goto out_err; | ||
5752 | return; | ||
5753 | out_err: | ||
5754 | kfree(dip->csums); | ||
5755 | kfree(dip); | ||
5756 | free_ordered: | 5930 | free_ordered: |
5757 | /* | 5931 | /* |
5758 | * If this is a write, we need to clean up the reserved space and kill | 5932 | * If this is a write, we need to clean up the reserved space and kill |
@@ -5760,8 +5934,7 @@ free_ordered: | |||
5760 | */ | 5934 | */ |
5761 | if (write) { | 5935 | if (write) { |
5762 | struct btrfs_ordered_extent *ordered; | 5936 | struct btrfs_ordered_extent *ordered; |
5763 | ordered = btrfs_lookup_ordered_extent(inode, | 5937 | ordered = btrfs_lookup_ordered_extent(inode, file_offset); |
5764 | dip->logical_offset); | ||
5765 | if (!test_bit(BTRFS_ORDERED_PREALLOC, &ordered->flags) && | 5938 | if (!test_bit(BTRFS_ORDERED_PREALLOC, &ordered->flags) && |
5766 | !test_bit(BTRFS_ORDERED_NOCOW, &ordered->flags)) | 5939 | !test_bit(BTRFS_ORDERED_NOCOW, &ordered->flags)) |
5767 | btrfs_free_reserved_extent(root, ordered->start, | 5940 | btrfs_free_reserved_extent(root, ordered->start, |
@@ -6607,8 +6780,9 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
6607 | BUG_ON(ret); | 6780 | BUG_ON(ret); |
6608 | 6781 | ||
6609 | if (old_inode->i_ino != BTRFS_FIRST_FREE_OBJECTID) { | 6782 | if (old_inode->i_ino != BTRFS_FIRST_FREE_OBJECTID) { |
6610 | btrfs_log_new_name(trans, old_inode, old_dir, | 6783 | struct dentry *parent = dget_parent(new_dentry); |
6611 | new_dentry->d_parent); | 6784 | btrfs_log_new_name(trans, old_inode, old_dir, parent); |
6785 | dput(parent); | ||
6612 | btrfs_end_log_trans(root); | 6786 | btrfs_end_log_trans(root); |
6613 | } | 6787 | } |
6614 | out_fail: | 6788 | out_fail: |
@@ -6758,8 +6932,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, | |||
6758 | btrfs_set_trans_block_group(trans, dir); | 6932 | btrfs_set_trans_block_group(trans, dir); |
6759 | 6933 | ||
6760 | inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, | 6934 | inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, |
6761 | dentry->d_name.len, | 6935 | dentry->d_name.len, dir->i_ino, objectid, |
6762 | dentry->d_parent->d_inode->i_ino, objectid, | ||
6763 | BTRFS_I(dir)->block_group, S_IFLNK|S_IRWXUGO, | 6936 | BTRFS_I(dir)->block_group, S_IFLNK|S_IRWXUGO, |
6764 | &index); | 6937 | &index); |
6765 | err = PTR_ERR(inode); | 6938 | err = PTR_ERR(inode); |
@@ -6773,7 +6946,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, | |||
6773 | } | 6946 | } |
6774 | 6947 | ||
6775 | btrfs_set_trans_block_group(trans, inode); | 6948 | btrfs_set_trans_block_group(trans, inode); |
6776 | err = btrfs_add_nondir(trans, dentry, inode, 0, index); | 6949 | err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index); |
6777 | if (err) | 6950 | if (err) |
6778 | drop_inode = 1; | 6951 | drop_inode = 1; |
6779 | else { | 6952 | else { |
@@ -6844,6 +7017,7 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode, | |||
6844 | struct btrfs_root *root = BTRFS_I(inode)->root; | 7017 | struct btrfs_root *root = BTRFS_I(inode)->root; |
6845 | struct btrfs_key ins; | 7018 | struct btrfs_key ins; |
6846 | u64 cur_offset = start; | 7019 | u64 cur_offset = start; |
7020 | u64 i_size; | ||
6847 | int ret = 0; | 7021 | int ret = 0; |
6848 | bool own_trans = true; | 7022 | bool own_trans = true; |
6849 | 7023 | ||
@@ -6885,11 +7059,11 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode, | |||
6885 | (actual_len > inode->i_size) && | 7059 | (actual_len > inode->i_size) && |
6886 | (cur_offset > inode->i_size)) { | 7060 | (cur_offset > inode->i_size)) { |
6887 | if (cur_offset > actual_len) | 7061 | if (cur_offset > actual_len) |
6888 | i_size_write(inode, actual_len); | 7062 | i_size = actual_len; |
6889 | else | 7063 | else |
6890 | i_size_write(inode, cur_offset); | 7064 | i_size = cur_offset; |
6891 | i_size_write(inode, cur_offset); | 7065 | i_size_write(inode, i_size); |
6892 | btrfs_ordered_update_i_size(inode, cur_offset, NULL); | 7066 | btrfs_ordered_update_i_size(inode, i_size, NULL); |
6893 | } | 7067 | } |
6894 | 7068 | ||
6895 | ret = btrfs_update_inode(trans, root, inode); | 7069 | ret = btrfs_update_inode(trans, root, inode); |
@@ -6943,6 +7117,10 @@ static long btrfs_fallocate(struct inode *inode, int mode, | |||
6943 | btrfs_wait_ordered_range(inode, alloc_start, alloc_end - alloc_start); | 7117 | btrfs_wait_ordered_range(inode, alloc_start, alloc_end - alloc_start); |
6944 | 7118 | ||
6945 | mutex_lock(&inode->i_mutex); | 7119 | mutex_lock(&inode->i_mutex); |
7120 | ret = inode_newsize_ok(inode, alloc_end); | ||
7121 | if (ret) | ||
7122 | goto out; | ||
7123 | |||
6946 | if (alloc_start > inode->i_size) { | 7124 | if (alloc_start > inode->i_size) { |
6947 | ret = btrfs_cont_expand(inode, alloc_start); | 7125 | ret = btrfs_cont_expand(inode, alloc_start); |
6948 | if (ret) | 7126 | if (ret) |
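
Note: with the inode_newsize_ok() check added above, an fallocate request whose end passes RLIMIT_FSIZE or the filesystem's maxbytes should now be refused up front. A rough user-space probe of that behaviour, a sketch only (paths and sizes are placeholders; the process ignores SIGXFSZ so the EFBIG return stays visible):

#define _GNU_SOURCE
#include <errno.h>
#include <fcntl.h>
#include <signal.h>
#include <stdio.h>
#include <string.h>
#include <sys/resource.h>

int main(void)
{
	struct rlimit rl = { 1 << 20, 1 << 20 };   /* 1 MiB file-size limit */
	int fd;

	signal(SIGXFSZ, SIG_IGN);          /* raised when the rlimit is exceeded */
	setrlimit(RLIMIT_FSIZE, &rl);

	/* placeholder path on a btrfs mount */
	fd = open("/mnt/btrfs/testfile", O_CREAT | O_RDWR, 0644);
	if (fd < 0) {
		perror("open");
		return 1;
	}

	/* 16 MiB, well past the limit: expect EFBIG with the check in place */
	if (fallocate(fd, 0, 0, 16 << 20) < 0)
		printf("fallocate: %s\n", strerror(errno));
	return 0;
}
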
@@ -7139,6 +7317,7 @@ static const struct inode_operations btrfs_symlink_inode_operations = { | |||
7139 | .readlink = generic_readlink, | 7317 | .readlink = generic_readlink, |
7140 | .follow_link = page_follow_link_light, | 7318 | .follow_link = page_follow_link_light, |
7141 | .put_link = page_put_link, | 7319 | .put_link = page_put_link, |
7320 | .getattr = btrfs_getattr, | ||
7142 | .permission = btrfs_permission, | 7321 | .permission = btrfs_permission, |
7143 | .setxattr = btrfs_setxattr, | 7322 | .setxattr = btrfs_setxattr, |
7144 | .getxattr = btrfs_getxattr, | 7323 | .getxattr = btrfs_getxattr, |
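
Note on the direct-IO splitting added above: the btrfs_dio_private is kept alive through dip->pending_bios — the count is raised before every child bio is handed off, and only the final decrement completes (or fails) dip->orig_bio. A minimal user-space analogy of that discipline, sketched with C11 atomics and pthreads rather than kernel primitives (all names and counts here are illustrative, not btrfs API):

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct dio_ctx {
	atomic_int pending;    /* plays the role of dip->pending_bios */
	atomic_int errors;     /* plays the role of dip->errors       */
};

static void complete_original(struct dio_ctx *ctx)
{
	/* only the final decrement reaches this point */
	printf("all pieces done, errors=%d\n", atomic_load(&ctx->errors));
	free(ctx);
}

static void *end_io(void *arg)          /* stands in for btrfs_end_dio_bio */
{
	struct dio_ctx *ctx = arg;

	if (atomic_fetch_sub(&ctx->pending, 1) == 1)
		complete_original(ctx);
	return NULL;
}

int main(void)
{
	struct dio_ctx *ctx = malloc(sizeof(*ctx));
	pthread_t tid[4];
	int i;

	/* the submitter holds its own reference while it is still
	 * splitting work, so a fast completion cannot free ctx under it */
	atomic_init(&ctx->pending, 1);
	atomic_init(&ctx->errors, 0);

	for (i = 0; i < 4; i++) {
		atomic_fetch_add(&ctx->pending, 1);   /* inc *before* submit */
		pthread_create(&tid[i], NULL, end_io, ctx);
	}

	/* done splitting: drop the submitter's reference; last one frees */
	if (atomic_fetch_sub(&ctx->pending, 1) == 1)
		complete_original(ctx);

	for (i = 0; i < 4; i++)
		pthread_join(tid[i], NULL);
	return 0;
}

This mirrors the comment in btrfs_submit_direct_hook: take the reference before submission, otherwise the end-IO handler could run and tear the structure down while it is still being set up.
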
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 463d91b4dd3a..f87552a1d7ea 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c | |||
@@ -233,7 +233,8 @@ static noinline int create_subvol(struct btrfs_root *root, | |||
233 | struct btrfs_inode_item *inode_item; | 233 | struct btrfs_inode_item *inode_item; |
234 | struct extent_buffer *leaf; | 234 | struct extent_buffer *leaf; |
235 | struct btrfs_root *new_root; | 235 | struct btrfs_root *new_root; |
236 | struct inode *dir = dentry->d_parent->d_inode; | 236 | struct dentry *parent = dget_parent(dentry); |
237 | struct inode *dir; | ||
237 | int ret; | 238 | int ret; |
238 | int err; | 239 | int err; |
239 | u64 objectid; | 240 | u64 objectid; |
@@ -242,8 +243,13 @@ static noinline int create_subvol(struct btrfs_root *root, | |||
242 | 243 | ||
243 | ret = btrfs_find_free_objectid(NULL, root->fs_info->tree_root, | 244 | ret = btrfs_find_free_objectid(NULL, root->fs_info->tree_root, |
244 | 0, &objectid); | 245 | 0, &objectid); |
245 | if (ret) | 246 | if (ret) { |
247 | dput(parent); | ||
246 | return ret; | 248 | return ret; |
249 | } | ||
250 | |||
251 | dir = parent->d_inode; | ||
252 | |||
247 | /* | 253 | /* |
248 | * 1 - inode item | 254 | * 1 - inode item |
249 | * 2 - refs | 255 | * 2 - refs |
@@ -251,8 +257,10 @@ static noinline int create_subvol(struct btrfs_root *root, | |||
251 | * 2 - dir items | 257 | * 2 - dir items |
252 | */ | 258 | */ |
253 | trans = btrfs_start_transaction(root, 6); | 259 | trans = btrfs_start_transaction(root, 6); |
254 | if (IS_ERR(trans)) | 260 | if (IS_ERR(trans)) { |
261 | dput(parent); | ||
255 | return PTR_ERR(trans); | 262 | return PTR_ERR(trans); |
263 | } | ||
256 | 264 | ||
257 | leaf = btrfs_alloc_free_block(trans, root, root->leafsize, | 265 | leaf = btrfs_alloc_free_block(trans, root, root->leafsize, |
258 | 0, objectid, NULL, 0, 0, 0); | 266 | 0, objectid, NULL, 0, 0, 0); |
@@ -339,6 +347,7 @@ static noinline int create_subvol(struct btrfs_root *root, | |||
339 | 347 | ||
340 | d_instantiate(dentry, btrfs_lookup_dentry(dir, dentry)); | 348 | d_instantiate(dentry, btrfs_lookup_dentry(dir, dentry)); |
341 | fail: | 349 | fail: |
350 | dput(parent); | ||
342 | if (async_transid) { | 351 | if (async_transid) { |
343 | *async_transid = trans->transid; | 352 | *async_transid = trans->transid; |
344 | err = btrfs_commit_transaction_async(trans, root, 1); | 353 | err = btrfs_commit_transaction_async(trans, root, 1); |
@@ -354,6 +363,7 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry, | |||
354 | char *name, int namelen, u64 *async_transid) | 363 | char *name, int namelen, u64 *async_transid) |
355 | { | 364 | { |
356 | struct inode *inode; | 365 | struct inode *inode; |
366 | struct dentry *parent; | ||
357 | struct btrfs_pending_snapshot *pending_snapshot; | 367 | struct btrfs_pending_snapshot *pending_snapshot; |
358 | struct btrfs_trans_handle *trans; | 368 | struct btrfs_trans_handle *trans; |
359 | int ret; | 369 | int ret; |
@@ -396,7 +406,9 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry, | |||
396 | 406 | ||
397 | btrfs_orphan_cleanup(pending_snapshot->snap); | 407 | btrfs_orphan_cleanup(pending_snapshot->snap); |
398 | 408 | ||
399 | inode = btrfs_lookup_dentry(dentry->d_parent->d_inode, dentry); | 409 | parent = dget_parent(dentry); |
410 | inode = btrfs_lookup_dentry(parent->d_inode, dentry); | ||
411 | dput(parent); | ||
400 | if (IS_ERR(inode)) { | 412 | if (IS_ERR(inode)) { |
401 | ret = PTR_ERR(inode); | 413 | ret = PTR_ERR(inode); |
402 | goto fail; | 414 | goto fail; |
@@ -935,23 +947,42 @@ out: | |||
935 | 947 | ||
936 | static noinline int btrfs_ioctl_snap_create(struct file *file, | 948 | static noinline int btrfs_ioctl_snap_create(struct file *file, |
937 | void __user *arg, int subvol, | 949 | void __user *arg, int subvol, |
938 | int async) | 950 | int v2) |
939 | { | 951 | { |
940 | struct btrfs_ioctl_vol_args *vol_args = NULL; | 952 | struct btrfs_ioctl_vol_args *vol_args = NULL; |
941 | struct btrfs_ioctl_async_vol_args *async_vol_args = NULL; | 953 | struct btrfs_ioctl_vol_args_v2 *vol_args_v2 = NULL; |
942 | char *name; | 954 | char *name; |
943 | u64 fd; | 955 | u64 fd; |
944 | u64 transid = 0; | ||
945 | int ret; | 956 | int ret; |
946 | 957 | ||
947 | if (async) { | 958 | if (v2) { |
948 | async_vol_args = memdup_user(arg, sizeof(*async_vol_args)); | 959 | u64 transid = 0; |
949 | if (IS_ERR(async_vol_args)) | 960 | u64 *ptr = NULL; |
950 | return PTR_ERR(async_vol_args); | 961 | |
962 | vol_args_v2 = memdup_user(arg, sizeof(*vol_args_v2)); | ||
963 | if (IS_ERR(vol_args_v2)) | ||
964 | return PTR_ERR(vol_args_v2); | ||
965 | |||
966 | if (vol_args_v2->flags & ~BTRFS_SUBVOL_CREATE_ASYNC) { | ||
967 | ret = -EINVAL; | ||
968 | goto out; | ||
969 | } | ||
970 | |||
971 | name = vol_args_v2->name; | ||
972 | fd = vol_args_v2->fd; | ||
973 | vol_args_v2->name[BTRFS_SUBVOL_NAME_MAX] = '\0'; | ||
974 | |||
975 | if (vol_args_v2->flags & BTRFS_SUBVOL_CREATE_ASYNC) | ||
976 | ptr = &transid; | ||
977 | |||
978 | ret = btrfs_ioctl_snap_create_transid(file, name, fd, | ||
979 | subvol, ptr); | ||
951 | 980 | ||
952 | name = async_vol_args->name; | 981 | if (ret == 0 && ptr && |
953 | fd = async_vol_args->fd; | 982 | copy_to_user(arg + |
954 | async_vol_args->name[BTRFS_SNAPSHOT_NAME_MAX] = '\0'; | 983 | offsetof(struct btrfs_ioctl_vol_args_v2, |
984 | transid), ptr, sizeof(*ptr))) | ||
985 | ret = -EFAULT; | ||
955 | } else { | 986 | } else { |
956 | vol_args = memdup_user(arg, sizeof(*vol_args)); | 987 | vol_args = memdup_user(arg, sizeof(*vol_args)); |
957 | if (IS_ERR(vol_args)) | 988 | if (IS_ERR(vol_args)) |
@@ -959,20 +990,13 @@ static noinline int btrfs_ioctl_snap_create(struct file *file, | |||
959 | name = vol_args->name; | 990 | name = vol_args->name; |
960 | fd = vol_args->fd; | 991 | fd = vol_args->fd; |
961 | vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; | 992 | vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; |
962 | } | ||
963 | |||
964 | ret = btrfs_ioctl_snap_create_transid(file, name, fd, | ||
965 | subvol, &transid); | ||
966 | 993 | ||
967 | if (!ret && async) { | 994 | ret = btrfs_ioctl_snap_create_transid(file, name, fd, |
968 | if (copy_to_user(arg + | 995 | subvol, NULL); |
969 | offsetof(struct btrfs_ioctl_async_vol_args, | ||
970 | transid), &transid, sizeof(transid))) | ||
971 | return -EFAULT; | ||
972 | } | 996 | } |
973 | 997 | out: | |
974 | kfree(vol_args); | 998 | kfree(vol_args); |
975 | kfree(async_vol_args); | 999 | kfree(vol_args_v2); |
976 | 1000 | ||
977 | return ret; | 1001 | return ret; |
978 | } | 1002 | } |
@@ -1669,12 +1693,11 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, | |||
1669 | olen = len = src->i_size - off; | 1693 | olen = len = src->i_size - off; |
1670 | /* if we extend to eof, continue to block boundary */ | 1694 | /* if we extend to eof, continue to block boundary */ |
1671 | if (off + len == src->i_size) | 1695 | if (off + len == src->i_size) |
1672 | len = ((src->i_size + bs-1) & ~(bs-1)) | 1696 | len = ALIGN(src->i_size, bs) - off; |
1673 | - off; | ||
1674 | 1697 | ||
1675 | /* verify the end result is block aligned */ | 1698 | /* verify the end result is block aligned */ |
1676 | if ((off & (bs-1)) || | 1699 | if (!IS_ALIGNED(off, bs) || !IS_ALIGNED(off + len, bs) || |
1677 | ((off + len) & (bs-1))) | 1700 | !IS_ALIGNED(destoff, bs)) |
1678 | goto out_unlock; | 1701 | goto out_unlock; |
1679 | 1702 | ||
1680 | /* do any pending delalloc/csum calc on src, one way or | 1703 | /* do any pending delalloc/csum calc on src, one way or |
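
Note: the clone-range cleanup above swaps open-coded mask arithmetic for ALIGN() and IS_ALIGNED(). A small stand-alone check of the equivalence, with the two helpers written out as local copies (power-of-two alignments only) so the program builds outside the tree:

#include <assert.h>
#include <stdio.h>

#define ALIGN(x, a)       (((x) + (a) - 1) & ~((a) - 1))
#define IS_ALIGNED(x, a)  (((x) & ((a) - 1)) == 0)

int main(void)
{
	unsigned long long bs = 4096;        /* block size */
	unsigned long long i_size = 123456;  /* arbitrary file size */
	unsigned long long off = 8192;

	/* old expressions from the removed lines vs. the new helpers */
	assert((((i_size + bs - 1) & ~(bs - 1)) - off) == (ALIGN(i_size, bs) - off));
	assert(((off & (bs - 1)) == 0) == IS_ALIGNED(off, bs));

	printf("len to block boundary: %llu\n", ALIGN(i_size, bs) - off);
	return 0;
}
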
@@ -1874,8 +1897,8 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, | |||
1874 | * but shouldn't round up the file size | 1897 | * but shouldn't round up the file size |
1875 | */ | 1898 | */ |
1876 | endoff = new_key.offset + datal; | 1899 | endoff = new_key.offset + datal; |
1877 | if (endoff > off+olen) | 1900 | if (endoff > destoff+olen) |
1878 | endoff = off+olen; | 1901 | endoff = destoff+olen; |
1879 | if (endoff > inode->i_size) | 1902 | if (endoff > inode->i_size) |
1880 | btrfs_i_size_write(inode, endoff); | 1903 | btrfs_i_size_write(inode, endoff); |
1881 | 1904 | ||
@@ -2235,7 +2258,7 @@ long btrfs_ioctl(struct file *file, unsigned int | |||
2235 | return btrfs_ioctl_getversion(file, argp); | 2258 | return btrfs_ioctl_getversion(file, argp); |
2236 | case BTRFS_IOC_SNAP_CREATE: | 2259 | case BTRFS_IOC_SNAP_CREATE: |
2237 | return btrfs_ioctl_snap_create(file, argp, 0, 0); | 2260 | return btrfs_ioctl_snap_create(file, argp, 0, 0); |
2238 | case BTRFS_IOC_SNAP_CREATE_ASYNC: | 2261 | case BTRFS_IOC_SNAP_CREATE_V2: |
2239 | return btrfs_ioctl_snap_create(file, argp, 0, 1); | 2262 | return btrfs_ioctl_snap_create(file, argp, 0, 1); |
2240 | case BTRFS_IOC_SUBVOL_CREATE: | 2263 | case BTRFS_IOC_SUBVOL_CREATE: |
2241 | return btrfs_ioctl_snap_create(file, argp, 1, 0); | 2264 | return btrfs_ioctl_snap_create(file, argp, 1, 0); |
diff --git a/fs/btrfs/ioctl.h b/fs/btrfs/ioctl.h index 17c99ebdf960..c344d12c646b 100644 --- a/fs/btrfs/ioctl.h +++ b/fs/btrfs/ioctl.h | |||
@@ -30,11 +30,15 @@ struct btrfs_ioctl_vol_args { | |||
30 | char name[BTRFS_PATH_NAME_MAX + 1]; | 30 | char name[BTRFS_PATH_NAME_MAX + 1]; |
31 | }; | 31 | }; |
32 | 32 | ||
33 | #define BTRFS_SNAPSHOT_NAME_MAX 4079 | 33 | #define BTRFS_SUBVOL_CREATE_ASYNC (1ULL << 0) |
34 | struct btrfs_ioctl_async_vol_args { | 34 | |
35 | #define BTRFS_SUBVOL_NAME_MAX 4039 | ||
36 | struct btrfs_ioctl_vol_args_v2 { | ||
35 | __s64 fd; | 37 | __s64 fd; |
36 | __u64 transid; | 38 | __u64 transid; |
37 | char name[BTRFS_SNAPSHOT_NAME_MAX + 1]; | 39 | __u64 flags; |
40 | __u64 unused[4]; | ||
41 | char name[BTRFS_SUBVOL_NAME_MAX + 1]; | ||
38 | }; | 42 | }; |
39 | 43 | ||
40 | #define BTRFS_INO_LOOKUP_PATH_MAX 4080 | 44 | #define BTRFS_INO_LOOKUP_PATH_MAX 4080 |
@@ -187,6 +191,6 @@ struct btrfs_ioctl_space_args { | |||
187 | struct btrfs_ioctl_space_args) | 191 | struct btrfs_ioctl_space_args) |
188 | #define BTRFS_IOC_START_SYNC _IOR(BTRFS_IOCTL_MAGIC, 24, __u64) | 192 | #define BTRFS_IOC_START_SYNC _IOR(BTRFS_IOCTL_MAGIC, 24, __u64) |
189 | #define BTRFS_IOC_WAIT_SYNC _IOW(BTRFS_IOCTL_MAGIC, 22, __u64) | 193 | #define BTRFS_IOC_WAIT_SYNC _IOW(BTRFS_IOCTL_MAGIC, 22, __u64) |
190 | #define BTRFS_IOC_SNAP_CREATE_ASYNC _IOW(BTRFS_IOCTL_MAGIC, 23, \ | 194 | #define BTRFS_IOC_SNAP_CREATE_V2 _IOW(BTRFS_IOCTL_MAGIC, 23, \ |
191 | struct btrfs_ioctl_async_vol_args) | 195 | struct btrfs_ioctl_vol_args_v2) |
192 | #endif | 196 | #endif |
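
For context, a rough user-space sketch of driving the renamed interface: the ioctl is issued on the directory that will contain the new snapshot, fd names the subvolume being snapshotted, and with BTRFS_SUBVOL_CREATE_ASYNC set the kernel copies the started transaction id back into transid. The struct and flag are taken from the hunk above; BTRFS_IOCTL_MAGIC is assumed from the full header (not shown here), paths are placeholders, and error handling is minimal:

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/types.h>

#define BTRFS_IOCTL_MAGIC		0x94	/* assumed, from the full ioctl.h */
#define BTRFS_SUBVOL_CREATE_ASYNC	(1ULL << 0)
#define BTRFS_SUBVOL_NAME_MAX		4039

struct btrfs_ioctl_vol_args_v2 {		/* as defined in the hunk above */
	__s64 fd;
	__u64 transid;
	__u64 flags;
	__u64 unused[4];
	char name[BTRFS_SUBVOL_NAME_MAX + 1];
};

#define BTRFS_IOC_SNAP_CREATE_V2 _IOW(BTRFS_IOCTL_MAGIC, 23, \
				      struct btrfs_ioctl_vol_args_v2)

int main(void)
{
	int dirfd = open("/mnt", O_RDONLY);          /* destination directory */
	int srcfd = open("/mnt/subvol", O_RDONLY);   /* subvolume to snapshot */
	struct btrfs_ioctl_vol_args_v2 args;

	memset(&args, 0, sizeof(args));
	args.fd = srcfd;
	args.flags = BTRFS_SUBVOL_CREATE_ASYNC;
	strncpy(args.name, "snap1", BTRFS_SUBVOL_NAME_MAX);

	if (ioctl(dirfd, BTRFS_IOC_SNAP_CREATE_V2, &args) < 0)
		perror("BTRFS_IOC_SNAP_CREATE_V2");
	else
		printf("snapshot started, transid %llu\n",
		       (unsigned long long)args.transid);
	return 0;
}
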
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index f4621f6deca1..ae7737e352c9 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c | |||
@@ -250,6 +250,73 @@ int btrfs_add_ordered_sum(struct inode *inode, | |||
250 | 250 | ||
251 | /* | 251 | /* |
252 | * this is used to account for finished IO across a given range | 252 | * this is used to account for finished IO across a given range |
253 | * of the file. The IO may span ordered extents. If | ||
254 | * a given ordered_extent is completely done, 1 is returned, otherwise | ||
255 | * 0. | ||
256 | * | ||
257 | * test_and_set_bit on a flag in the struct btrfs_ordered_extent is used | ||
258 | * to make sure this function only returns 1 once for a given ordered extent. | ||
259 | * | ||
260 | * file_offset is updated to one byte past the range that is recorded as | ||
261 | * complete. This allows you to walk forward in the file. | ||
262 | */ | ||
263 | int btrfs_dec_test_first_ordered_pending(struct inode *inode, | ||
264 | struct btrfs_ordered_extent **cached, | ||
265 | u64 *file_offset, u64 io_size) | ||
266 | { | ||
267 | struct btrfs_ordered_inode_tree *tree; | ||
268 | struct rb_node *node; | ||
269 | struct btrfs_ordered_extent *entry = NULL; | ||
270 | int ret; | ||
271 | u64 dec_end; | ||
272 | u64 dec_start; | ||
273 | u64 to_dec; | ||
274 | |||
275 | tree = &BTRFS_I(inode)->ordered_tree; | ||
276 | spin_lock(&tree->lock); | ||
277 | node = tree_search(tree, *file_offset); | ||
278 | if (!node) { | ||
279 | ret = 1; | ||
280 | goto out; | ||
281 | } | ||
282 | |||
283 | entry = rb_entry(node, struct btrfs_ordered_extent, rb_node); | ||
284 | if (!offset_in_entry(entry, *file_offset)) { | ||
285 | ret = 1; | ||
286 | goto out; | ||
287 | } | ||
288 | |||
289 | dec_start = max(*file_offset, entry->file_offset); | ||
290 | dec_end = min(*file_offset + io_size, entry->file_offset + | ||
291 | entry->len); | ||
292 | *file_offset = dec_end; | ||
293 | if (dec_start > dec_end) { | ||
294 | printk(KERN_CRIT "bad ordering dec_start %llu end %llu\n", | ||
295 | (unsigned long long)dec_start, | ||
296 | (unsigned long long)dec_end); | ||
297 | } | ||
298 | to_dec = dec_end - dec_start; | ||
299 | if (to_dec > entry->bytes_left) { | ||
300 | printk(KERN_CRIT "bad ordered accounting left %llu size %llu\n", | ||
301 | (unsigned long long)entry->bytes_left, | ||
302 | (unsigned long long)to_dec); | ||
303 | } | ||
304 | entry->bytes_left -= to_dec; | ||
305 | if (entry->bytes_left == 0) | ||
306 | ret = test_and_set_bit(BTRFS_ORDERED_IO_DONE, &entry->flags); | ||
307 | else | ||
308 | ret = 1; | ||
309 | out: | ||
310 | if (!ret && cached && entry) { | ||
311 | *cached = entry; | ||
312 | atomic_inc(&entry->refs); | ||
313 | } | ||
314 | spin_unlock(&tree->lock); | ||
315 | return ret == 0; | ||
316 | } | ||
317 | |||
318 | /* | ||
319 | * this is used to account for finished IO across a given range | ||
253 | * of the file. The IO should not span ordered extents. If | 320 | * of the file. The IO should not span ordered extents. If |
254 | * a given ordered_extent is completely done, 1 is returned, otherwise | 321 | * a given ordered_extent is completely done, 1 is returned, otherwise |
255 | * 0. | 322 | * 0. |
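
As the comment above says, the new helper is meant to be called in a loop: each call clamps the completed range to one ordered extent, subtracts it from bytes_left, and advances *file_offset so the caller can walk forward (which is what the again/out_test loop in btrfs_endio_direct_write does). A tiny stand-alone calculation of the dec_start/dec_end/to_dec arithmetic for an I/O that straddles two made-up ordered extents:

#include <stdio.h>

struct extent { unsigned long long file_offset, len, bytes_left; };

static unsigned long long min_u64(unsigned long long a, unsigned long long b)
{ return a < b ? a : b; }
static unsigned long long max_u64(unsigned long long a, unsigned long long b)
{ return a > b ? a : b; }

int main(void)
{
	/* two hypothetical ordered extents covering [0,4096) and [4096,12288) */
	struct extent e[2] = {
		{ 0,    4096, 4096 },
		{ 4096, 8192, 8192 },
	};
	unsigned long long file_offset = 1024;   /* dio starts here */
	unsigned long long io_size = 6144;       /* and covers 6144 bytes */
	unsigned long long end = file_offset + io_size;
	int i;

	for (i = 0; i < 2 && file_offset < end; i++) {
		/* same clamping as in btrfs_dec_test_first_ordered_pending */
		unsigned long long dec_start = max_u64(file_offset, e[i].file_offset);
		unsigned long long dec_end = min_u64(end, e[i].file_offset + e[i].len);
		unsigned long long to_dec = dec_end - dec_start;

		e[i].bytes_left -= to_dec;
		file_offset = dec_end;           /* walk forward in the file */
		printf("extent %d: dec [%llu,%llu) to_dec %llu, bytes_left now %llu\n",
		       i, dec_start, dec_end, to_dec, e[i].bytes_left);
	}
	return 0;
}
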
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h index 8ac365492a3f..61dca83119dd 100644 --- a/fs/btrfs/ordered-data.h +++ b/fs/btrfs/ordered-data.h | |||
@@ -141,6 +141,9 @@ int btrfs_remove_ordered_extent(struct inode *inode, | |||
141 | int btrfs_dec_test_ordered_pending(struct inode *inode, | 141 | int btrfs_dec_test_ordered_pending(struct inode *inode, |
142 | struct btrfs_ordered_extent **cached, | 142 | struct btrfs_ordered_extent **cached, |
143 | u64 file_offset, u64 io_size); | 143 | u64 file_offset, u64 io_size); |
144 | int btrfs_dec_test_first_ordered_pending(struct inode *inode, | ||
145 | struct btrfs_ordered_extent **cached, | ||
146 | u64 *file_offset, u64 io_size); | ||
144 | int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, | 147 | int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, |
145 | u64 start, u64 len, u64 disk_len, int type); | 148 | u64 start, u64 len, u64 disk_len, int type); |
146 | int btrfs_add_ordered_extent_dio(struct inode *inode, u64 file_offset, | 149 | int btrfs_add_ordered_extent_dio(struct inode *inode, u64 file_offset, |
diff --git a/fs/btrfs/orphan.c b/fs/btrfs/orphan.c index 79cba5fbc28e..f8be250963a0 100644 --- a/fs/btrfs/orphan.c +++ b/fs/btrfs/orphan.c | |||
@@ -56,8 +56,12 @@ int btrfs_del_orphan_item(struct btrfs_trans_handle *trans, | |||
56 | return -ENOMEM; | 56 | return -ENOMEM; |
57 | 57 | ||
58 | ret = btrfs_search_slot(trans, root, &key, path, -1, 1); | 58 | ret = btrfs_search_slot(trans, root, &key, path, -1, 1); |
59 | if (ret) | 59 | if (ret < 0) |
60 | goto out; | 60 | goto out; |
61 | if (ret) { | ||
62 | ret = -ENOENT; | ||
63 | goto out; | ||
64 | } | ||
61 | 65 | ||
62 | ret = btrfs_del_item(trans, root, path); | 66 | ret = btrfs_del_item(trans, root, path); |
63 | 67 | ||
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 8299a25ffc8f..883c6fa1367e 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c | |||
@@ -244,6 +244,7 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) | |||
244 | case Opt_space_cache: | 244 | case Opt_space_cache: |
245 | printk(KERN_INFO "btrfs: enabling disk space caching\n"); | 245 | printk(KERN_INFO "btrfs: enabling disk space caching\n"); |
246 | btrfs_set_opt(info->mount_opt, SPACE_CACHE); | 246 | btrfs_set_opt(info->mount_opt, SPACE_CACHE); |
247 | break; | ||
247 | case Opt_clear_cache: | 248 | case Opt_clear_cache: |
248 | printk(KERN_INFO "btrfs: force clearing of disk cache\n"); | 249 | printk(KERN_INFO "btrfs: force clearing of disk cache\n"); |
249 | btrfs_set_opt(info->mount_opt, CLEAR_CACHE); | 250 | btrfs_set_opt(info->mount_opt, CLEAR_CACHE); |
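
The one-line fix above matters because, without the break, Opt_space_cache fell straight through into Opt_clear_cache, so asking for the space cache also forced it to be cleared. A trivial demonstration of the fallthrough behaviour, with invented option names:

#include <stdio.h>

enum { OPT_SPACE_CACHE, OPT_CLEAR_CACHE };

static void parse(int token, int add_break)
{
	int space_cache = 0, clear_cache = 0;

	switch (token) {
	case OPT_SPACE_CACHE:
		space_cache = 1;
		if (add_break)
			break;               /* the added statement */
		/* otherwise: falls through */
	case OPT_CLEAR_CACHE:
		clear_cache = 1;
		break;
	}
	printf("space_cache=%d clear_cache=%d\n", space_cache, clear_cache);
}

int main(void)
{
	parse(OPT_SPACE_CACHE, 0);   /* before the fix: both flags end up set */
	parse(OPT_SPACE_CACHE, 1);   /* after the fix: only space_cache */
	return 0;
}
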
@@ -562,12 +563,26 @@ static int btrfs_show_options(struct seq_file *seq, struct vfsmount *vfs) | |||
562 | 563 | ||
563 | static int btrfs_test_super(struct super_block *s, void *data) | 564 | static int btrfs_test_super(struct super_block *s, void *data) |
564 | { | 565 | { |
565 | struct btrfs_fs_devices *test_fs_devices = data; | 566 | struct btrfs_root *test_root = data; |
566 | struct btrfs_root *root = btrfs_sb(s); | 567 | struct btrfs_root *root = btrfs_sb(s); |
567 | 568 | ||
568 | return root->fs_info->fs_devices == test_fs_devices; | 569 | /* |
570 | * If this super block is going away, return false as it | ||
571 | * can't match as an existing super block. | ||
572 | */ | ||
573 | if (!atomic_read(&s->s_active)) | ||
574 | return 0; | ||
575 | return root->fs_info->fs_devices == test_root->fs_info->fs_devices; | ||
576 | } | ||
577 | |||
578 | static int btrfs_set_super(struct super_block *s, void *data) | ||
579 | { | ||
580 | s->s_fs_info = data; | ||
581 | |||
582 | return set_anon_super(s, data); | ||
569 | } | 583 | } |
570 | 584 | ||
585 | |||
571 | /* | 586 | /* |
572 | * Find a superblock for the given device / mount point. | 587 | * Find a superblock for the given device / mount point. |
573 | * | 588 | * |
@@ -581,6 +596,8 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags, | |||
581 | struct super_block *s; | 596 | struct super_block *s; |
582 | struct dentry *root; | 597 | struct dentry *root; |
583 | struct btrfs_fs_devices *fs_devices = NULL; | 598 | struct btrfs_fs_devices *fs_devices = NULL; |
599 | struct btrfs_root *tree_root = NULL; | ||
600 | struct btrfs_fs_info *fs_info = NULL; | ||
584 | fmode_t mode = FMODE_READ; | 601 | fmode_t mode = FMODE_READ; |
585 | char *subvol_name = NULL; | 602 | char *subvol_name = NULL; |
586 | u64 subvol_objectid = 0; | 603 | u64 subvol_objectid = 0; |
@@ -608,8 +625,24 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags, | |||
608 | goto error_close_devices; | 625 | goto error_close_devices; |
609 | } | 626 | } |
610 | 627 | ||
628 | /* | ||
629 | * Setup a dummy root and fs_info for test/set super. This is because | ||
630 | * we don't actually fill this stuff out until open_ctree, but we need | ||
631 | * it for searching for existing supers, so this lets us do that and | ||
632 | * then open_ctree will properly initialize everything later. | ||
633 | */ | ||
634 | fs_info = kzalloc(sizeof(struct btrfs_fs_info), GFP_NOFS); | ||
635 | tree_root = kzalloc(sizeof(struct btrfs_root), GFP_NOFS); | ||
636 | if (!fs_info || !tree_root) { | ||
637 | error = -ENOMEM; | ||
638 | goto error_close_devices; | ||
639 | } | ||
640 | fs_info->tree_root = tree_root; | ||
641 | fs_info->fs_devices = fs_devices; | ||
642 | tree_root->fs_info = fs_info; | ||
643 | |||
611 | bdev = fs_devices->latest_bdev; | 644 | bdev = fs_devices->latest_bdev; |
612 | s = sget(fs_type, btrfs_test_super, set_anon_super, fs_devices); | 645 | s = sget(fs_type, btrfs_test_super, btrfs_set_super, tree_root); |
613 | if (IS_ERR(s)) | 646 | if (IS_ERR(s)) |
614 | goto error_s; | 647 | goto error_s; |
615 | 648 | ||
@@ -652,9 +685,9 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags, | |||
652 | mutex_unlock(&root->d_inode->i_mutex); | 685 | mutex_unlock(&root->d_inode->i_mutex); |
653 | 686 | ||
654 | if (IS_ERR(new_root)) { | 687 | if (IS_ERR(new_root)) { |
688 | dput(root); | ||
655 | deactivate_locked_super(s); | 689 | deactivate_locked_super(s); |
656 | error = PTR_ERR(new_root); | 690 | error = PTR_ERR(new_root); |
657 | dput(root); | ||
658 | goto error_free_subvol_name; | 691 | goto error_free_subvol_name; |
659 | } | 692 | } |
660 | if (!new_root->d_inode) { | 693 | if (!new_root->d_inode) { |
@@ -675,6 +708,8 @@ error_s: | |||
675 | error = PTR_ERR(s); | 708 | error = PTR_ERR(s); |
676 | error_close_devices: | 709 | error_close_devices: |
677 | btrfs_close_devices(fs_devices); | 710 | btrfs_close_devices(fs_devices); |
711 | kfree(fs_info); | ||
712 | kfree(tree_root); | ||
678 | error_free_subvol_name: | 713 | error_free_subvol_name: |
679 | kfree(subvol_name); | 714 | kfree(subvol_name); |
680 | return ERR_PTR(error); | 715 | return ERR_PTR(error); |
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 1fffbc017bdf..f50e931fc217 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c | |||
@@ -902,6 +902,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, | |||
902 | struct btrfs_root *root = pending->root; | 902 | struct btrfs_root *root = pending->root; |
903 | struct btrfs_root *parent_root; | 903 | struct btrfs_root *parent_root; |
904 | struct inode *parent_inode; | 904 | struct inode *parent_inode; |
905 | struct dentry *parent; | ||
905 | struct dentry *dentry; | 906 | struct dentry *dentry; |
906 | struct extent_buffer *tmp; | 907 | struct extent_buffer *tmp; |
907 | struct extent_buffer *old; | 908 | struct extent_buffer *old; |
@@ -941,7 +942,8 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, | |||
941 | trans->block_rsv = &pending->block_rsv; | 942 | trans->block_rsv = &pending->block_rsv; |
942 | 943 | ||
943 | dentry = pending->dentry; | 944 | dentry = pending->dentry; |
944 | parent_inode = dentry->d_parent->d_inode; | 945 | parent = dget_parent(dentry); |
946 | parent_inode = parent->d_inode; | ||
945 | parent_root = BTRFS_I(parent_inode)->root; | 947 | parent_root = BTRFS_I(parent_inode)->root; |
946 | record_root_in_trans(trans, parent_root); | 948 | record_root_in_trans(trans, parent_root); |
947 | 949 | ||
@@ -989,6 +991,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, | |||
989 | parent_inode->i_ino, index, | 991 | parent_inode->i_ino, index, |
990 | dentry->d_name.name, dentry->d_name.len); | 992 | dentry->d_name.name, dentry->d_name.len); |
991 | BUG_ON(ret); | 993 | BUG_ON(ret); |
994 | dput(parent); | ||
992 | 995 | ||
993 | key.offset = (u64)-1; | 996 | key.offset = (u64)-1; |
994 | pending->snap = btrfs_read_fs_root_no_name(root->fs_info, &key); | 997 | pending->snap = btrfs_read_fs_root_no_name(root->fs_info, &key); |
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index a29f19384a27..054744ac5719 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c | |||
@@ -2869,6 +2869,7 @@ static noinline int check_parent_dirs_for_sync(struct btrfs_trans_handle *trans, | |||
2869 | { | 2869 | { |
2870 | int ret = 0; | 2870 | int ret = 0; |
2871 | struct btrfs_root *root; | 2871 | struct btrfs_root *root; |
2872 | struct dentry *old_parent = NULL; | ||
2872 | 2873 | ||
2873 | /* | 2874 | /* |
2874 | * for regular files, if its inode is already on disk, we don't | 2875 | * for regular files, if its inode is already on disk, we don't |
@@ -2910,10 +2911,13 @@ static noinline int check_parent_dirs_for_sync(struct btrfs_trans_handle *trans, | |||
2910 | if (IS_ROOT(parent)) | 2911 | if (IS_ROOT(parent)) |
2911 | break; | 2912 | break; |
2912 | 2913 | ||
2913 | parent = parent->d_parent; | 2914 | parent = dget_parent(parent); |
2915 | dput(old_parent); | ||
2916 | old_parent = parent; | ||
2914 | inode = parent->d_inode; | 2917 | inode = parent->d_inode; |
2915 | 2918 | ||
2916 | } | 2919 | } |
2920 | dput(old_parent); | ||
2917 | out: | 2921 | out: |
2918 | return ret; | 2922 | return ret; |
2919 | } | 2923 | } |
@@ -2945,6 +2949,7 @@ int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, | |||
2945 | { | 2949 | { |
2946 | int inode_only = exists_only ? LOG_INODE_EXISTS : LOG_INODE_ALL; | 2950 | int inode_only = exists_only ? LOG_INODE_EXISTS : LOG_INODE_ALL; |
2947 | struct super_block *sb; | 2951 | struct super_block *sb; |
2952 | struct dentry *old_parent = NULL; | ||
2948 | int ret = 0; | 2953 | int ret = 0; |
2949 | u64 last_committed = root->fs_info->last_trans_committed; | 2954 | u64 last_committed = root->fs_info->last_trans_committed; |
2950 | 2955 | ||
@@ -3016,10 +3021,13 @@ int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, | |||
3016 | if (IS_ROOT(parent)) | 3021 | if (IS_ROOT(parent)) |
3017 | break; | 3022 | break; |
3018 | 3023 | ||
3019 | parent = parent->d_parent; | 3024 | parent = dget_parent(parent); |
3025 | dput(old_parent); | ||
3026 | old_parent = parent; | ||
3020 | } | 3027 | } |
3021 | ret = 0; | 3028 | ret = 0; |
3022 | end_trans: | 3029 | end_trans: |
3030 | dput(old_parent); | ||
3023 | if (ret < 0) { | 3031 | if (ret < 0) { |
3024 | BUG_ON(ret != -ENOSPC); | 3032 | BUG_ON(ret != -ENOSPC); |
3025 | root->fs_info->last_trans_log_full_commit = trans->transid; | 3033 | root->fs_info->last_trans_log_full_commit = trans->transid; |
@@ -3039,8 +3047,13 @@ end_no_trans: | |||
3039 | int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans, | 3047 | int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans, |
3040 | struct btrfs_root *root, struct dentry *dentry) | 3048 | struct btrfs_root *root, struct dentry *dentry) |
3041 | { | 3049 | { |
3042 | return btrfs_log_inode_parent(trans, root, dentry->d_inode, | 3050 | struct dentry *parent = dget_parent(dentry); |
3043 | dentry->d_parent, 0); | 3051 | int ret; |
3052 | |||
3053 | ret = btrfs_log_inode_parent(trans, root, dentry->d_inode, parent, 0); | ||
3054 | dput(parent); | ||
3055 | |||
3056 | return ret; | ||
3044 | } | 3057 | } |
3045 | 3058 | ||
3046 | /* | 3059 | /* |
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index cc04dc1445d6..6b9884507837 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c | |||
@@ -412,12 +412,16 @@ static noinline int device_list_add(const char *path, | |||
412 | 412 | ||
413 | device->fs_devices = fs_devices; | 413 | device->fs_devices = fs_devices; |
414 | fs_devices->num_devices++; | 414 | fs_devices->num_devices++; |
415 | } else if (strcmp(device->name, path)) { | 415 | } else if (!device->name || strcmp(device->name, path)) { |
416 | name = kstrdup(path, GFP_NOFS); | 416 | name = kstrdup(path, GFP_NOFS); |
417 | if (!name) | 417 | if (!name) |
418 | return -ENOMEM; | 418 | return -ENOMEM; |
419 | kfree(device->name); | 419 | kfree(device->name); |
420 | device->name = name; | 420 | device->name = name; |
421 | if (device->missing) { | ||
422 | fs_devices->missing_devices--; | ||
423 | device->missing = 0; | ||
424 | } | ||
421 | } | 425 | } |
422 | 426 | ||
423 | if (found_transid > fs_devices->latest_trans) { | 427 | if (found_transid > fs_devices->latest_trans) { |
@@ -1236,6 +1240,9 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) | |||
1236 | 1240 | ||
1237 | device->fs_devices->num_devices--; | 1241 | device->fs_devices->num_devices--; |
1238 | 1242 | ||
1243 | if (device->missing) | ||
1244 | root->fs_info->fs_devices->missing_devices--; | ||
1245 | |||
1239 | next_device = list_entry(root->fs_info->fs_devices->devices.next, | 1246 | next_device = list_entry(root->fs_info->fs_devices->devices.next, |
1240 | struct btrfs_device, dev_list); | 1247 | struct btrfs_device, dev_list); |
1241 | if (device->bdev == root->fs_info->sb->s_bdev) | 1248 | if (device->bdev == root->fs_info->sb->s_bdev) |
@@ -3080,7 +3087,9 @@ static struct btrfs_device *add_missing_dev(struct btrfs_root *root, | |||
3080 | device->devid = devid; | 3087 | device->devid = devid; |
3081 | device->work.func = pending_bios_fn; | 3088 | device->work.func = pending_bios_fn; |
3082 | device->fs_devices = fs_devices; | 3089 | device->fs_devices = fs_devices; |
3090 | device->missing = 1; | ||
3083 | fs_devices->num_devices++; | 3091 | fs_devices->num_devices++; |
3092 | fs_devices->missing_devices++; | ||
3084 | spin_lock_init(&device->io_lock); | 3093 | spin_lock_init(&device->io_lock); |
3085 | INIT_LIST_HEAD(&device->dev_alloc_list); | 3094 | INIT_LIST_HEAD(&device->dev_alloc_list); |
3086 | memcpy(device->uuid, dev_uuid, BTRFS_UUID_SIZE); | 3095 | memcpy(device->uuid, dev_uuid, BTRFS_UUID_SIZE); |
@@ -3278,6 +3287,15 @@ static int read_one_dev(struct btrfs_root *root, | |||
3278 | device = add_missing_dev(root, devid, dev_uuid); | 3287 | device = add_missing_dev(root, devid, dev_uuid); |
3279 | if (!device) | 3288 | if (!device) |
3280 | return -ENOMEM; | 3289 | return -ENOMEM; |
3290 | } else if (!device->missing) { | ||
3291 | /* | ||
3292 | * this happens when a device that was properly setup | ||
3293 | * in the device info lists suddenly goes bad. | ||
3294 | * device->bdev is NULL, and so we have to set | ||
3295 | * device->missing to one here | ||
3296 | */ | ||
3297 | root->fs_info->fs_devices->missing_devices++; | ||
3298 | device->missing = 1; | ||
3281 | } | 3299 | } |
3282 | } | 3300 | } |
3283 | 3301 | ||
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 2b638b6e4eea..2740db49eb04 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h | |||
@@ -44,6 +44,7 @@ struct btrfs_device { | |||
44 | 44 | ||
45 | int writeable; | 45 | int writeable; |
46 | int in_fs_metadata; | 46 | int in_fs_metadata; |
47 | int missing; | ||
47 | 48 | ||
48 | spinlock_t io_lock; | 49 | spinlock_t io_lock; |
49 | 50 | ||
@@ -93,6 +94,7 @@ struct btrfs_fs_devices { | |||
93 | u64 num_devices; | 94 | u64 num_devices; |
94 | u64 open_devices; | 95 | u64 open_devices; |
95 | u64 rw_devices; | 96 | u64 rw_devices; |
97 | u64 missing_devices; | ||
96 | u64 total_rw_bytes; | 98 | u64 total_rw_bytes; |
97 | struct block_device *latest_bdev; | 99 | struct block_device *latest_bdev; |
98 | 100 | ||
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index e9c874abc9e1..561438b6a50c 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c | |||
@@ -204,7 +204,7 @@ static int readpage_nounlock(struct file *filp, struct page *page) | |||
204 | err = ceph_osdc_readpages(osdc, ceph_vino(inode), &ci->i_layout, | 204 | err = ceph_osdc_readpages(osdc, ceph_vino(inode), &ci->i_layout, |
205 | page->index << PAGE_CACHE_SHIFT, &len, | 205 | page->index << PAGE_CACHE_SHIFT, &len, |
206 | ci->i_truncate_seq, ci->i_truncate_size, | 206 | ci->i_truncate_seq, ci->i_truncate_size, |
207 | &page, 1); | 207 | &page, 1, 0); |
208 | if (err == -ENOENT) | 208 | if (err == -ENOENT) |
209 | err = 0; | 209 | err = 0; |
210 | if (err < 0) { | 210 | if (err < 0) { |
@@ -287,7 +287,7 @@ static int ceph_readpages(struct file *file, struct address_space *mapping, | |||
287 | rc = ceph_osdc_readpages(osdc, ceph_vino(inode), &ci->i_layout, | 287 | rc = ceph_osdc_readpages(osdc, ceph_vino(inode), &ci->i_layout, |
288 | offset, &len, | 288 | offset, &len, |
289 | ci->i_truncate_seq, ci->i_truncate_size, | 289 | ci->i_truncate_seq, ci->i_truncate_size, |
290 | pages, nr_pages); | 290 | pages, nr_pages, 0); |
291 | if (rc == -ENOENT) | 291 | if (rc == -ENOENT) |
292 | rc = 0; | 292 | rc = 0; |
293 | if (rc < 0) | 293 | if (rc < 0) |
@@ -774,7 +774,7 @@ get_more_pages: | |||
774 | snapc, do_sync, | 774 | snapc, do_sync, |
775 | ci->i_truncate_seq, | 775 | ci->i_truncate_seq, |
776 | ci->i_truncate_size, | 776 | ci->i_truncate_size, |
777 | &inode->i_mtime, true, 1); | 777 | &inode->i_mtime, true, 1, 0); |
778 | max_pages = req->r_num_pages; | 778 | max_pages = req->r_num_pages; |
779 | 779 | ||
780 | alloc_page_vec(fsc, req); | 780 | alloc_page_vec(fsc, req); |
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 98ab13e2b71d..60d27bc9eb83 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c | |||
@@ -1430,8 +1430,8 @@ static int try_nonblocking_invalidate(struct inode *inode) | |||
1430 | invalidating_gen == ci->i_rdcache_gen) { | 1430 | invalidating_gen == ci->i_rdcache_gen) { |
1431 | /* success. */ | 1431 | /* success. */ |
1432 | dout("try_nonblocking_invalidate %p success\n", inode); | 1432 | dout("try_nonblocking_invalidate %p success\n", inode); |
1433 | ci->i_rdcache_gen = 0; | 1433 | /* save any racing async invalidate some trouble */ |
1434 | ci->i_rdcache_revoking = 0; | 1434 | ci->i_rdcache_revoking = ci->i_rdcache_gen - 1; |
1435 | return 0; | 1435 | return 0; |
1436 | } | 1436 | } |
1437 | dout("try_nonblocking_invalidate %p failed\n", inode); | 1437 | dout("try_nonblocking_invalidate %p failed\n", inode); |
@@ -2273,8 +2273,7 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant, | |||
2273 | { | 2273 | { |
2274 | struct ceph_inode_info *ci = ceph_inode(inode); | 2274 | struct ceph_inode_info *ci = ceph_inode(inode); |
2275 | int mds = session->s_mds; | 2275 | int mds = session->s_mds; |
2276 | unsigned seq = le32_to_cpu(grant->seq); | 2276 | int seq = le32_to_cpu(grant->seq); |
2277 | unsigned issue_seq = le32_to_cpu(grant->issue_seq); | ||
2278 | int newcaps = le32_to_cpu(grant->caps); | 2277 | int newcaps = le32_to_cpu(grant->caps); |
2279 | int issued, implemented, used, wanted, dirty; | 2278 | int issued, implemented, used, wanted, dirty; |
2280 | u64 size = le64_to_cpu(grant->size); | 2279 | u64 size = le64_to_cpu(grant->size); |
@@ -2286,8 +2285,8 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant, | |||
2286 | int revoked_rdcache = 0; | 2285 | int revoked_rdcache = 0; |
2287 | int queue_invalidate = 0; | 2286 | int queue_invalidate = 0; |
2288 | 2287 | ||
2289 | dout("handle_cap_grant inode %p cap %p mds%d seq %u/%u %s\n", | 2288 | dout("handle_cap_grant inode %p cap %p mds%d seq %d %s\n", |
2290 | inode, cap, mds, seq, issue_seq, ceph_cap_string(newcaps)); | 2289 | inode, cap, mds, seq, ceph_cap_string(newcaps)); |
2291 | dout(" size %llu max_size %llu, i_size %llu\n", size, max_size, | 2290 | dout(" size %llu max_size %llu, i_size %llu\n", size, max_size, |
2292 | inode->i_size); | 2291 | inode->i_size); |
2293 | 2292 | ||
@@ -2383,7 +2382,6 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant, | |||
2383 | } | 2382 | } |
2384 | 2383 | ||
2385 | cap->seq = seq; | 2384 | cap->seq = seq; |
2386 | cap->issue_seq = issue_seq; | ||
2387 | 2385 | ||
2388 | /* file layout may have changed */ | 2386 | /* file layout may have changed */ |
2389 | ci->i_layout = grant->layout; | 2387 | ci->i_layout = grant->layout; |
@@ -2691,6 +2689,11 @@ static void handle_cap_import(struct ceph_mds_client *mdsc, | |||
2691 | NULL /* no caps context */); | 2689 | NULL /* no caps context */); |
2692 | try_flush_caps(inode, session, NULL); | 2690 | try_flush_caps(inode, session, NULL); |
2693 | up_read(&mdsc->snap_rwsem); | 2691 | up_read(&mdsc->snap_rwsem); |
2692 | |||
2693 | /* make sure we re-request max_size, if necessary */ | ||
2694 | spin_lock(&inode->i_lock); | ||
2695 | ci->i_requested_max_size = 0; | ||
2696 | spin_unlock(&inode->i_lock); | ||
2694 | } | 2697 | } |
2695 | 2698 | ||
2696 | /* | 2699 | /* |
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index e0a2dc6fcafc..d902948a90d8 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c | |||
@@ -40,7 +40,8 @@ int ceph_init_dentry(struct dentry *dentry) | |||
40 | if (dentry->d_fsdata) | 40 | if (dentry->d_fsdata) |
41 | return 0; | 41 | return 0; |
42 | 42 | ||
43 | if (ceph_snap(dentry->d_parent->d_inode) == CEPH_NOSNAP) | 43 | if (dentry->d_parent == NULL || /* nfs fh_to_dentry */ |
44 | ceph_snap(dentry->d_parent->d_inode) == CEPH_NOSNAP) | ||
44 | dentry->d_op = &ceph_dentry_ops; | 45 | dentry->d_op = &ceph_dentry_ops; |
45 | else if (ceph_snap(dentry->d_parent->d_inode) == CEPH_SNAPDIR) | 46 | else if (ceph_snap(dentry->d_parent->d_inode) == CEPH_SNAPDIR) |
46 | dentry->d_op = &ceph_snapdir_dentry_ops; | 47 | dentry->d_op = &ceph_snapdir_dentry_ops; |
@@ -114,8 +115,8 @@ static int __dcache_readdir(struct file *filp, | |||
114 | spin_lock(&dcache_lock); | 115 | spin_lock(&dcache_lock); |
115 | 116 | ||
116 | /* start at beginning? */ | 117 | /* start at beginning? */ |
117 | if (filp->f_pos == 2 || (last && | 118 | if (filp->f_pos == 2 || last == NULL || |
118 | filp->f_pos < ceph_dentry(last)->offset)) { | 119 | filp->f_pos < ceph_dentry(last)->offset) { |
119 | if (list_empty(&parent->d_subdirs)) | 120 | if (list_empty(&parent->d_subdirs)) |
120 | goto out_unlock; | 121 | goto out_unlock; |
121 | p = parent->d_subdirs.prev; | 122 | p = parent->d_subdirs.prev; |
@@ -336,7 +337,10 @@ more: | |||
336 | if (req->r_reply_info.dir_end) { | 337 | if (req->r_reply_info.dir_end) { |
337 | kfree(fi->last_name); | 338 | kfree(fi->last_name); |
338 | fi->last_name = NULL; | 339 | fi->last_name = NULL; |
339 | fi->next_offset = 2; | 340 | if (ceph_frag_is_rightmost(frag)) |
341 | fi->next_offset = 2; | ||
342 | else | ||
343 | fi->next_offset = 0; | ||
340 | } else { | 344 | } else { |
341 | rinfo = &req->r_reply_info; | 345 | rinfo = &req->r_reply_info; |
342 | err = note_last_dentry(fi, | 346 | err = note_last_dentry(fi, |
@@ -355,18 +359,22 @@ more: | |||
355 | u64 pos = ceph_make_fpos(frag, off); | 359 | u64 pos = ceph_make_fpos(frag, off); |
356 | struct ceph_mds_reply_inode *in = | 360 | struct ceph_mds_reply_inode *in = |
357 | rinfo->dir_in[off - fi->offset].in; | 361 | rinfo->dir_in[off - fi->offset].in; |
362 | struct ceph_vino vino; | ||
363 | ino_t ino; | ||
364 | |||
358 | dout("readdir off %d (%d/%d) -> %lld '%.*s' %p\n", | 365 | dout("readdir off %d (%d/%d) -> %lld '%.*s' %p\n", |
359 | off, off - fi->offset, rinfo->dir_nr, pos, | 366 | off, off - fi->offset, rinfo->dir_nr, pos, |
360 | rinfo->dir_dname_len[off - fi->offset], | 367 | rinfo->dir_dname_len[off - fi->offset], |
361 | rinfo->dir_dname[off - fi->offset], in); | 368 | rinfo->dir_dname[off - fi->offset], in); |
362 | BUG_ON(!in); | 369 | BUG_ON(!in); |
363 | ftype = le32_to_cpu(in->mode) >> 12; | 370 | ftype = le32_to_cpu(in->mode) >> 12; |
371 | vino.ino = le64_to_cpu(in->ino); | ||
372 | vino.snap = le64_to_cpu(in->snapid); | ||
373 | ino = ceph_vino_to_ino(vino); | ||
364 | if (filldir(dirent, | 374 | if (filldir(dirent, |
365 | rinfo->dir_dname[off - fi->offset], | 375 | rinfo->dir_dname[off - fi->offset], |
366 | rinfo->dir_dname_len[off - fi->offset], | 376 | rinfo->dir_dname_len[off - fi->offset], |
367 | pos, | 377 | pos, ino, ftype) < 0) { |
368 | le64_to_cpu(in->ino), | ||
369 | ftype) < 0) { | ||
370 | dout("filldir stopping us...\n"); | 378 | dout("filldir stopping us...\n"); |
371 | return 0; | 379 | return 0; |
372 | } | 380 | } |
@@ -414,6 +422,7 @@ static void reset_readdir(struct ceph_file_info *fi) | |||
414 | fi->last_readdir = NULL; | 422 | fi->last_readdir = NULL; |
415 | } | 423 | } |
416 | kfree(fi->last_name); | 424 | kfree(fi->last_name); |
425 | fi->last_name = NULL; | ||
417 | fi->next_offset = 2; /* compensate for . and .. */ | 426 | fi->next_offset = 2; /* compensate for . and .. */ |
418 | if (fi->dentry) { | 427 | if (fi->dentry) { |
419 | dput(fi->dentry); | 428 | dput(fi->dentry); |
diff --git a/fs/ceph/file.c b/fs/ceph/file.c index e77c28cf3690..7d0e4a82d898 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c | |||
@@ -154,11 +154,13 @@ int ceph_open(struct inode *inode, struct file *file) | |||
154 | } | 154 | } |
155 | 155 | ||
156 | /* | 156 | /* |
157 | * No need to block if we have any caps. Update wanted set | 157 | * No need to block if we have caps on the auth MDS (for |
158 | * write) or any MDS (for read). Update wanted set | ||
158 | * asynchronously. | 159 | * asynchronously. |
159 | */ | 160 | */ |
160 | spin_lock(&inode->i_lock); | 161 | spin_lock(&inode->i_lock); |
161 | if (__ceph_is_any_real_caps(ci)) { | 162 | if (__ceph_is_any_real_caps(ci) && |
163 | (((fmode & CEPH_FILE_MODE_WR) == 0) || ci->i_auth_cap)) { | ||
162 | int mds_wanted = __ceph_caps_mds_wanted(ci); | 164 | int mds_wanted = __ceph_caps_mds_wanted(ci); |
163 | int issued = __ceph_caps_issued(ci, NULL); | 165 | int issued = __ceph_caps_issued(ci, NULL); |
164 | 166 | ||
@@ -280,11 +282,13 @@ int ceph_release(struct inode *inode, struct file *file) | |||
280 | static int striped_read(struct inode *inode, | 282 | static int striped_read(struct inode *inode, |
281 | u64 off, u64 len, | 283 | u64 off, u64 len, |
282 | struct page **pages, int num_pages, | 284 | struct page **pages, int num_pages, |
283 | int *checkeof) | 285 | int *checkeof, bool align_to_pages, |
286 | unsigned long buf_align) | ||
284 | { | 287 | { |
285 | struct ceph_fs_client *fsc = ceph_inode_to_client(inode); | 288 | struct ceph_fs_client *fsc = ceph_inode_to_client(inode); |
286 | struct ceph_inode_info *ci = ceph_inode(inode); | 289 | struct ceph_inode_info *ci = ceph_inode(inode); |
287 | u64 pos, this_len; | 290 | u64 pos, this_len; |
291 | int io_align, page_align; | ||
288 | int page_off = off & ~PAGE_CACHE_MASK; /* first byte's offset in page */ | 292 | int page_off = off & ~PAGE_CACHE_MASK; /* first byte's offset in page */ |
289 | int left, pages_left; | 293 | int left, pages_left; |
290 | int read; | 294 | int read; |
@@ -300,14 +304,19 @@ static int striped_read(struct inode *inode, | |||
300 | page_pos = pages; | 304 | page_pos = pages; |
301 | pages_left = num_pages; | 305 | pages_left = num_pages; |
302 | read = 0; | 306 | read = 0; |
307 | io_align = off & ~PAGE_MASK; | ||
303 | 308 | ||
304 | more: | 309 | more: |
310 | if (align_to_pages) | ||
311 | page_align = (pos - io_align + buf_align) & ~PAGE_MASK; | ||
312 | else | ||
313 | page_align = pos & ~PAGE_MASK; | ||
305 | this_len = left; | 314 | this_len = left; |
306 | ret = ceph_osdc_readpages(&fsc->client->osdc, ceph_vino(inode), | 315 | ret = ceph_osdc_readpages(&fsc->client->osdc, ceph_vino(inode), |
307 | &ci->i_layout, pos, &this_len, | 316 | &ci->i_layout, pos, &this_len, |
308 | ci->i_truncate_seq, | 317 | ci->i_truncate_seq, |
309 | ci->i_truncate_size, | 318 | ci->i_truncate_size, |
310 | page_pos, pages_left); | 319 | page_pos, pages_left, page_align); |
311 | hit_stripe = this_len < left; | 320 | hit_stripe = this_len < left; |
312 | was_short = ret >= 0 && ret < this_len; | 321 | was_short = ret >= 0 && ret < this_len; |
313 | if (ret == -ENOENT) | 322 | if (ret == -ENOENT) |
@@ -368,32 +377,34 @@ static ssize_t ceph_sync_read(struct file *file, char __user *data, | |||
368 | struct inode *inode = file->f_dentry->d_inode; | 377 | struct inode *inode = file->f_dentry->d_inode; |
369 | struct page **pages; | 378 | struct page **pages; |
370 | u64 off = *poff; | 379 | u64 off = *poff; |
371 | int num_pages = calc_pages_for(off, len); | 380 | int num_pages, ret; |
372 | int ret; | ||
373 | 381 | ||
374 | dout("sync_read on file %p %llu~%u %s\n", file, off, len, | 382 | dout("sync_read on file %p %llu~%u %s\n", file, off, len, |
375 | (file->f_flags & O_DIRECT) ? "O_DIRECT" : ""); | 383 | (file->f_flags & O_DIRECT) ? "O_DIRECT" : ""); |
376 | 384 | ||
377 | if (file->f_flags & O_DIRECT) { | 385 | if (file->f_flags & O_DIRECT) { |
378 | pages = ceph_get_direct_page_vector(data, num_pages, off, len); | 386 | num_pages = calc_pages_for((unsigned long)data, len); |
379 | 387 | pages = ceph_get_direct_page_vector(data, num_pages, true); | |
380 | /* | ||
381 | * flush any page cache pages in this range. this | ||
382 | * will make concurrent normal and O_DIRECT io slow, | ||
383 | * but it will at least behave sensibly when they are | ||
384 | * in sequence. | ||
385 | */ | ||
386 | } else { | 388 | } else { |
389 | num_pages = calc_pages_for(off, len); | ||
387 | pages = ceph_alloc_page_vector(num_pages, GFP_NOFS); | 390 | pages = ceph_alloc_page_vector(num_pages, GFP_NOFS); |
388 | } | 391 | } |
389 | if (IS_ERR(pages)) | 392 | if (IS_ERR(pages)) |
390 | return PTR_ERR(pages); | 393 | return PTR_ERR(pages); |
391 | 394 | ||
395 | /* | ||
396 | * flush any page cache pages in this range. this | ||
397 | * will make concurrent normal and sync io slow, | ||
398 | * but it will at least behave sensibly when they are | ||
399 | * in sequence. | ||
400 | */ | ||
392 | ret = filemap_write_and_wait(inode->i_mapping); | 401 | ret = filemap_write_and_wait(inode->i_mapping); |
393 | if (ret < 0) | 402 | if (ret < 0) |
394 | goto done; | 403 | goto done; |
395 | 404 | ||
396 | ret = striped_read(inode, off, len, pages, num_pages, checkeof); | 405 | ret = striped_read(inode, off, len, pages, num_pages, checkeof, |
406 | file->f_flags & O_DIRECT, | ||
407 | (unsigned long)data & ~PAGE_MASK); | ||
397 | 408 | ||
398 | if (ret >= 0 && (file->f_flags & O_DIRECT) == 0) | 409 | if (ret >= 0 && (file->f_flags & O_DIRECT) == 0) |
399 | ret = ceph_copy_page_vector_to_user(pages, data, off, ret); | 410 | ret = ceph_copy_page_vector_to_user(pages, data, off, ret); |
@@ -402,7 +413,7 @@ static ssize_t ceph_sync_read(struct file *file, char __user *data, | |||
402 | 413 | ||
403 | done: | 414 | done: |
404 | if (file->f_flags & O_DIRECT) | 415 | if (file->f_flags & O_DIRECT) |
405 | ceph_put_page_vector(pages, num_pages); | 416 | ceph_put_page_vector(pages, num_pages, true); |
406 | else | 417 | else |
407 | ceph_release_page_vector(pages, num_pages); | 418 | ceph_release_page_vector(pages, num_pages); |
408 | dout("sync_read result %d\n", ret); | 419 | dout("sync_read result %d\n", ret); |
@@ -448,6 +459,8 @@ static ssize_t ceph_sync_write(struct file *file, const char __user *data, | |||
448 | int flags; | 459 | int flags; |
449 | int do_sync = 0; | 460 | int do_sync = 0; |
450 | int check_caps = 0; | 461 | int check_caps = 0; |
462 | int page_align, io_align; | ||
463 | unsigned long buf_align; | ||
451 | int ret; | 464 | int ret; |
452 | struct timespec mtime = CURRENT_TIME; | 465 | struct timespec mtime = CURRENT_TIME; |
453 | 466 | ||
@@ -462,6 +475,9 @@ static ssize_t ceph_sync_write(struct file *file, const char __user *data, | |||
462 | else | 475 | else |
463 | pos = *offset; | 476 | pos = *offset; |
464 | 477 | ||
478 | io_align = pos & ~PAGE_MASK; | ||
479 | buf_align = (unsigned long)data & ~PAGE_MASK; | ||
480 | |||
465 | ret = filemap_write_and_wait_range(inode->i_mapping, pos, pos + left); | 481 | ret = filemap_write_and_wait_range(inode->i_mapping, pos, pos + left); |
466 | if (ret < 0) | 482 | if (ret < 0) |
467 | return ret; | 483 | return ret; |
@@ -486,20 +502,27 @@ static ssize_t ceph_sync_write(struct file *file, const char __user *data, | |||
486 | */ | 502 | */ |
487 | more: | 503 | more: |
488 | len = left; | 504 | len = left; |
505 | if (file->f_flags & O_DIRECT) { | ||
506 | /* write from beginning of first page, regardless of | ||
507 | io alignment */ | ||
508 | page_align = (pos - io_align + buf_align) & ~PAGE_MASK; | ||
509 | num_pages = calc_pages_for((unsigned long)data, len); | ||
510 | } else { | ||
511 | page_align = pos & ~PAGE_MASK; | ||
512 | num_pages = calc_pages_for(pos, len); | ||
513 | } | ||
489 | req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout, | 514 | req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout, |
490 | ceph_vino(inode), pos, &len, | 515 | ceph_vino(inode), pos, &len, |
491 | CEPH_OSD_OP_WRITE, flags, | 516 | CEPH_OSD_OP_WRITE, flags, |
492 | ci->i_snap_realm->cached_context, | 517 | ci->i_snap_realm->cached_context, |
493 | do_sync, | 518 | do_sync, |
494 | ci->i_truncate_seq, ci->i_truncate_size, | 519 | ci->i_truncate_seq, ci->i_truncate_size, |
495 | &mtime, false, 2); | 520 | &mtime, false, 2, page_align); |
496 | if (!req) | 521 | if (!req) |
497 | return -ENOMEM; | 522 | return -ENOMEM; |
498 | 523 | ||
499 | num_pages = calc_pages_for(pos, len); | ||
500 | |||
501 | if (file->f_flags & O_DIRECT) { | 524 | if (file->f_flags & O_DIRECT) { |
502 | pages = ceph_get_direct_page_vector(data, num_pages, pos, len); | 525 | pages = ceph_get_direct_page_vector(data, num_pages, false); |
503 | if (IS_ERR(pages)) { | 526 | if (IS_ERR(pages)) { |
504 | ret = PTR_ERR(pages); | 527 | ret = PTR_ERR(pages); |
505 | goto out; | 528 | goto out; |
@@ -549,7 +572,7 @@ more: | |||
549 | } | 572 | } |
550 | 573 | ||
551 | if (file->f_flags & O_DIRECT) | 574 | if (file->f_flags & O_DIRECT) |
552 | ceph_put_page_vector(pages, num_pages); | 575 | ceph_put_page_vector(pages, num_pages, false); |
553 | else if (file->f_flags & O_SYNC) | 576 | else if (file->f_flags & O_SYNC) |
554 | ceph_release_page_vector(pages, num_pages); | 577 | ceph_release_page_vector(pages, num_pages); |
555 | 578 | ||
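Illustrative aside, not part of the patch above: the new page_align computation can be exercised in isolation. The sketch below assumes a 4 KiB page size and hypothetical offsets, and mirrors the O_DIRECT branch, where the alignment handed to the OSD layer comes from the user buffer's offset within its page rather than from the file offset.

#include <stdio.h>
#include <stdint.h>

#define PAGE_SIZE 4096UL
#define PAGE_MASK (~(PAGE_SIZE - 1))

/* Mirrors the O_DIRECT branch in striped_read()/ceph_sync_write():
 * io_align  = file offset within its page at the start of the I/O,
 * buf_align = user buffer offset within its page,
 * pos       = current file position of this chunk. */
static unsigned long page_align_for(uint64_t pos, unsigned long io_align,
                                    unsigned long buf_align)
{
        return (unsigned long)((pos - io_align + buf_align) & ~PAGE_MASK);
}

int main(void)
{
        uint64_t off = 4096 + 512;            /* file offset: 512 bytes into page 1 */
        unsigned long buf = 0x501100UL;       /* hypothetical user buffer address */
        unsigned long io_align = off & ~PAGE_MASK;
        unsigned long buf_align = buf & ~PAGE_MASK;

        /* At the starting offset the result is just the buffer's page offset;
         * as pos advances in page-sized steps that offset is preserved. */
        printf("page_align at start: %lu\n", page_align_for(off, io_align, buf_align));
        printf("page_align after 8K: %lu\n", page_align_for(off + 8192, io_align, buf_align));
        return 0;
}

Both calls print 256 here, i.e. the buffer's page offset, which is exactly what the patched code passes through to ceph_osdc_readpages()/ceph_osdc_new_request().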
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 1d6a45b5a04c..bf1286588f26 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c | |||
@@ -2,7 +2,6 @@ | |||
2 | 2 | ||
3 | #include <linux/module.h> | 3 | #include <linux/module.h> |
4 | #include <linux/fs.h> | 4 | #include <linux/fs.h> |
5 | #include <linux/smp_lock.h> | ||
6 | #include <linux/slab.h> | 5 | #include <linux/slab.h> |
7 | #include <linux/string.h> | 6 | #include <linux/string.h> |
8 | #include <linux/uaccess.h> | 7 | #include <linux/uaccess.h> |
@@ -471,7 +470,9 @@ void ceph_fill_file_time(struct inode *inode, int issued, | |||
471 | 470 | ||
472 | if (issued & (CEPH_CAP_FILE_EXCL| | 471 | if (issued & (CEPH_CAP_FILE_EXCL| |
473 | CEPH_CAP_FILE_WR| | 472 | CEPH_CAP_FILE_WR| |
474 | CEPH_CAP_FILE_BUFFER)) { | 473 | CEPH_CAP_FILE_BUFFER| |
474 | CEPH_CAP_AUTH_EXCL| | ||
475 | CEPH_CAP_XATTR_EXCL)) { | ||
475 | if (timespec_compare(ctime, &inode->i_ctime) > 0) { | 476 | if (timespec_compare(ctime, &inode->i_ctime) > 0) { |
476 | dout("ctime %ld.%09ld -> %ld.%09ld inc w/ cap\n", | 477 | dout("ctime %ld.%09ld -> %ld.%09ld inc w/ cap\n", |
477 | inode->i_ctime.tv_sec, inode->i_ctime.tv_nsec, | 478 | inode->i_ctime.tv_sec, inode->i_ctime.tv_nsec, |
@@ -511,7 +512,7 @@ void ceph_fill_file_time(struct inode *inode, int issued, | |||
511 | warn = 1; | 512 | warn = 1; |
512 | } | 513 | } |
513 | } else { | 514 | } else { |
514 | /* we have no write caps; whatever the MDS says is true */ | 515 | /* we have no write|excl caps; whatever the MDS says is true */ |
515 | if (ceph_seq_cmp(time_warp_seq, ci->i_time_warp_seq) >= 0) { | 516 | if (ceph_seq_cmp(time_warp_seq, ci->i_time_warp_seq) >= 0) { |
516 | inode->i_ctime = *ctime; | 517 | inode->i_ctime = *ctime; |
517 | inode->i_mtime = *mtime; | 518 | inode->i_mtime = *mtime; |
@@ -567,12 +568,17 @@ static int fill_inode(struct inode *inode, | |||
567 | 568 | ||
568 | /* | 569 | /* |
569 | * provided version will be odd if inode value is projected, | 570 | * provided version will be odd if inode value is projected, |
570 | * even if stable. skip the update if we have a newer info | 571 | * even if stable. skip the update if we have newer stable |
571 | * (e.g., due to inode info racing form multiple MDSs), or if | 572 | * info (ours>=theirs, e.g. due to racing mds replies), unless |
572 | * we are getting projected (unstable) inode info. | 573 | * we are getting projected (unstable) info (in which case the |
574 | * version is odd, and we want ours>theirs). | ||
575 | * us them | ||
576 | * 2 2 skip | ||
577 | * 3 2 skip | ||
578 | * 3 3 update | ||
573 | */ | 579 | */ |
574 | if (le64_to_cpu(info->version) > 0 && | 580 | if (le64_to_cpu(info->version) > 0 && |
575 | (ci->i_version & ~1) > le64_to_cpu(info->version)) | 581 | (ci->i_version & ~1) >= le64_to_cpu(info->version)) |
576 | goto no_change; | 582 | goto no_change; |
577 | 583 | ||
578 | issued = __ceph_caps_issued(ci, &implemented); | 584 | issued = __ceph_caps_issued(ci, &implemented); |
@@ -606,7 +612,14 @@ static int fill_inode(struct inode *inode, | |||
606 | le32_to_cpu(info->time_warp_seq), | 612 | le32_to_cpu(info->time_warp_seq), |
607 | &ctime, &mtime, &atime); | 613 | &ctime, &mtime, &atime); |
608 | 614 | ||
609 | ci->i_max_size = le64_to_cpu(info->max_size); | 615 | /* only update max_size on auth cap */ |
616 | if ((info->cap.flags & CEPH_CAP_FLAG_AUTH) && | ||
617 | ci->i_max_size != le64_to_cpu(info->max_size)) { | ||
618 | dout("max_size %lld -> %llu\n", ci->i_max_size, | ||
619 | le64_to_cpu(info->max_size)); | ||
620 | ci->i_max_size = le64_to_cpu(info->max_size); | ||
621 | } | ||
622 | |||
610 | ci->i_layout = info->layout; | 623 | ci->i_layout = info->layout; |
611 | inode->i_blkbits = fls(le32_to_cpu(info->layout.fl_stripe_unit)) - 1; | 624 | inode->i_blkbits = fls(le32_to_cpu(info->layout.fl_stripe_unit)) - 1; |
612 | 625 | ||
@@ -1055,7 +1068,8 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req, | |||
1055 | ininfo = rinfo->targeti.in; | 1068 | ininfo = rinfo->targeti.in; |
1056 | vino.ino = le64_to_cpu(ininfo->ino); | 1069 | vino.ino = le64_to_cpu(ininfo->ino); |
1057 | vino.snap = le64_to_cpu(ininfo->snapid); | 1070 | vino.snap = le64_to_cpu(ininfo->snapid); |
1058 | if (!dn->d_inode) { | 1071 | in = dn->d_inode; |
1072 | if (!in) { | ||
1059 | in = ceph_get_inode(sb, vino); | 1073 | in = ceph_get_inode(sb, vino); |
1060 | if (IS_ERR(in)) { | 1074 | if (IS_ERR(in)) { |
1061 | pr_err("fill_trace bad get_inode " | 1075 | pr_err("fill_trace bad get_inode " |
@@ -1386,11 +1400,8 @@ static void ceph_invalidate_work(struct work_struct *work) | |||
1386 | spin_lock(&inode->i_lock); | 1400 | spin_lock(&inode->i_lock); |
1387 | dout("invalidate_pages %p gen %d revoking %d\n", inode, | 1401 | dout("invalidate_pages %p gen %d revoking %d\n", inode, |
1388 | ci->i_rdcache_gen, ci->i_rdcache_revoking); | 1402 | ci->i_rdcache_gen, ci->i_rdcache_revoking); |
1389 | if (ci->i_rdcache_gen == 0 || | 1403 | if (ci->i_rdcache_revoking != ci->i_rdcache_gen) { |
1390 | ci->i_rdcache_revoking != ci->i_rdcache_gen) { | ||
1391 | BUG_ON(ci->i_rdcache_revoking > ci->i_rdcache_gen); | ||
1392 | /* nevermind! */ | 1404 | /* nevermind! */ |
1393 | ci->i_rdcache_revoking = 0; | ||
1394 | spin_unlock(&inode->i_lock); | 1405 | spin_unlock(&inode->i_lock); |
1395 | goto out; | 1406 | goto out; |
1396 | } | 1407 | } |
@@ -1400,15 +1411,16 @@ static void ceph_invalidate_work(struct work_struct *work) | |||
1400 | ceph_invalidate_nondirty_pages(inode->i_mapping); | 1411 | ceph_invalidate_nondirty_pages(inode->i_mapping); |
1401 | 1412 | ||
1402 | spin_lock(&inode->i_lock); | 1413 | spin_lock(&inode->i_lock); |
1403 | if (orig_gen == ci->i_rdcache_gen) { | 1414 | if (orig_gen == ci->i_rdcache_gen && |
1415 | orig_gen == ci->i_rdcache_revoking) { | ||
1404 | dout("invalidate_pages %p gen %d successful\n", inode, | 1416 | dout("invalidate_pages %p gen %d successful\n", inode, |
1405 | ci->i_rdcache_gen); | 1417 | ci->i_rdcache_gen); |
1406 | ci->i_rdcache_gen = 0; | 1418 | ci->i_rdcache_revoking--; |
1407 | ci->i_rdcache_revoking = 0; | ||
1408 | check = 1; | 1419 | check = 1; |
1409 | } else { | 1420 | } else { |
1410 | dout("invalidate_pages %p gen %d raced, gen now %d\n", | 1421 | dout("invalidate_pages %p gen %d raced, now %d revoking %d\n", |
1411 | inode, orig_gen, ci->i_rdcache_gen); | 1422 | inode, orig_gen, ci->i_rdcache_gen, |
1423 | ci->i_rdcache_revoking); | ||
1412 | } | 1424 | } |
1413 | spin_unlock(&inode->i_lock); | 1425 | spin_unlock(&inode->i_lock); |
1414 | 1426 | ||
@@ -1739,7 +1751,7 @@ int ceph_do_getattr(struct inode *inode, int mask) | |||
1739 | return 0; | 1751 | return 0; |
1740 | } | 1752 | } |
1741 | 1753 | ||
1742 | dout("do_getattr inode %p mask %s\n", inode, ceph_cap_string(mask)); | 1754 | dout("do_getattr inode %p mask %s mode 0%o\n", inode, ceph_cap_string(mask), inode->i_mode); |
1743 | if (ceph_caps_issued_mask(ceph_inode(inode), mask, 1)) | 1755 | if (ceph_caps_issued_mask(ceph_inode(inode), mask, 1)) |
1744 | return 0; | 1756 | return 0; |
1745 | 1757 | ||
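Illustrative aside, not part of the patch: the us/them table in the fill_inode() comment above can be checked with a few lines of plain C. This is a sketch of the rule only; the function name is made up.

#include <stdio.h>
#include <stdint.h>

/* The low bit of the provided version marks projected (unstable) info.
 * The patched check skips the update when (ours & ~1) >= theirs, which
 * reproduces the table from the comment: 2/2 skip, 3/2 skip, 3/3 update. */
static int skip_update(uint64_t ours, uint64_t theirs)
{
        return theirs > 0 && (ours & ~1ULL) >= theirs;
}

int main(void)
{
        printf("us=2 them=2 -> %s\n", skip_update(2, 2) ? "skip" : "update");
        printf("us=3 them=2 -> %s\n", skip_update(3, 2) ? "skip" : "update");
        printf("us=3 them=3 -> %s\n", skip_update(3, 3) ? "skip" : "update");
        return 0;
}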
diff --git a/fs/ceph/ioctl.h b/fs/ceph/ioctl.h index a6ce54e94eb5..52e8fd74d450 100644 --- a/fs/ceph/ioctl.h +++ b/fs/ceph/ioctl.h | |||
@@ -4,7 +4,7 @@ | |||
4 | #include <linux/ioctl.h> | 4 | #include <linux/ioctl.h> |
5 | #include <linux/types.h> | 5 | #include <linux/types.h> |
6 | 6 | ||
7 | #define CEPH_IOCTL_MAGIC 0x98 | 7 | #define CEPH_IOCTL_MAGIC 0x97 |
8 | 8 | ||
9 | /* just use u64 to align sanely on all archs */ | 9 | /* just use u64 to align sanely on all archs */ |
10 | struct ceph_ioctl_layout { | 10 | struct ceph_ioctl_layout { |
diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c index 40abde93c345..476b329867d4 100644 --- a/fs/ceph/locks.c +++ b/fs/ceph/locks.c | |||
@@ -11,40 +11,68 @@ | |||
11 | * Implement fcntl and flock locking functions. | 11 | * Implement fcntl and flock locking functions. |
12 | */ | 12 | */ |
13 | static int ceph_lock_message(u8 lock_type, u16 operation, struct file *file, | 13 | static int ceph_lock_message(u8 lock_type, u16 operation, struct file *file, |
14 | u64 pid, u64 pid_ns, | 14 | int cmd, u8 wait, struct file_lock *fl) |
15 | int cmd, u64 start, u64 length, u8 wait) | ||
16 | { | 15 | { |
17 | struct inode *inode = file->f_dentry->d_inode; | 16 | struct inode *inode = file->f_dentry->d_inode; |
18 | struct ceph_mds_client *mdsc = | 17 | struct ceph_mds_client *mdsc = |
19 | ceph_sb_to_client(inode->i_sb)->mdsc; | 18 | ceph_sb_to_client(inode->i_sb)->mdsc; |
20 | struct ceph_mds_request *req; | 19 | struct ceph_mds_request *req; |
21 | int err; | 20 | int err; |
21 | u64 length = 0; | ||
22 | 22 | ||
23 | req = ceph_mdsc_create_request(mdsc, operation, USE_AUTH_MDS); | 23 | req = ceph_mdsc_create_request(mdsc, operation, USE_AUTH_MDS); |
24 | if (IS_ERR(req)) | 24 | if (IS_ERR(req)) |
25 | return PTR_ERR(req); | 25 | return PTR_ERR(req); |
26 | req->r_inode = igrab(inode); | 26 | req->r_inode = igrab(inode); |
27 | 27 | ||
28 | /* mds requires start and length rather than start and end */ | ||
29 | if (LLONG_MAX == fl->fl_end) | ||
30 | length = 0; | ||
31 | else | ||
32 | length = fl->fl_end - fl->fl_start + 1; | ||
33 | |||
28 | dout("ceph_lock_message: rule: %d, op: %d, pid: %llu, start: %llu, " | 34 | dout("ceph_lock_message: rule: %d, op: %d, pid: %llu, start: %llu, " |
29 | "length: %llu, wait: %d, type`: %d", (int)lock_type, | 35 | "length: %llu, wait: %d, type`: %d", (int)lock_type, |
30 | (int)operation, pid, start, length, wait, cmd); | 36 | (int)operation, (u64)fl->fl_pid, fl->fl_start, |
37 | length, wait, fl->fl_type); | ||
38 | |||
31 | 39 | ||
32 | req->r_args.filelock_change.rule = lock_type; | 40 | req->r_args.filelock_change.rule = lock_type; |
33 | req->r_args.filelock_change.type = cmd; | 41 | req->r_args.filelock_change.type = cmd; |
34 | req->r_args.filelock_change.pid = cpu_to_le64(pid); | 42 | req->r_args.filelock_change.pid = cpu_to_le64((u64)fl->fl_pid); |
35 | /* This should be adjusted, but I'm not sure if | 43 | /* This should be adjusted, but I'm not sure if |
36 | namespaces actually get id numbers*/ | 44 | namespaces actually get id numbers*/ |
37 | req->r_args.filelock_change.pid_namespace = | 45 | req->r_args.filelock_change.pid_namespace = |
38 | cpu_to_le64((u64)pid_ns); | 46 | cpu_to_le64((u64)(unsigned long)fl->fl_nspid); |
39 | req->r_args.filelock_change.start = cpu_to_le64(start); | 47 | req->r_args.filelock_change.start = cpu_to_le64(fl->fl_start); |
40 | req->r_args.filelock_change.length = cpu_to_le64(length); | 48 | req->r_args.filelock_change.length = cpu_to_le64(length); |
41 | req->r_args.filelock_change.wait = wait; | 49 | req->r_args.filelock_change.wait = wait; |
42 | 50 | ||
43 | err = ceph_mdsc_do_request(mdsc, inode, req); | 51 | err = ceph_mdsc_do_request(mdsc, inode, req); |
52 | |||
53 | if (operation == CEPH_MDS_OP_GETFILELOCK) { | ||
54 | fl->fl_pid = le64_to_cpu(req->r_reply_info.filelock_reply->pid); | ||
55 | if (CEPH_LOCK_SHARED == req->r_reply_info.filelock_reply->type) | ||
56 | fl->fl_type = F_RDLCK; | ||
57 | else if (CEPH_LOCK_EXCL == req->r_reply_info.filelock_reply->type) | ||
58 | fl->fl_type = F_WRLCK; | ||
59 | else | ||
60 | fl->fl_type = F_UNLCK; | ||
61 | |||
62 | fl->fl_start = le64_to_cpu(req->r_reply_info.filelock_reply->start); | ||
63 | length = le64_to_cpu(req->r_reply_info.filelock_reply->start) + | ||
64 | le64_to_cpu(req->r_reply_info.filelock_reply->length); | ||
65 | if (length >= 1) | ||
66 | fl->fl_end = length - 1; | ||
67 | else | ||
68 | fl->fl_end = 0; | ||
69 | |||
70 | } | ||
44 | ceph_mdsc_put_request(req); | 71 | ceph_mdsc_put_request(req); |
45 | dout("ceph_lock_message: rule: %d, op: %d, pid: %llu, start: %llu, " | 72 | dout("ceph_lock_message: rule: %d, op: %d, pid: %llu, start: %llu, " |
46 | "length: %llu, wait: %d, type`: %d err code %d", (int)lock_type, | 73 | "length: %llu, wait: %d, type`: %d, err code %d", (int)lock_type, |
47 | (int)operation, pid, start, length, wait, cmd, err); | 74 | (int)operation, (u64)fl->fl_pid, fl->fl_start, |
75 | length, wait, fl->fl_type, err); | ||
48 | return err; | 76 | return err; |
49 | } | 77 | } |
50 | 78 | ||
@@ -54,7 +82,6 @@ static int ceph_lock_message(u8 lock_type, u16 operation, struct file *file, | |||
54 | */ | 82 | */ |
55 | int ceph_lock(struct file *file, int cmd, struct file_lock *fl) | 83 | int ceph_lock(struct file *file, int cmd, struct file_lock *fl) |
56 | { | 84 | { |
57 | u64 length; | ||
58 | u8 lock_cmd; | 85 | u8 lock_cmd; |
59 | int err; | 86 | int err; |
60 | u8 wait = 0; | 87 | u8 wait = 0; |
@@ -76,29 +103,20 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl) | |||
76 | else | 103 | else |
77 | lock_cmd = CEPH_LOCK_UNLOCK; | 104 | lock_cmd = CEPH_LOCK_UNLOCK; |
78 | 105 | ||
79 | if (LLONG_MAX == fl->fl_end) | 106 | err = ceph_lock_message(CEPH_LOCK_FCNTL, op, file, lock_cmd, wait, fl); |
80 | length = 0; | ||
81 | else | ||
82 | length = fl->fl_end - fl->fl_start + 1; | ||
83 | |||
84 | err = ceph_lock_message(CEPH_LOCK_FCNTL, op, file, | ||
85 | (u64)fl->fl_pid, | ||
86 | (u64)(unsigned long)fl->fl_nspid, | ||
87 | lock_cmd, fl->fl_start, | ||
88 | length, wait); | ||
89 | if (!err) { | 107 | if (!err) { |
90 | dout("mds locked, locking locally"); | 108 | if (op != CEPH_MDS_OP_GETFILELOCK) { |
91 | err = posix_lock_file(file, fl, NULL); | 109 | dout("mds locked, locking locally"); |
92 | if (err && (CEPH_MDS_OP_SETFILELOCK == op)) { | 110 | err = posix_lock_file(file, fl, NULL); |
93 | /* undo! This should only happen if the kernel detects | 111 | if (err && (CEPH_MDS_OP_SETFILELOCK == op)) { |
94 | * local deadlock. */ | 112 | /* undo! This should only happen if the kernel detects |
95 | ceph_lock_message(CEPH_LOCK_FCNTL, op, file, | 113 | * local deadlock. */ |
96 | (u64)fl->fl_pid, | 114 | ceph_lock_message(CEPH_LOCK_FCNTL, op, file, |
97 | (u64)(unsigned long)fl->fl_nspid, | 115 | CEPH_LOCK_UNLOCK, 0, fl); |
98 | CEPH_LOCK_UNLOCK, fl->fl_start, | 116 | dout("got %d on posix_lock_file, undid lock", err); |
99 | length, 0); | 117 | } |
100 | dout("got %d on posix_lock_file, undid lock", err); | ||
101 | } | 118 | } |
119 | |||
102 | } else { | 120 | } else { |
103 | dout("mds returned error code %d", err); | 121 | dout("mds returned error code %d", err); |
104 | } | 122 | } |
@@ -107,7 +125,6 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl) | |||
107 | 125 | ||
108 | int ceph_flock(struct file *file, int cmd, struct file_lock *fl) | 126 | int ceph_flock(struct file *file, int cmd, struct file_lock *fl) |
109 | { | 127 | { |
110 | u64 length; | ||
111 | u8 lock_cmd; | 128 | u8 lock_cmd; |
112 | int err; | 129 | int err; |
113 | u8 wait = 1; | 130 | u8 wait = 1; |
@@ -127,26 +144,15 @@ int ceph_flock(struct file *file, int cmd, struct file_lock *fl) | |||
127 | lock_cmd = CEPH_LOCK_EXCL; | 144 | lock_cmd = CEPH_LOCK_EXCL; |
128 | else | 145 | else |
129 | lock_cmd = CEPH_LOCK_UNLOCK; | 146 | lock_cmd = CEPH_LOCK_UNLOCK; |
130 | /* mds requires start and length rather than start and end */ | ||
131 | if (LLONG_MAX == fl->fl_end) | ||
132 | length = 0; | ||
133 | else | ||
134 | length = fl->fl_end - fl->fl_start + 1; | ||
135 | 147 | ||
136 | err = ceph_lock_message(CEPH_LOCK_FLOCK, CEPH_MDS_OP_SETFILELOCK, | 148 | err = ceph_lock_message(CEPH_LOCK_FLOCK, CEPH_MDS_OP_SETFILELOCK, |
137 | file, (u64)fl->fl_pid, | 149 | file, lock_cmd, wait, fl); |
138 | (u64)(unsigned long)fl->fl_nspid, | ||
139 | lock_cmd, fl->fl_start, | ||
140 | length, wait); | ||
141 | if (!err) { | 150 | if (!err) { |
142 | err = flock_lock_file_wait(file, fl); | 151 | err = flock_lock_file_wait(file, fl); |
143 | if (err) { | 152 | if (err) { |
144 | ceph_lock_message(CEPH_LOCK_FLOCK, | 153 | ceph_lock_message(CEPH_LOCK_FLOCK, |
145 | CEPH_MDS_OP_SETFILELOCK, | 154 | CEPH_MDS_OP_SETFILELOCK, |
146 | file, (u64)fl->fl_pid, | 155 | file, CEPH_LOCK_UNLOCK, 0, fl); |
147 | (u64)(unsigned long)fl->fl_nspid, | ||
148 | CEPH_LOCK_UNLOCK, fl->fl_start, | ||
149 | length, 0); | ||
150 | dout("got %d on flock_lock_file_wait, undid lock", err); | 156 | dout("got %d on flock_lock_file_wait, undid lock", err); |
151 | } | 157 | } |
152 | } else { | 158 | } else { |
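Illustrative aside, not part of the patch: ceph_lock_message() now derives the MDS (start, length) pair from the VFS file_lock range and, for F_GETLK, maps the reply back to fl_end. A minimal userspace sketch of those two conversions, using hypothetical values:

#include <stdio.h>
#include <limits.h>
#include <stdint.h>

/* The MDS wants (start, length) while the VFS file_lock carries
 * (fl_start, fl_end); fl_end == LLONG_MAX means "to end of file",
 * which the wire format expresses as length == 0. */
static uint64_t range_to_length(long long fl_start, long long fl_end)
{
        if (fl_end == LLONG_MAX)
                return 0;
        return (uint64_t)(fl_end - fl_start + 1);
}

/* Reverse mapping used when decoding an F_GETLK reply. */
static long long reply_to_fl_end(uint64_t start, uint64_t length)
{
        uint64_t end = start + length;
        return end >= 1 ? (long long)(end - 1) : 0;
}

int main(void)
{
        printf("whole-file lock length: %llu\n",
               (unsigned long long)range_to_length(0, LLONG_MAX));   /* 0   */
        printf("bytes 100..199 length:  %llu\n",
               (unsigned long long)range_to_length(100, 199));       /* 100 */
        printf("reply start=100 len=100 -> fl_end %lld\n",
               reply_to_fl_end(100, 100));                           /* 199 */
        return 0;
}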
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 3142b15940c2..38800eaa81d0 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c | |||
@@ -6,7 +6,6 @@ | |||
6 | #include <linux/sched.h> | 6 | #include <linux/sched.h> |
7 | #include <linux/debugfs.h> | 7 | #include <linux/debugfs.h> |
8 | #include <linux/seq_file.h> | 8 | #include <linux/seq_file.h> |
9 | #include <linux/smp_lock.h> | ||
10 | 9 | ||
11 | #include "super.h" | 10 | #include "super.h" |
12 | #include "mds_client.h" | 11 | #include "mds_client.h" |
@@ -203,6 +202,38 @@ out_bad: | |||
203 | } | 202 | } |
204 | 203 | ||
205 | /* | 204 | /* |
205 | * parse fcntl F_GETLK results | ||
206 | */ | ||
207 | static int parse_reply_info_filelock(void **p, void *end, | ||
208 | struct ceph_mds_reply_info_parsed *info) | ||
209 | { | ||
210 | if (*p + sizeof(*info->filelock_reply) > end) | ||
211 | goto bad; | ||
212 | |||
213 | info->filelock_reply = *p; | ||
214 | *p += sizeof(*info->filelock_reply); | ||
215 | |||
216 | if (unlikely(*p != end)) | ||
217 | goto bad; | ||
218 | return 0; | ||
219 | |||
220 | bad: | ||
221 | return -EIO; | ||
222 | } | ||
223 | |||
224 | /* | ||
225 | * parse extra results | ||
226 | */ | ||
227 | static int parse_reply_info_extra(void **p, void *end, | ||
228 | struct ceph_mds_reply_info_parsed *info) | ||
229 | { | ||
230 | if (info->head->op == CEPH_MDS_OP_GETFILELOCK) | ||
231 | return parse_reply_info_filelock(p, end, info); | ||
232 | else | ||
233 | return parse_reply_info_dir(p, end, info); | ||
234 | } | ||
235 | |||
236 | /* | ||
206 | * parse entire mds reply | 237 | * parse entire mds reply |
207 | */ | 238 | */ |
208 | static int parse_reply_info(struct ceph_msg *msg, | 239 | static int parse_reply_info(struct ceph_msg *msg, |
@@ -224,10 +255,10 @@ static int parse_reply_info(struct ceph_msg *msg, | |||
224 | goto out_bad; | 255 | goto out_bad; |
225 | } | 256 | } |
226 | 257 | ||
227 | /* dir content */ | 258 | /* extra */ |
228 | ceph_decode_32_safe(&p, end, len, bad); | 259 | ceph_decode_32_safe(&p, end, len, bad); |
229 | if (len > 0) { | 260 | if (len > 0) { |
230 | err = parse_reply_info_dir(&p, p+len, info); | 261 | err = parse_reply_info_extra(&p, p+len, info); |
231 | if (err < 0) | 262 | if (err < 0) |
232 | goto out_bad; | 263 | goto out_bad; |
233 | } | 264 | } |
@@ -529,6 +560,9 @@ static void __register_request(struct ceph_mds_client *mdsc, | |||
529 | ceph_mdsc_get_request(req); | 560 | ceph_mdsc_get_request(req); |
530 | __insert_request(mdsc, req); | 561 | __insert_request(mdsc, req); |
531 | 562 | ||
563 | req->r_uid = current_fsuid(); | ||
564 | req->r_gid = current_fsgid(); | ||
565 | |||
532 | if (dir) { | 566 | if (dir) { |
533 | struct ceph_inode_info *ci = ceph_inode(dir); | 567 | struct ceph_inode_info *ci = ceph_inode(dir); |
534 | 568 | ||
@@ -1588,8 +1622,8 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc, | |||
1588 | 1622 | ||
1589 | head->mdsmap_epoch = cpu_to_le32(mdsc->mdsmap->m_epoch); | 1623 | head->mdsmap_epoch = cpu_to_le32(mdsc->mdsmap->m_epoch); |
1590 | head->op = cpu_to_le32(req->r_op); | 1624 | head->op = cpu_to_le32(req->r_op); |
1591 | head->caller_uid = cpu_to_le32(current_fsuid()); | 1625 | head->caller_uid = cpu_to_le32(req->r_uid); |
1592 | head->caller_gid = cpu_to_le32(current_fsgid()); | 1626 | head->caller_gid = cpu_to_le32(req->r_gid); |
1593 | head->args = req->r_args; | 1627 | head->args = req->r_args; |
1594 | 1628 | ||
1595 | ceph_encode_filepath(&p, end, ino1, path1); | 1629 | ceph_encode_filepath(&p, end, ino1, path1); |
@@ -2072,7 +2106,7 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg) | |||
2072 | 2106 | ||
2073 | mutex_lock(&session->s_mutex); | 2107 | mutex_lock(&session->s_mutex); |
2074 | if (err < 0) { | 2108 | if (err < 0) { |
2075 | pr_err("mdsc_handle_reply got corrupt reply mds%d\n", mds); | 2109 | pr_err("mdsc_handle_reply got corrupt reply mds%d(tid:%lld)\n", mds, tid); |
2076 | ceph_msg_dump(msg); | 2110 | ceph_msg_dump(msg); |
2077 | goto out_err; | 2111 | goto out_err; |
2078 | } | 2112 | } |
@@ -2092,7 +2126,8 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg) | |||
2092 | mutex_lock(&req->r_fill_mutex); | 2126 | mutex_lock(&req->r_fill_mutex); |
2093 | err = ceph_fill_trace(mdsc->fsc->sb, req, req->r_session); | 2127 | err = ceph_fill_trace(mdsc->fsc->sb, req, req->r_session); |
2094 | if (err == 0) { | 2128 | if (err == 0) { |
2095 | if (result == 0 && rinfo->dir_nr) | 2129 | if (result == 0 && req->r_op != CEPH_MDS_OP_GETFILELOCK && |
2130 | rinfo->dir_nr) | ||
2096 | ceph_readdir_prepopulate(req, req->r_session); | 2131 | ceph_readdir_prepopulate(req, req->r_session); |
2097 | ceph_unreserve_caps(mdsc, &req->r_caps_reservation); | 2132 | ceph_unreserve_caps(mdsc, &req->r_caps_reservation); |
2098 | } | 2133 | } |
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index d66d63c72355..aabe563b54db 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h | |||
@@ -42,26 +42,37 @@ struct ceph_mds_reply_info_in { | |||
42 | }; | 42 | }; |
43 | 43 | ||
44 | /* | 44 | /* |
45 | * parsed info about an mds reply, including information about the | 45 | * parsed info about an mds reply, including information about |
46 | * target inode and/or its parent directory and dentry, and directory | 46 | * either: 1) the target inode and/or its parent directory and dentry, |
47 | * contents (for readdir results). | 47 | * and directory contents (for readdir results), or |
48 | * 2) the file range lock info (for fcntl F_GETLK results). | ||
48 | */ | 49 | */ |
49 | struct ceph_mds_reply_info_parsed { | 50 | struct ceph_mds_reply_info_parsed { |
50 | struct ceph_mds_reply_head *head; | 51 | struct ceph_mds_reply_head *head; |
51 | 52 | ||
53 | /* trace */ | ||
52 | struct ceph_mds_reply_info_in diri, targeti; | 54 | struct ceph_mds_reply_info_in diri, targeti; |
53 | struct ceph_mds_reply_dirfrag *dirfrag; | 55 | struct ceph_mds_reply_dirfrag *dirfrag; |
54 | char *dname; | 56 | char *dname; |
55 | u32 dname_len; | 57 | u32 dname_len; |
56 | struct ceph_mds_reply_lease *dlease; | 58 | struct ceph_mds_reply_lease *dlease; |
57 | 59 | ||
58 | struct ceph_mds_reply_dirfrag *dir_dir; | 60 | /* extra */ |
59 | int dir_nr; | 61 | union { |
60 | char **dir_dname; | 62 | /* for fcntl F_GETLK results */ |
61 | u32 *dir_dname_len; | 63 | struct ceph_filelock *filelock_reply; |
62 | struct ceph_mds_reply_lease **dir_dlease; | 64 | |
63 | struct ceph_mds_reply_info_in *dir_in; | 65 | /* for readdir results */ |
64 | u8 dir_complete, dir_end; | 66 | struct { |
67 | struct ceph_mds_reply_dirfrag *dir_dir; | ||
68 | int dir_nr; | ||
69 | char **dir_dname; | ||
70 | u32 *dir_dname_len; | ||
71 | struct ceph_mds_reply_lease **dir_dlease; | ||
72 | struct ceph_mds_reply_info_in *dir_in; | ||
73 | u8 dir_complete, dir_end; | ||
74 | }; | ||
75 | }; | ||
65 | 76 | ||
66 | /* encoded blob describing snapshot contexts for certain | 77 | /* encoded blob describing snapshot contexts for certain |
67 | operations (e.g., open) */ | 78 | operations (e.g., open) */ |
@@ -170,6 +181,8 @@ struct ceph_mds_request { | |||
170 | 181 | ||
171 | union ceph_mds_request_args r_args; | 182 | union ceph_mds_request_args r_args; |
172 | int r_fmode; /* file mode, if expecting cap */ | 183 | int r_fmode; /* file mode, if expecting cap */ |
184 | uid_t r_uid; | ||
185 | gid_t r_gid; | ||
173 | 186 | ||
174 | /* for choosing which mds to send this request to */ | 187 | /* for choosing which mds to send this request to */ |
175 | int r_direct_mode; | 188 | int r_direct_mode; |
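Illustrative aside, not part of the patch: the reworked ceph_mds_reply_info_parsed relies on an anonymous union so the "extra" portion of a reply is interpreted either as F_GETLK data or as readdir data, never both. A reduced sketch of that layout (field names abbreviated, requires C11 or GNU C for the anonymous members):

#include <stdio.h>

struct filelock_reply { unsigned long long start, length, pid; };

struct reply_info {
        /* trace fields would precede this in the real structure */
        union {
                struct filelock_reply *filelock_reply;  /* fcntl F_GETLK */
                struct {                                /* readdir */
                        int dir_nr;
                        char **dir_dname;
                };
        };
};

int main(void)
{
        /* The union means the filelock pointer and the readdir fields
         * overlap in memory; only one interpretation is valid per reply. */
        printf("sizeof(struct reply_info) = %zu\n", sizeof(struct reply_info));
        return 0;
}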
diff --git a/fs/ceph/super.h b/fs/ceph/super.h index 1886294e12f7..7f01728a4657 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h | |||
@@ -293,9 +293,7 @@ struct ceph_inode_info { | |||
293 | int i_rd_ref, i_rdcache_ref, i_wr_ref; | 293 | int i_rd_ref, i_rdcache_ref, i_wr_ref; |
294 | int i_wrbuffer_ref, i_wrbuffer_ref_head; | 294 | int i_wrbuffer_ref, i_wrbuffer_ref_head; |
295 | u32 i_shared_gen; /* increment each time we get FILE_SHARED */ | 295 | u32 i_shared_gen; /* increment each time we get FILE_SHARED */ |
296 | u32 i_rdcache_gen; /* we increment this each time we get | 296 | u32 i_rdcache_gen; /* incremented each time we get FILE_CACHE. */ |
297 | FILE_CACHE. If it's non-zero, we | ||
298 | _may_ have cached pages. */ | ||
299 | u32 i_rdcache_revoking; /* RDCACHE gen to async invalidate, if any */ | 297 | u32 i_rdcache_revoking; /* RDCACHE gen to async invalidate, if any */ |
300 | 298 | ||
301 | struct list_head i_unsafe_writes; /* uncommitted sync writes */ | 299 | struct list_head i_unsafe_writes; /* uncommitted sync writes */ |
diff --git a/fs/cifs/Kconfig b/fs/cifs/Kconfig index 0ed213970ced..ee45648b0d1a 100644 --- a/fs/cifs/Kconfig +++ b/fs/cifs/Kconfig | |||
@@ -4,6 +4,7 @@ config CIFS | |||
4 | select NLS | 4 | select NLS |
5 | select CRYPTO | 5 | select CRYPTO |
6 | select CRYPTO_MD5 | 6 | select CRYPTO_MD5 |
7 | select CRYPTO_HMAC | ||
7 | select CRYPTO_ARC4 | 8 | select CRYPTO_ARC4 |
8 | help | 9 | help |
9 | This is the client VFS module for the Common Internet File System | 10 | This is the client VFS module for the Common Internet File System |
@@ -143,6 +144,13 @@ config CIFS_FSCACHE | |||
143 | to be cached locally on disk through the general filesystem cache | 144 | to be cached locally on disk through the general filesystem cache |
144 | manager. If unsure, say N. | 145 | manager. If unsure, say N. |
145 | 146 | ||
147 | config CIFS_ACL | ||
148 | bool "Provide CIFS ACL support (EXPERIMENTAL)" | ||
149 | depends on EXPERIMENTAL && CIFS_XATTR | ||
150 | help | ||
151 | Allows fetching of a CIFS/NTFS ACL from the server. The DACL | ||
152 | blob is handed over to the application/caller. | ||
153 | |||
146 | config CIFS_EXPERIMENTAL | 154 | config CIFS_EXPERIMENTAL |
147 | bool "CIFS Experimental Features (EXPERIMENTAL)" | 155 | bool "CIFS Experimental Features (EXPERIMENTAL)" |
148 | depends on CIFS && EXPERIMENTAL | 156 | depends on CIFS && EXPERIMENTAL |
diff --git a/fs/cifs/Makefile b/fs/cifs/Makefile index adefa60a9bdc..43b19dd39191 100644 --- a/fs/cifs/Makefile +++ b/fs/cifs/Makefile | |||
@@ -6,7 +6,9 @@ obj-$(CONFIG_CIFS) += cifs.o | |||
6 | cifs-y := cifsfs.o cifssmb.o cifs_debug.o connect.o dir.o file.o inode.o \ | 6 | cifs-y := cifsfs.o cifssmb.o cifs_debug.o connect.o dir.o file.o inode.o \ |
7 | link.o misc.o netmisc.o smbdes.o smbencrypt.o transport.o asn1.o \ | 7 | link.o misc.o netmisc.o smbdes.o smbencrypt.o transport.o asn1.o \ |
8 | md4.o md5.o cifs_unicode.o nterr.o xattr.o cifsencrypt.o \ | 8 | md4.o md5.o cifs_unicode.o nterr.o xattr.o cifsencrypt.o \ |
9 | readdir.o ioctl.o sess.o export.o cifsacl.o | 9 | readdir.o ioctl.o sess.o export.o |
10 | |||
11 | cifs-$(CONFIG_CIFS_ACL) += cifsacl.o | ||
10 | 12 | ||
11 | cifs-$(CONFIG_CIFS_UPCALL) += cifs_spnego.o | 13 | cifs-$(CONFIG_CIFS_UPCALL) += cifs_spnego.o |
12 | 14 | ||
diff --git a/fs/cifs/README b/fs/cifs/README index ee68d1036544..46af99ab3614 100644 --- a/fs/cifs/README +++ b/fs/cifs/README | |||
@@ -337,6 +337,15 @@ A partial list of the supported mount options follows: | |||
337 | wsize default write size (default 57344) | 337 | wsize default write size (default 57344) |
338 | maximum wsize currently allowed by CIFS is 57344 (fourteen | 338 | maximum wsize currently allowed by CIFS is 57344 (fourteen |
339 | 4096 byte pages) | 339 | 4096 byte pages) |
340 | actimeo=n attribute cache timeout in seconds (default 1 second). | ||
341 | After this timeout, the cifs client requests fresh attribute | ||
342 | information from the server. This option allows the attribute | ||
343 | cache timeout to be tuned to suit the workload. Shorter | ||
344 | timeouts mean better cache coherency, but more frequent | ||
345 | calls to the server. Longer timeouts mean fewer calls to | ||
346 | the server, at the expense of weaker cache coherency | ||
347 | (i.e. the attribute cache may be stale for a short period | ||
348 | of time). | ||
340 | rw mount the network share read-write (note that the | 349 | rw mount the network share read-write (note that the |
341 | server may still consider the share read-only) | 350 | server may still consider the share read-only) |
342 | ro mount network share read-only | 351 | ro mount network share read-only |
diff --git a/fs/cifs/TODO b/fs/cifs/TODO index 5aff46c61e52..355abcdcda98 100644 --- a/fs/cifs/TODO +++ b/fs/cifs/TODO | |||
@@ -81,7 +81,7 @@ u) DOS attrs - returned as pseudo-xattr in Samba format (check VFAT and NTFS for | |||
81 | 81 | ||
82 | v) mount check for unmatched uids | 82 | v) mount check for unmatched uids |
83 | 83 | ||
84 | w) Add support for new vfs entry points for setlease and fallocate | 84 | w) Add support for new vfs entry point for fallocate |
85 | 85 | ||
86 | x) Fix Samba 3 server to handle Linux kernel aio so dbench with lots of | 86 | x) Fix Samba 3 server to handle Linux kernel aio so dbench with lots of |
87 | processes can proceed better in parallel (on the server) | 87 | processes can proceed better in parallel (on the server) |
diff --git a/fs/cifs/cifs_fs_sb.h b/fs/cifs/cifs_fs_sb.h index 525ba59a4105..7852cd677051 100644 --- a/fs/cifs/cifs_fs_sb.h +++ b/fs/cifs/cifs_fs_sb.h | |||
@@ -15,7 +15,7 @@ | |||
15 | * the GNU Lesser General Public License for more details. | 15 | * the GNU Lesser General Public License for more details. |
16 | * | 16 | * |
17 | */ | 17 | */ |
18 | #include <linux/radix-tree.h> | 18 | #include <linux/rbtree.h> |
19 | 19 | ||
20 | #ifndef _CIFS_FS_SB_H | 20 | #ifndef _CIFS_FS_SB_H |
21 | #define _CIFS_FS_SB_H | 21 | #define _CIFS_FS_SB_H |
@@ -42,12 +42,13 @@ | |||
42 | #define CIFS_MOUNT_MULTIUSER 0x20000 /* multiuser mount */ | 42 | #define CIFS_MOUNT_MULTIUSER 0x20000 /* multiuser mount */ |
43 | 43 | ||
44 | struct cifs_sb_info { | 44 | struct cifs_sb_info { |
45 | struct radix_tree_root tlink_tree; | 45 | struct rb_root tlink_tree; |
46 | #define CIFS_TLINK_MASTER_TAG 0 /* is "master" (mount) tcon */ | ||
47 | spinlock_t tlink_tree_lock; | 46 | spinlock_t tlink_tree_lock; |
47 | struct tcon_link *master_tlink; | ||
48 | struct nls_table *local_nls; | 48 | struct nls_table *local_nls; |
49 | unsigned int rsize; | 49 | unsigned int rsize; |
50 | unsigned int wsize; | 50 | unsigned int wsize; |
51 | unsigned long actimeo; /* attribute cache timeout (jiffies) */ | ||
51 | atomic_t active; | 52 | atomic_t active; |
52 | uid_t mnt_uid; | 53 | uid_t mnt_uid; |
53 | gid_t mnt_gid; | 54 | gid_t mnt_gid; |
diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c index c9b4792ae825..a437ec391a01 100644 --- a/fs/cifs/cifsacl.c +++ b/fs/cifs/cifsacl.c | |||
@@ -30,8 +30,6 @@ | |||
30 | #include "cifs_debug.h" | 30 | #include "cifs_debug.h" |
31 | 31 | ||
32 | 32 | ||
33 | #ifdef CONFIG_CIFS_EXPERIMENTAL | ||
34 | |||
35 | static struct cifs_wksid wksidarr[NUM_WK_SIDS] = { | 33 | static struct cifs_wksid wksidarr[NUM_WK_SIDS] = { |
36 | {{1, 0, {0, 0, 0, 0, 0, 0}, {0, 0, 0, 0, 0} }, "null user"}, | 34 | {{1, 0, {0, 0, 0, 0, 0, 0}, {0, 0, 0, 0, 0} }, "null user"}, |
37 | {{1, 1, {0, 0, 0, 0, 0, 1}, {0, 0, 0, 0, 0} }, "nobody"}, | 35 | {{1, 1, {0, 0, 0, 0, 0, 1}, {0, 0, 0, 0, 0} }, "nobody"}, |
@@ -560,7 +558,7 @@ static struct cifs_ntsd *get_cifs_acl_by_fid(struct cifs_sb_info *cifs_sb, | |||
560 | struct tcon_link *tlink = cifs_sb_tlink(cifs_sb); | 558 | struct tcon_link *tlink = cifs_sb_tlink(cifs_sb); |
561 | 559 | ||
562 | if (IS_ERR(tlink)) | 560 | if (IS_ERR(tlink)) |
563 | return NULL; | 561 | return ERR_CAST(tlink); |
564 | 562 | ||
565 | xid = GetXid(); | 563 | xid = GetXid(); |
566 | rc = CIFSSMBGetCIFSACL(xid, tlink_tcon(tlink), fid, &pntsd, pacllen); | 564 | rc = CIFSSMBGetCIFSACL(xid, tlink_tcon(tlink), fid, &pntsd, pacllen); |
@@ -568,7 +566,9 @@ static struct cifs_ntsd *get_cifs_acl_by_fid(struct cifs_sb_info *cifs_sb, | |||
568 | 566 | ||
569 | cifs_put_tlink(tlink); | 567 | cifs_put_tlink(tlink); |
570 | 568 | ||
571 | cFYI(1, "GetCIFSACL rc = %d ACL len %d", rc, *pacllen); | 569 | cFYI(1, "%s: rc = %d ACL len %d", __func__, rc, *pacllen); |
570 | if (rc) | ||
571 | return ERR_PTR(rc); | ||
572 | return pntsd; | 572 | return pntsd; |
573 | } | 573 | } |
574 | 574 | ||
@@ -583,7 +583,7 @@ static struct cifs_ntsd *get_cifs_acl_by_path(struct cifs_sb_info *cifs_sb, | |||
583 | struct tcon_link *tlink = cifs_sb_tlink(cifs_sb); | 583 | struct tcon_link *tlink = cifs_sb_tlink(cifs_sb); |
584 | 584 | ||
585 | if (IS_ERR(tlink)) | 585 | if (IS_ERR(tlink)) |
586 | return NULL; | 586 | return ERR_CAST(tlink); |
587 | 587 | ||
588 | tcon = tlink_tcon(tlink); | 588 | tcon = tlink_tcon(tlink); |
589 | xid = GetXid(); | 589 | xid = GetXid(); |
@@ -591,23 +591,22 @@ static struct cifs_ntsd *get_cifs_acl_by_path(struct cifs_sb_info *cifs_sb, | |||
591 | rc = CIFSSMBOpen(xid, tcon, path, FILE_OPEN, READ_CONTROL, 0, | 591 | rc = CIFSSMBOpen(xid, tcon, path, FILE_OPEN, READ_CONTROL, 0, |
592 | &fid, &oplock, NULL, cifs_sb->local_nls, | 592 | &fid, &oplock, NULL, cifs_sb->local_nls, |
593 | cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); | 593 | cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); |
594 | if (rc) { | 594 | if (!rc) { |
595 | cERROR(1, "Unable to open file to get ACL"); | 595 | rc = CIFSSMBGetCIFSACL(xid, tcon, fid, &pntsd, pacllen); |
596 | goto out; | 596 | CIFSSMBClose(xid, tcon, fid); |
597 | } | 597 | } |
598 | 598 | ||
599 | rc = CIFSSMBGetCIFSACL(xid, tcon, fid, &pntsd, pacllen); | ||
600 | cFYI(1, "GetCIFSACL rc = %d ACL len %d", rc, *pacllen); | ||
601 | |||
602 | CIFSSMBClose(xid, tcon, fid); | ||
603 | out: | ||
604 | cifs_put_tlink(tlink); | 599 | cifs_put_tlink(tlink); |
605 | FreeXid(xid); | 600 | FreeXid(xid); |
601 | |||
602 | cFYI(1, "%s: rc = %d ACL len %d", __func__, rc, *pacllen); | ||
603 | if (rc) | ||
604 | return ERR_PTR(rc); | ||
606 | return pntsd; | 605 | return pntsd; |
607 | } | 606 | } |
608 | 607 | ||
609 | /* Retrieve an ACL from the server */ | 608 | /* Retrieve an ACL from the server */ |
610 | static struct cifs_ntsd *get_cifs_acl(struct cifs_sb_info *cifs_sb, | 609 | struct cifs_ntsd *get_cifs_acl(struct cifs_sb_info *cifs_sb, |
611 | struct inode *inode, const char *path, | 610 | struct inode *inode, const char *path, |
612 | u32 *pacllen) | 611 | u32 *pacllen) |
613 | { | 612 | { |
@@ -695,7 +694,7 @@ static int set_cifs_acl(struct cifs_ntsd *pnntsd, __u32 acllen, | |||
695 | } | 694 | } |
696 | 695 | ||
697 | /* Translate the CIFS ACL (similar to NTFS ACL) for a file into mode bits */ | 696 | /* Translate the CIFS ACL (similar to NTFS ACL) for a file into mode bits */ |
698 | void | 697 | int |
699 | cifs_acl_to_fattr(struct cifs_sb_info *cifs_sb, struct cifs_fattr *fattr, | 698 | cifs_acl_to_fattr(struct cifs_sb_info *cifs_sb, struct cifs_fattr *fattr, |
700 | struct inode *inode, const char *path, const __u16 *pfid) | 699 | struct inode *inode, const char *path, const __u16 *pfid) |
701 | { | 700 | { |
@@ -711,17 +710,21 @@ cifs_acl_to_fattr(struct cifs_sb_info *cifs_sb, struct cifs_fattr *fattr, | |||
711 | pntsd = get_cifs_acl(cifs_sb, inode, path, &acllen); | 710 | pntsd = get_cifs_acl(cifs_sb, inode, path, &acllen); |
712 | 711 | ||
713 | /* if we can retrieve the ACL, now parse Access Control Entries, ACEs */ | 712 | /* if we can retrieve the ACL, now parse Access Control Entries, ACEs */ |
714 | if (pntsd) | 713 | if (IS_ERR(pntsd)) { |
714 | rc = PTR_ERR(pntsd); | ||
715 | cERROR(1, "%s: error %d getting sec desc", __func__, rc); | ||
716 | } else { | ||
715 | rc = parse_sec_desc(pntsd, acllen, fattr); | 717 | rc = parse_sec_desc(pntsd, acllen, fattr); |
716 | if (rc) | 718 | kfree(pntsd); |
717 | cFYI(1, "parse sec desc failed rc = %d", rc); | 719 | if (rc) |
720 | cERROR(1, "parse sec desc failed rc = %d", rc); | ||
721 | } | ||
718 | 722 | ||
719 | kfree(pntsd); | 723 | return rc; |
720 | return; | ||
721 | } | 724 | } |
722 | 725 | ||
723 | /* Convert mode bits to an ACL so we can update the ACL on the server */ | 726 | /* Convert mode bits to an ACL so we can update the ACL on the server */ |
724 | int mode_to_acl(struct inode *inode, const char *path, __u64 nmode) | 727 | int mode_to_cifs_acl(struct inode *inode, const char *path, __u64 nmode) |
725 | { | 728 | { |
726 | int rc = 0; | 729 | int rc = 0; |
727 | __u32 secdesclen = 0; | 730 | __u32 secdesclen = 0; |
@@ -736,7 +739,10 @@ int mode_to_acl(struct inode *inode, const char *path, __u64 nmode) | |||
736 | /* Add three ACEs for owner, group, everyone getting rid of | 739 | /* Add three ACEs for owner, group, everyone getting rid of |
737 | other ACEs as chmod disables ACEs and set the security descriptor */ | 740 | other ACEs as chmod disables ACEs and set the security descriptor */ |
738 | 741 | ||
739 | if (pntsd) { | 742 | if (IS_ERR(pntsd)) { |
743 | rc = PTR_ERR(pntsd); | ||
744 | cERROR(1, "%s: error %d getting sec desc", __func__, rc); | ||
745 | } else { | ||
740 | /* allocate memory for the smb header, | 746 | /* allocate memory for the smb header, |
741 | set security descriptor request security descriptor | 747 | set security descriptor request security descriptor |
742 | parameters, and security descriptor itself */ | 748 | parameters, and security descriptor itself */ |
@@ -766,4 +772,3 @@ int mode_to_acl(struct inode *inode, const char *path, __u64 nmode) | |||
766 | 772 | ||
767 | return rc; | 773 | return rc; |
768 | } | 774 | } |
769 | #endif /* CONFIG_CIFS_EXPERIMENTAL */ | ||
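Illustrative aside, not part of the patch: get_cifs_acl_by_fid/_path now propagate failures as ERR_PTR-encoded pointers instead of returning NULL, so callers can tell why the ACL fetch failed. A self-contained userspace mimic of that convention (the kernel's real helpers live in <linux/err.h>; get_acl_blob() below is hypothetical):

#include <stdio.h>
#include <errno.h>
#include <stdlib.h>

#define MAX_ERRNO 4095

/* Userspace stand-ins for the kernel's ERR_PTR/IS_ERR/PTR_ERR helpers:
 * a negative errno is encoded in the top few values of the pointer range. */
static inline void *ERR_PTR(long error) { return (void *)error; }
static inline long PTR_ERR(const void *ptr) { return (long)ptr; }
static inline int IS_ERR(const void *ptr)
{
        return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
}

/* Hypothetical stand-in for get_cifs_acl(): fails with -EACCES here. */
static void *get_acl_blob(int should_fail)
{
        if (should_fail)
                return ERR_PTR(-EACCES);
        return malloc(64);
}

int main(void)
{
        void *blob = get_acl_blob(1);

        if (IS_ERR(blob))
                printf("ACL fetch failed: %ld\n", PTR_ERR(blob));  /* -13 */
        else
                free(blob);
        return 0;
}

This is the same pattern the patched cifs_acl_to_fattr() and mode_to_cifs_acl() use when they check IS_ERR(pntsd) and turn the encoded error back into an rc value.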
diff --git a/fs/cifs/cifsacl.h b/fs/cifs/cifsacl.h index 6c8096cf5155..c4ae7d036563 100644 --- a/fs/cifs/cifsacl.h +++ b/fs/cifs/cifsacl.h | |||
@@ -74,11 +74,7 @@ struct cifs_wksid { | |||
74 | char sidname[SIDNAMELENGTH]; | 74 | char sidname[SIDNAMELENGTH]; |
75 | } __attribute__((packed)); | 75 | } __attribute__((packed)); |
76 | 76 | ||
77 | #ifdef CONFIG_CIFS_EXPERIMENTAL | ||
78 | |||
79 | extern int match_sid(struct cifs_sid *); | 77 | extern int match_sid(struct cifs_sid *); |
80 | extern int compare_sids(const struct cifs_sid *, const struct cifs_sid *); | 78 | extern int compare_sids(const struct cifs_sid *, const struct cifs_sid *); |
81 | 79 | ||
82 | #endif /* CONFIG_CIFS_EXPERIMENTAL */ | ||
83 | |||
84 | #endif /* _CIFSACL_H */ | 80 | #endif /* _CIFSACL_H */ |
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 75c4eaa79588..3936aa7f2c22 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c | |||
@@ -116,7 +116,7 @@ cifs_read_super(struct super_block *sb, void *data, | |||
116 | return -ENOMEM; | 116 | return -ENOMEM; |
117 | 117 | ||
118 | spin_lock_init(&cifs_sb->tlink_tree_lock); | 118 | spin_lock_init(&cifs_sb->tlink_tree_lock); |
119 | INIT_RADIX_TREE(&cifs_sb->tlink_tree, GFP_KERNEL); | 119 | cifs_sb->tlink_tree = RB_ROOT; |
120 | 120 | ||
121 | rc = bdi_setup_and_register(&cifs_sb->bdi, "cifs", BDI_CAP_MAP_COPY); | 121 | rc = bdi_setup_and_register(&cifs_sb->bdi, "cifs", BDI_CAP_MAP_COPY); |
122 | if (rc) { | 122 | if (rc) { |
@@ -321,8 +321,7 @@ cifs_alloc_inode(struct super_block *sb) | |||
321 | /* Until the file is open and we have gotten oplock | 321 | /* Until the file is open and we have gotten oplock |
322 | info back from the server, can not assume caching of | 322 | info back from the server, can not assume caching of |
323 | file data or metadata */ | 323 | file data or metadata */ |
324 | cifs_inode->clientCanCacheRead = false; | 324 | cifs_set_oplock_level(cifs_inode, 0); |
325 | cifs_inode->clientCanCacheAll = false; | ||
326 | cifs_inode->delete_pending = false; | 325 | cifs_inode->delete_pending = false; |
327 | cifs_inode->invalid_mapping = false; | 326 | cifs_inode->invalid_mapping = false; |
328 | cifs_inode->vfs_inode.i_blkbits = 14; /* 2**14 = CIFS_MAX_MSGSIZE */ | 327 | cifs_inode->vfs_inode.i_blkbits = 14; /* 2**14 = CIFS_MAX_MSGSIZE */ |
@@ -459,9 +458,13 @@ cifs_show_options(struct seq_file *s, struct vfsmount *m) | |||
459 | seq_printf(s, ",acl"); | 458 | seq_printf(s, ",acl"); |
460 | if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MF_SYMLINKS) | 459 | if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MF_SYMLINKS) |
461 | seq_printf(s, ",mfsymlinks"); | 460 | seq_printf(s, ",mfsymlinks"); |
461 | if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_FSCACHE) | ||
462 | seq_printf(s, ",fsc"); | ||
462 | 463 | ||
463 | seq_printf(s, ",rsize=%d", cifs_sb->rsize); | 464 | seq_printf(s, ",rsize=%d", cifs_sb->rsize); |
464 | seq_printf(s, ",wsize=%d", cifs_sb->wsize); | 465 | seq_printf(s, ",wsize=%d", cifs_sb->wsize); |
466 | /* convert actimeo and display it in seconds */ | ||
467 | seq_printf(s, ",actimeo=%lu", cifs_sb->actimeo / HZ); | ||
465 | 468 | ||
466 | return 0; | 469 | return 0; |
467 | } | 470 | } |
@@ -934,7 +937,6 @@ init_cifs(void) | |||
934 | GlobalCurrentXid = 0; | 937 | GlobalCurrentXid = 0; |
935 | GlobalTotalActiveXid = 0; | 938 | GlobalTotalActiveXid = 0; |
936 | GlobalMaxActiveXid = 0; | 939 | GlobalMaxActiveXid = 0; |
937 | memset(Local_System_Name, 0, 15); | ||
938 | spin_lock_init(&cifs_tcp_ses_lock); | 940 | spin_lock_init(&cifs_tcp_ses_lock); |
939 | spin_lock_init(&cifs_file_list_lock); | 941 | spin_lock_init(&cifs_file_list_lock); |
940 | spin_lock_init(&GlobalMid_Lock); | 942 | spin_lock_init(&GlobalMid_Lock); |
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index f259e4d7612d..7136c0c3e2f9 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h | |||
@@ -45,6 +45,16 @@ | |||
45 | #define CIFS_MIN_RCV_POOL 4 | 45 | #define CIFS_MIN_RCV_POOL 4 |
46 | 46 | ||
47 | /* | 47 | /* |
48 | * default attribute cache timeout (jiffies) | ||
49 | */ | ||
50 | #define CIFS_DEF_ACTIMEO (1 * HZ) | ||
51 | |||
52 | /* | ||
53 | * max attribute cache timeout (jiffies) - 2^30 | ||
54 | */ | ||
55 | #define CIFS_MAX_ACTIMEO (1 << 30) | ||
56 | |||
57 | /* | ||
48 | * MAX_REQ is the maximum number of requests that WE will send | 58 | * MAX_REQ is the maximum number of requests that WE will send |
49 | * on one socket concurrently. It also matches the most common | 59 | * on one socket concurrently. It also matches the most common |
50 | * value of max multiplex returned by servers. We may | 60 | * value of max multiplex returned by servers. We may |
@@ -336,7 +346,8 @@ struct cifsTconInfo { | |||
336 | * "get" on the container. | 346 | * "get" on the container. |
337 | */ | 347 | */ |
338 | struct tcon_link { | 348 | struct tcon_link { |
339 | unsigned long tl_index; | 349 | struct rb_node tl_rbnode; |
350 | uid_t tl_uid; | ||
340 | unsigned long tl_flags; | 351 | unsigned long tl_flags; |
341 | #define TCON_LINK_MASTER 0 | 352 | #define TCON_LINK_MASTER 0 |
342 | #define TCON_LINK_PENDING 1 | 353 | #define TCON_LINK_PENDING 1 |
@@ -745,8 +756,6 @@ GLOBAL_EXTERN unsigned int GlobalTotalActiveXid; /* prot by GlobalMid_Sem */ | |||
745 | GLOBAL_EXTERN unsigned int GlobalMaxActiveXid; /* prot by GlobalMid_Sem */ | 756 | GLOBAL_EXTERN unsigned int GlobalMaxActiveXid; /* prot by GlobalMid_Sem */ |
746 | GLOBAL_EXTERN spinlock_t GlobalMid_Lock; /* protects above & list operations */ | 757 | GLOBAL_EXTERN spinlock_t GlobalMid_Lock; /* protects above & list operations */ |
747 | /* on midQ entries */ | 758 | /* on midQ entries */ |
748 | GLOBAL_EXTERN char Local_System_Name[15]; | ||
749 | |||
750 | /* | 759 | /* |
751 | * Global counters, updated atomically | 760 | * Global counters, updated atomically |
752 | */ | 761 | */ |
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index edb6d90efdf2..e6d1481b16c1 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h | |||
@@ -54,7 +54,8 @@ do { \ | |||
54 | __func__, curr_xid, (int)rc); \ | 54 | __func__, curr_xid, (int)rc); \ |
55 | } while (0) | 55 | } while (0) |
56 | extern char *build_path_from_dentry(struct dentry *); | 56 | extern char *build_path_from_dentry(struct dentry *); |
57 | extern char *cifs_build_path_to_root(struct cifs_sb_info *cifs_sb); | 57 | extern char *cifs_build_path_to_root(struct cifs_sb_info *cifs_sb, |
58 | struct cifsTconInfo *tcon); | ||
58 | extern char *build_wildcard_path_from_dentry(struct dentry *direntry); | 59 | extern char *build_wildcard_path_from_dentry(struct dentry *direntry); |
59 | extern char *cifs_compose_mount_options(const char *sb_mountdata, | 60 | extern char *cifs_compose_mount_options(const char *sb_mountdata, |
60 | const char *fullpath, const struct dfs_info3_param *ref, | 61 | const char *fullpath, const struct dfs_info3_param *ref, |
@@ -79,9 +80,7 @@ extern bool is_valid_oplock_break(struct smb_hdr *smb, | |||
79 | struct TCP_Server_Info *); | 80 | struct TCP_Server_Info *); |
80 | extern bool is_size_safe_to_change(struct cifsInodeInfo *, __u64 eof); | 81 | extern bool is_size_safe_to_change(struct cifsInodeInfo *, __u64 eof); |
81 | extern struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *, bool); | 82 | extern struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *, bool); |
82 | #ifdef CONFIG_CIFS_EXPERIMENTAL | ||
83 | extern struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *, bool); | 83 | extern struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *, bool); |
84 | #endif | ||
85 | extern unsigned int smbCalcSize(struct smb_hdr *ptr); | 84 | extern unsigned int smbCalcSize(struct smb_hdr *ptr); |
86 | extern unsigned int smbCalcSize_LE(struct smb_hdr *ptr); | 85 | extern unsigned int smbCalcSize_LE(struct smb_hdr *ptr); |
87 | extern int decode_negTokenInit(unsigned char *security_blob, int length, | 86 | extern int decode_negTokenInit(unsigned char *security_blob, int length, |
@@ -104,6 +103,7 @@ extern struct timespec cifs_NTtimeToUnix(__le64 utc_nanoseconds_since_1601); | |||
104 | extern u64 cifs_UnixTimeToNT(struct timespec); | 103 | extern u64 cifs_UnixTimeToNT(struct timespec); |
105 | extern struct timespec cnvrtDosUnixTm(__le16 le_date, __le16 le_time, | 104 | extern struct timespec cnvrtDosUnixTm(__le16 le_date, __le16 le_time, |
106 | int offset); | 105 | int offset); |
106 | extern void cifs_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock); | ||
107 | 107 | ||
108 | extern struct cifsFileInfo *cifs_new_fileinfo(__u16 fileHandle, | 108 | extern struct cifsFileInfo *cifs_new_fileinfo(__u16 fileHandle, |
109 | struct file *file, struct tcon_link *tlink, | 109 | struct file *file, struct tcon_link *tlink, |
@@ -129,10 +129,12 @@ extern int cifs_get_file_info_unix(struct file *filp); | |||
129 | extern int cifs_get_inode_info_unix(struct inode **pinode, | 129 | extern int cifs_get_inode_info_unix(struct inode **pinode, |
130 | const unsigned char *search_path, | 130 | const unsigned char *search_path, |
131 | struct super_block *sb, int xid); | 131 | struct super_block *sb, int xid); |
132 | extern void cifs_acl_to_fattr(struct cifs_sb_info *cifs_sb, | 132 | extern int cifs_acl_to_fattr(struct cifs_sb_info *cifs_sb, |
133 | struct cifs_fattr *fattr, struct inode *inode, | 133 | struct cifs_fattr *fattr, struct inode *inode, |
134 | const char *path, const __u16 *pfid); | 134 | const char *path, const __u16 *pfid); |
135 | extern int mode_to_acl(struct inode *inode, const char *path, __u64); | 135 | extern int mode_to_cifs_acl(struct inode *inode, const char *path, __u64); |
136 | extern struct cifs_ntsd *get_cifs_acl(struct cifs_sb_info *, struct inode *, | ||
137 | const char *, u32 *); | ||
136 | 138 | ||
137 | extern int cifs_mount(struct super_block *, struct cifs_sb_info *, char *, | 139 | extern int cifs_mount(struct super_block *, struct cifs_sb_info *, char *, |
138 | const char *); | 140 | const char *); |
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 2f2632b6df5a..67acfb3acad2 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c | |||
@@ -2478,95 +2478,6 @@ querySymLinkRetry: | |||
2478 | } | 2478 | } |
2479 | 2479 | ||
2480 | #ifdef CONFIG_CIFS_EXPERIMENTAL | 2480 | #ifdef CONFIG_CIFS_EXPERIMENTAL |
2481 | /* Initialize NT TRANSACT SMB into small smb request buffer. | ||
2482 | This assumes that all NT TRANSACTS that we init here have | ||
2483 | total parm and data under about 400 bytes (to fit in small cifs | ||
2484 | buffer size), which is the case so far, it easily fits. NB: | ||
2485 | Setup words themselves and ByteCount | ||
2486 | MaxSetupCount (size of returned setup area) and | ||
2487 | MaxParameterCount (returned parms size) must be set by caller */ | ||
2488 | static int | ||
2489 | smb_init_nttransact(const __u16 sub_command, const int setup_count, | ||
2490 | const int parm_len, struct cifsTconInfo *tcon, | ||
2491 | void **ret_buf) | ||
2492 | { | ||
2493 | int rc; | ||
2494 | __u32 temp_offset; | ||
2495 | struct smb_com_ntransact_req *pSMB; | ||
2496 | |||
2497 | rc = small_smb_init(SMB_COM_NT_TRANSACT, 19 + setup_count, tcon, | ||
2498 | (void **)&pSMB); | ||
2499 | if (rc) | ||
2500 | return rc; | ||
2501 | *ret_buf = (void *)pSMB; | ||
2502 | pSMB->Reserved = 0; | ||
2503 | pSMB->TotalParameterCount = cpu_to_le32(parm_len); | ||
2504 | pSMB->TotalDataCount = 0; | ||
2505 | pSMB->MaxDataCount = cpu_to_le32((tcon->ses->server->maxBuf - | ||
2506 | MAX_CIFS_HDR_SIZE) & 0xFFFFFF00); | ||
2507 | pSMB->ParameterCount = pSMB->TotalParameterCount; | ||
2508 | pSMB->DataCount = pSMB->TotalDataCount; | ||
2509 | temp_offset = offsetof(struct smb_com_ntransact_req, Parms) + | ||
2510 | (setup_count * 2) - 4 /* for rfc1001 length itself */; | ||
2511 | pSMB->ParameterOffset = cpu_to_le32(temp_offset); | ||
2512 | pSMB->DataOffset = cpu_to_le32(temp_offset + parm_len); | ||
2513 | pSMB->SetupCount = setup_count; /* no need to le convert byte fields */ | ||
2514 | pSMB->SubCommand = cpu_to_le16(sub_command); | ||
2515 | return 0; | ||
2516 | } | ||
2517 | |||
2518 | static int | ||
2519 | validate_ntransact(char *buf, char **ppparm, char **ppdata, | ||
2520 | __u32 *pparmlen, __u32 *pdatalen) | ||
2521 | { | ||
2522 | char *end_of_smb; | ||
2523 | __u32 data_count, data_offset, parm_count, parm_offset; | ||
2524 | struct smb_com_ntransact_rsp *pSMBr; | ||
2525 | |||
2526 | *pdatalen = 0; | ||
2527 | *pparmlen = 0; | ||
2528 | |||
2529 | if (buf == NULL) | ||
2530 | return -EINVAL; | ||
2531 | |||
2532 | pSMBr = (struct smb_com_ntransact_rsp *)buf; | ||
2533 | |||
2534 | /* ByteCount was converted from little endian in SendReceive */ | ||
2535 | end_of_smb = 2 /* sizeof byte count */ + pSMBr->ByteCount + | ||
2536 | (char *)&pSMBr->ByteCount; | ||
2537 | |||
2538 | data_offset = le32_to_cpu(pSMBr->DataOffset); | ||
2539 | data_count = le32_to_cpu(pSMBr->DataCount); | ||
2540 | parm_offset = le32_to_cpu(pSMBr->ParameterOffset); | ||
2541 | parm_count = le32_to_cpu(pSMBr->ParameterCount); | ||
2542 | |||
2543 | *ppparm = (char *)&pSMBr->hdr.Protocol + parm_offset; | ||
2544 | *ppdata = (char *)&pSMBr->hdr.Protocol + data_offset; | ||
2545 | |||
2546 | /* should we also check that parm and data areas do not overlap? */ | ||
2547 | if (*ppparm > end_of_smb) { | ||
2548 | cFYI(1, "parms start after end of smb"); | ||
2549 | return -EINVAL; | ||
2550 | } else if (parm_count + *ppparm > end_of_smb) { | ||
2551 | cFYI(1, "parm end after end of smb"); | ||
2552 | return -EINVAL; | ||
2553 | } else if (*ppdata > end_of_smb) { | ||
2554 | cFYI(1, "data starts after end of smb"); | ||
2555 | return -EINVAL; | ||
2556 | } else if (data_count + *ppdata > end_of_smb) { | ||
2557 | cFYI(1, "data %p + count %d (%p) past smb end %p start %p", | ||
2558 | *ppdata, data_count, (data_count + *ppdata), | ||
2559 | end_of_smb, pSMBr); | ||
2560 | return -EINVAL; | ||
2561 | } else if (parm_count + data_count > pSMBr->ByteCount) { | ||
2562 | cFYI(1, "parm count and data count larger than SMB"); | ||
2563 | return -EINVAL; | ||
2564 | } | ||
2565 | *pdatalen = data_count; | ||
2566 | *pparmlen = parm_count; | ||
2567 | return 0; | ||
2568 | } | ||
2569 | |||
2570 | int | 2481 | int |
2571 | CIFSSMBQueryReparseLinkInfo(const int xid, struct cifsTconInfo *tcon, | 2482 | CIFSSMBQueryReparseLinkInfo(const int xid, struct cifsTconInfo *tcon, |
2572 | const unsigned char *searchName, | 2483 | const unsigned char *searchName, |
@@ -3056,7 +2967,97 @@ GetExtAttrOut: | |||
3056 | 2967 | ||
3057 | #endif /* CONFIG_POSIX */ | 2968 | #endif /* CONFIG_POSIX */ |
3058 | 2969 | ||
3059 | #ifdef CONFIG_CIFS_EXPERIMENTAL | 2970 | #ifdef CONFIG_CIFS_ACL |
2971 | /* | ||
2972 | * Initialize NT TRANSACT SMB into small smb request buffer. This assumes that | ||
2973 | * all NT TRANSACTS that we init here have total parm and data under about 400 | ||
2974 | * bytes (to fit in small cifs buffer size), which is the case so far, it | ||
2975 | * easily fits. NB: Setup words themselves and ByteCount MaxSetupCount (size of | ||
2976 | * returned setup area) and MaxParameterCount (returned parms size) must be set | ||
2977 | * by caller | ||
2978 | */ | ||
2979 | static int | ||
2980 | smb_init_nttransact(const __u16 sub_command, const int setup_count, | ||
2981 | const int parm_len, struct cifsTconInfo *tcon, | ||
2982 | void **ret_buf) | ||
2983 | { | ||
2984 | int rc; | ||
2985 | __u32 temp_offset; | ||
2986 | struct smb_com_ntransact_req *pSMB; | ||
2987 | |||
2988 | rc = small_smb_init(SMB_COM_NT_TRANSACT, 19 + setup_count, tcon, | ||
2989 | (void **)&pSMB); | ||
2990 | if (rc) | ||
2991 | return rc; | ||
2992 | *ret_buf = (void *)pSMB; | ||
2993 | pSMB->Reserved = 0; | ||
2994 | pSMB->TotalParameterCount = cpu_to_le32(parm_len); | ||
2995 | pSMB->TotalDataCount = 0; | ||
2996 | pSMB->MaxDataCount = cpu_to_le32((tcon->ses->server->maxBuf - | ||
2997 | MAX_CIFS_HDR_SIZE) & 0xFFFFFF00); | ||
2998 | pSMB->ParameterCount = pSMB->TotalParameterCount; | ||
2999 | pSMB->DataCount = pSMB->TotalDataCount; | ||
3000 | temp_offset = offsetof(struct smb_com_ntransact_req, Parms) + | ||
3001 | (setup_count * 2) - 4 /* for rfc1001 length itself */; | ||
3002 | pSMB->ParameterOffset = cpu_to_le32(temp_offset); | ||
3003 | pSMB->DataOffset = cpu_to_le32(temp_offset + parm_len); | ||
3004 | pSMB->SetupCount = setup_count; /* no need to le convert byte fields */ | ||
3005 | pSMB->SubCommand = cpu_to_le16(sub_command); | ||
3006 | return 0; | ||
3007 | } | ||
3008 | |||
3009 | static int | ||
3010 | validate_ntransact(char *buf, char **ppparm, char **ppdata, | ||
3011 | __u32 *pparmlen, __u32 *pdatalen) | ||
3012 | { | ||
3013 | char *end_of_smb; | ||
3014 | __u32 data_count, data_offset, parm_count, parm_offset; | ||
3015 | struct smb_com_ntransact_rsp *pSMBr; | ||
3016 | |||
3017 | *pdatalen = 0; | ||
3018 | *pparmlen = 0; | ||
3019 | |||
3020 | if (buf == NULL) | ||
3021 | return -EINVAL; | ||
3022 | |||
3023 | pSMBr = (struct smb_com_ntransact_rsp *)buf; | ||
3024 | |||
3025 | /* ByteCount was converted from little endian in SendReceive */ | ||
3026 | end_of_smb = 2 /* sizeof byte count */ + pSMBr->ByteCount + | ||
3027 | (char *)&pSMBr->ByteCount; | ||
3028 | |||
3029 | data_offset = le32_to_cpu(pSMBr->DataOffset); | ||
3030 | data_count = le32_to_cpu(pSMBr->DataCount); | ||
3031 | parm_offset = le32_to_cpu(pSMBr->ParameterOffset); | ||
3032 | parm_count = le32_to_cpu(pSMBr->ParameterCount); | ||
3033 | |||
3034 | *ppparm = (char *)&pSMBr->hdr.Protocol + parm_offset; | ||
3035 | *ppdata = (char *)&pSMBr->hdr.Protocol + data_offset; | ||
3036 | |||
3037 | /* should we also check that parm and data areas do not overlap? */ | ||
3038 | if (*ppparm > end_of_smb) { | ||
3039 | cFYI(1, "parms start after end of smb"); | ||
3040 | return -EINVAL; | ||
3041 | } else if (parm_count + *ppparm > end_of_smb) { | ||
3042 | cFYI(1, "parm end after end of smb"); | ||
3043 | return -EINVAL; | ||
3044 | } else if (*ppdata > end_of_smb) { | ||
3045 | cFYI(1, "data starts after end of smb"); | ||
3046 | return -EINVAL; | ||
3047 | } else if (data_count + *ppdata > end_of_smb) { | ||
3048 | cFYI(1, "data %p + count %d (%p) past smb end %p start %p", | ||
3049 | *ppdata, data_count, (data_count + *ppdata), | ||
3050 | end_of_smb, pSMBr); | ||
3051 | return -EINVAL; | ||
3052 | } else if (parm_count + data_count > pSMBr->ByteCount) { | ||
3053 | cFYI(1, "parm count and data count larger than SMB"); | ||
3054 | return -EINVAL; | ||
3055 | } | ||
3056 | *pdatalen = data_count; | ||
3057 | *pparmlen = parm_count; | ||
3058 | return 0; | ||
3059 | } | ||
3060 | |||
3060 | /* Get Security Descriptor (by handle) from remote server for a file or dir */ | 3061 | /* Get Security Descriptor (by handle) from remote server for a file or dir */ |
3061 | int | 3062 | int |
3062 | CIFSSMBGetCIFSACL(const int xid, struct cifsTconInfo *tcon, __u16 fid, | 3063 | CIFSSMBGetCIFSACL(const int xid, struct cifsTconInfo *tcon, __u16 fid, |
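The validate_ntransact() helper that now lives under CONFIG_CIFS_ACL treats the server-supplied parameter/data offsets and counts as untrusted and verifies that each region falls inside the received SMB before it is dereferenced. Below is a minimal userspace sketch of that bounds-check pattern; the helper name and error convention are illustrative, not the kernel code itself.

    #include <errno.h>
    #include <stddef.h>
    #include <stdint.h>

    /*
     * Given a response buffer of buf_len bytes, check that a region described
     * by an untrusted offset and count lies entirely inside the buffer before
     * using it, in the spirit of validate_ntransact().
     */
    int region_in_bounds(size_t buf_len, uint32_t offset, uint32_t count)
    {
        if (offset > buf_len)               /* region starts past the end */
            return -EINVAL;
        if (count > buf_len - offset)       /* region runs past the end */
            return -EINVAL;
        return 0;
    }

Comparing the count against the remaining length, rather than adding the offset to a pointer first, also sidesteps pointer overflow if the server sends an absurd offset.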
@@ -3214,7 +3215,7 @@ setCifsAclRetry: | |||
3214 | return (rc); | 3215 | return (rc); |
3215 | } | 3216 | } |
3216 | 3217 | ||
3217 | #endif /* CONFIG_CIFS_EXPERIMENTAL */ | 3218 | #endif /* CONFIG_CIFS_ACL */ |
3218 | 3219 | ||
3219 | /* Legacy Query Path Information call for lookup to old servers such | 3220 | /* Legacy Query Path Information call for lookup to old servers such |
3220 | as Win9x/WinME */ | 3221 | as Win9x/WinME */ |
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 9eb327defa1d..cc1a8604a790 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c | |||
@@ -105,6 +105,7 @@ struct smb_vol { | |||
105 | unsigned int wsize; | 105 | unsigned int wsize; |
106 | bool sockopt_tcp_nodelay:1; | 106 | bool sockopt_tcp_nodelay:1; |
107 | unsigned short int port; | 107 | unsigned short int port; |
108 | unsigned long actimeo; /* attribute cache timeout (jiffies) */ | ||
108 | char *prepath; | 109 | char *prepath; |
109 | struct sockaddr_storage srcaddr; /* allow binding to a local IP */ | 110 | struct sockaddr_storage srcaddr; /* allow binding to a local IP */ |
110 | struct nls_table *local_nls; | 111 | struct nls_table *local_nls; |
@@ -116,6 +117,7 @@ struct smb_vol { | |||
116 | 117 | ||
117 | static int ipv4_connect(struct TCP_Server_Info *server); | 118 | static int ipv4_connect(struct TCP_Server_Info *server); |
118 | static int ipv6_connect(struct TCP_Server_Info *server); | 119 | static int ipv6_connect(struct TCP_Server_Info *server); |
120 | static void tlink_rb_insert(struct rb_root *root, struct tcon_link *new_tlink); | ||
119 | static void cifs_prune_tlinks(struct work_struct *work); | 121 | static void cifs_prune_tlinks(struct work_struct *work); |
120 | 122 | ||
121 | /* | 123 | /* |
@@ -805,23 +807,20 @@ cifs_parse_mount_options(char *options, const char *devname, | |||
805 | short int override_gid = -1; | 807 | short int override_gid = -1; |
806 | bool uid_specified = false; | 808 | bool uid_specified = false; |
807 | bool gid_specified = false; | 809 | bool gid_specified = false; |
810 | char *nodename = utsname()->nodename; | ||
808 | 811 | ||
809 | separator[0] = ','; | 812 | separator[0] = ','; |
810 | separator[1] = 0; | 813 | separator[1] = 0; |
811 | 814 | ||
812 | if (Local_System_Name[0] != 0) | 815 | /* |
813 | memcpy(vol->source_rfc1001_name, Local_System_Name, 15); | 816 | * does not have to be perfect mapping since field is |
814 | else { | 817 | * informational, only used for servers that do not support |
815 | char *nodename = utsname()->nodename; | 818 | * port 445 and it can be overridden at mount time |
816 | int n = strnlen(nodename, 15); | 819 | */ |
817 | memset(vol->source_rfc1001_name, 0x20, 15); | 820 | memset(vol->source_rfc1001_name, 0x20, 15); |
818 | for (i = 0; i < n; i++) { | 821 | for (i = 0; i < strnlen(nodename, 15); i++) |
819 | /* does not have to be perfect mapping since field is | 822 | vol->source_rfc1001_name[i] = toupper(nodename[i]); |
820 | informational, only used for servers that do not support | 823 | |
821 | port 445 and it can be overridden at mount time */ | ||
822 | vol->source_rfc1001_name[i] = toupper(nodename[i]); | ||
823 | } | ||
824 | } | ||
825 | vol->source_rfc1001_name[15] = 0; | 824 | vol->source_rfc1001_name[15] = 0; |
826 | /* null target name indicates to use *SMBSERVR default called name | 825 | /* null target name indicates to use *SMBSERVR default called name |
827 | if we end up sending RFC1001 session initialize */ | 826 | if we end up sending RFC1001 session initialize */ |
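The rewritten block of cifs_parse_mount_options() always derives the RFC1001 source name from the node name: the 15-byte field is space-padded, up to 15 characters are copied upper-cased, and the result is NUL-terminated. A standalone sketch of the same transformation (the hostname is only sample input):

    #include <ctype.h>
    #include <stdio.h>
    #include <string.h>

    #define RFC1001_NAME_LEN 15

    /* Space-padded, upper-cased, at-most-15-character RFC1001-style name. */
    void make_rfc1001_name(char out[RFC1001_NAME_LEN + 1], const char *nodename)
    {
        size_t i, n = strnlen(nodename, RFC1001_NAME_LEN);

        memset(out, ' ', RFC1001_NAME_LEN);
        for (i = 0; i < n; i++)
            out[i] = toupper((unsigned char)nodename[i]);
        out[RFC1001_NAME_LEN] = '\0';
    }

    int main(void)
    {
        char name[RFC1001_NAME_LEN + 1];

        make_rfc1001_name(name, "buildhost.example");
        printf("'%s'\n", name);    /* 'BUILDHOST.EXAMP' */
        return 0;
    }

As the in-code comment notes, the mapping does not have to be exact; the field is informational and only matters for servers that cannot use port 445.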
@@ -839,6 +838,8 @@ cifs_parse_mount_options(char *options, const char *devname, | |||
839 | /* default to using server inode numbers where available */ | 838 | /* default to using server inode numbers where available */ |
840 | vol->server_ino = 1; | 839 | vol->server_ino = 1; |
841 | 840 | ||
841 | vol->actimeo = CIFS_DEF_ACTIMEO; | ||
842 | |||
842 | if (!options) | 843 | if (!options) |
843 | return 1; | 844 | return 1; |
844 | 845 | ||
@@ -1213,6 +1214,16 @@ cifs_parse_mount_options(char *options, const char *devname, | |||
1213 | printk(KERN_WARNING "CIFS: server net" | 1214 | printk(KERN_WARNING "CIFS: server net" |
1214 | "biosname longer than 15 truncated.\n"); | 1215 | "biosname longer than 15 truncated.\n"); |
1215 | } | 1216 | } |
1217 | } else if (strnicmp(data, "actimeo", 7) == 0) { | ||
1218 | if (value && *value) { | ||
1219 | vol->actimeo = HZ * simple_strtoul(value, | ||
1220 | &value, 0); | ||
1221 | if (vol->actimeo > CIFS_MAX_ACTIMEO) { | ||
1222 | cERROR(1, "CIFS: attribute cache" | ||
1223 | "timeout too large"); | ||
1224 | return 1; | ||
1225 | } | ||
1226 | } | ||
1216 | } else if (strnicmp(data, "credentials", 4) == 0) { | 1227 | } else if (strnicmp(data, "credentials", 4) == 0) { |
1217 | /* ignore */ | 1228 | /* ignore */ |
1218 | } else if (strnicmp(data, "version", 3) == 0) { | 1229 | } else if (strnicmp(data, "version", 3) == 0) { |
@@ -1351,6 +1362,11 @@ cifs_parse_mount_options(char *options, const char *devname, | |||
1351 | "supported. Instead set " | 1362 | "supported. Instead set " |
1352 | "/proc/fs/cifs/LookupCacheEnabled to 0\n"); | 1363 | "/proc/fs/cifs/LookupCacheEnabled to 0\n"); |
1353 | } else if (strnicmp(data, "fsc", 3) == 0) { | 1364 | } else if (strnicmp(data, "fsc", 3) == 0) { |
1365 | #ifndef CONFIG_CIFS_FSCACHE | ||
1366 | cERROR(1, "FS-Cache support needs CONFIG_CIFS_FSCACHE" | ||
1367 | "kernel config option set"); | ||
1368 | return 1; | ||
1369 | #endif | ||
1354 | vol->fsc = true; | 1370 | vol->fsc = true; |
1355 | } else if (strnicmp(data, "mfsymlinks", 10) == 0) { | 1371 | } else if (strnicmp(data, "mfsymlinks", 10) == 0) { |
1356 | vol->mfsymlinks = true; | 1372 | vol->mfsymlinks = true; |
@@ -2565,6 +2581,8 @@ static void setup_cifs_sb(struct smb_vol *pvolume_info, | |||
2565 | cFYI(1, "file mode: 0x%x dir mode: 0x%x", | 2581 | cFYI(1, "file mode: 0x%x dir mode: 0x%x", |
2566 | cifs_sb->mnt_file_mode, cifs_sb->mnt_dir_mode); | 2582 | cifs_sb->mnt_file_mode, cifs_sb->mnt_dir_mode); |
2567 | 2583 | ||
2584 | cifs_sb->actimeo = pvolume_info->actimeo; | ||
2585 | |||
2568 | if (pvolume_info->noperm) | 2586 | if (pvolume_info->noperm) |
2569 | cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_NO_PERM; | 2587 | cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_NO_PERM; |
2570 | if (pvolume_info->setuids) | 2588 | if (pvolume_info->setuids) |
@@ -2815,13 +2833,13 @@ remote_path_check: | |||
2815 | /* check if a whole path (including prepath) is not remote */ | 2833 | /* check if a whole path (including prepath) is not remote */ |
2816 | if (!rc && cifs_sb->prepathlen && tcon) { | 2834 | if (!rc && cifs_sb->prepathlen && tcon) { |
2817 | /* build_path_to_root works only when we have a valid tcon */ | 2835 | /* build_path_to_root works only when we have a valid tcon */ |
2818 | full_path = cifs_build_path_to_root(cifs_sb); | 2836 | full_path = cifs_build_path_to_root(cifs_sb, tcon); |
2819 | if (full_path == NULL) { | 2837 | if (full_path == NULL) { |
2820 | rc = -ENOMEM; | 2838 | rc = -ENOMEM; |
2821 | goto mount_fail_check; | 2839 | goto mount_fail_check; |
2822 | } | 2840 | } |
2823 | rc = is_path_accessible(xid, tcon, cifs_sb, full_path); | 2841 | rc = is_path_accessible(xid, tcon, cifs_sb, full_path); |
2824 | if (rc != -EREMOTE) { | 2842 | if (rc != 0 && rc != -EREMOTE) { |
2825 | kfree(full_path); | 2843 | kfree(full_path); |
2826 | goto mount_fail_check; | 2844 | goto mount_fail_check; |
2827 | } | 2845 | } |
@@ -2900,24 +2918,16 @@ remote_path_check: | |||
2900 | goto mount_fail_check; | 2918 | goto mount_fail_check; |
2901 | } | 2919 | } |
2902 | 2920 | ||
2903 | tlink->tl_index = pSesInfo->linux_uid; | 2921 | tlink->tl_uid = pSesInfo->linux_uid; |
2904 | tlink->tl_tcon = tcon; | 2922 | tlink->tl_tcon = tcon; |
2905 | tlink->tl_time = jiffies; | 2923 | tlink->tl_time = jiffies; |
2906 | set_bit(TCON_LINK_MASTER, &tlink->tl_flags); | 2924 | set_bit(TCON_LINK_MASTER, &tlink->tl_flags); |
2907 | set_bit(TCON_LINK_IN_TREE, &tlink->tl_flags); | 2925 | set_bit(TCON_LINK_IN_TREE, &tlink->tl_flags); |
2908 | 2926 | ||
2909 | rc = radix_tree_preload(GFP_KERNEL); | 2927 | cifs_sb->master_tlink = tlink; |
2910 | if (rc == -ENOMEM) { | ||
2911 | kfree(tlink); | ||
2912 | goto mount_fail_check; | ||
2913 | } | ||
2914 | |||
2915 | spin_lock(&cifs_sb->tlink_tree_lock); | 2928 | spin_lock(&cifs_sb->tlink_tree_lock); |
2916 | radix_tree_insert(&cifs_sb->tlink_tree, pSesInfo->linux_uid, tlink); | 2929 | tlink_rb_insert(&cifs_sb->tlink_tree, tlink); |
2917 | radix_tree_tag_set(&cifs_sb->tlink_tree, pSesInfo->linux_uid, | ||
2918 | CIFS_TLINK_MASTER_TAG); | ||
2919 | spin_unlock(&cifs_sb->tlink_tree_lock); | 2930 | spin_unlock(&cifs_sb->tlink_tree_lock); |
2920 | radix_tree_preload_end(); | ||
2921 | 2931 | ||
2922 | queue_delayed_work(system_nrt_wq, &cifs_sb->prune_tlinks, | 2932 | queue_delayed_work(system_nrt_wq, &cifs_sb->prune_tlinks, |
2923 | TLINK_IDLE_EXPIRE); | 2933 | TLINK_IDLE_EXPIRE); |
@@ -3107,32 +3117,25 @@ CIFSTCon(unsigned int xid, struct cifsSesInfo *ses, | |||
3107 | int | 3117 | int |
3108 | cifs_umount(struct super_block *sb, struct cifs_sb_info *cifs_sb) | 3118 | cifs_umount(struct super_block *sb, struct cifs_sb_info *cifs_sb) |
3109 | { | 3119 | { |
3110 | int i, ret; | 3120 | struct rb_root *root = &cifs_sb->tlink_tree; |
3121 | struct rb_node *node; | ||
3122 | struct tcon_link *tlink; | ||
3111 | char *tmp; | 3123 | char *tmp; |
3112 | struct tcon_link *tlink[8]; | ||
3113 | unsigned long index = 0; | ||
3114 | 3124 | ||
3115 | cancel_delayed_work_sync(&cifs_sb->prune_tlinks); | 3125 | cancel_delayed_work_sync(&cifs_sb->prune_tlinks); |
3116 | 3126 | ||
3117 | do { | 3127 | spin_lock(&cifs_sb->tlink_tree_lock); |
3118 | spin_lock(&cifs_sb->tlink_tree_lock); | 3128 | while ((node = rb_first(root))) { |
3119 | ret = radix_tree_gang_lookup(&cifs_sb->tlink_tree, | 3129 | tlink = rb_entry(node, struct tcon_link, tl_rbnode); |
3120 | (void **)tlink, index, | 3130 | cifs_get_tlink(tlink); |
3121 | ARRAY_SIZE(tlink)); | 3131 | clear_bit(TCON_LINK_IN_TREE, &tlink->tl_flags); |
3122 | /* increment index for next pass */ | 3132 | rb_erase(node, root); |
3123 | if (ret > 0) | ||
3124 | index = tlink[ret - 1]->tl_index + 1; | ||
3125 | for (i = 0; i < ret; i++) { | ||
3126 | cifs_get_tlink(tlink[i]); | ||
3127 | clear_bit(TCON_LINK_IN_TREE, &tlink[i]->tl_flags); | ||
3128 | radix_tree_delete(&cifs_sb->tlink_tree, | ||
3129 | tlink[i]->tl_index); | ||
3130 | } | ||
3131 | spin_unlock(&cifs_sb->tlink_tree_lock); | ||
3132 | 3133 | ||
3133 | for (i = 0; i < ret; i++) | 3134 | spin_unlock(&cifs_sb->tlink_tree_lock); |
3134 | cifs_put_tlink(tlink[i]); | 3135 | cifs_put_tlink(tlink); |
3135 | } while (ret != 0); | 3136 | spin_lock(&cifs_sb->tlink_tree_lock); |
3137 | } | ||
3138 | spin_unlock(&cifs_sb->tlink_tree_lock); | ||
3136 | 3139 | ||
3137 | tmp = cifs_sb->prepath; | 3140 | tmp = cifs_sb->prepath; |
3138 | cifs_sb->prepathlen = 0; | 3141 | cifs_sb->prepathlen = 0; |
@@ -3271,22 +3274,10 @@ out: | |||
3271 | return tcon; | 3274 | return tcon; |
3272 | } | 3275 | } |
3273 | 3276 | ||
3274 | static struct tcon_link * | 3277 | static inline struct tcon_link * |
3275 | cifs_sb_master_tlink(struct cifs_sb_info *cifs_sb) | 3278 | cifs_sb_master_tlink(struct cifs_sb_info *cifs_sb) |
3276 | { | 3279 | { |
3277 | struct tcon_link *tlink; | 3280 | return cifs_sb->master_tlink; |
3278 | unsigned int ret; | ||
3279 | |||
3280 | spin_lock(&cifs_sb->tlink_tree_lock); | ||
3281 | ret = radix_tree_gang_lookup_tag(&cifs_sb->tlink_tree, (void **)&tlink, | ||
3282 | 0, 1, CIFS_TLINK_MASTER_TAG); | ||
3283 | spin_unlock(&cifs_sb->tlink_tree_lock); | ||
3284 | |||
3285 | /* the master tcon should always be present */ | ||
3286 | if (ret == 0) | ||
3287 | BUG(); | ||
3288 | |||
3289 | return tlink; | ||
3290 | } | 3281 | } |
3291 | 3282 | ||
3292 | struct cifsTconInfo * | 3283 | struct cifsTconInfo * |
@@ -3302,6 +3293,47 @@ cifs_sb_tcon_pending_wait(void *unused) | |||
3302 | return signal_pending(current) ? -ERESTARTSYS : 0; | 3293 | return signal_pending(current) ? -ERESTARTSYS : 0; |
3303 | } | 3294 | } |
3304 | 3295 | ||
3296 | /* find and return a tlink with given uid */ | ||
3297 | static struct tcon_link * | ||
3298 | tlink_rb_search(struct rb_root *root, uid_t uid) | ||
3299 | { | ||
3300 | struct rb_node *node = root->rb_node; | ||
3301 | struct tcon_link *tlink; | ||
3302 | |||
3303 | while (node) { | ||
3304 | tlink = rb_entry(node, struct tcon_link, tl_rbnode); | ||
3305 | |||
3306 | if (tlink->tl_uid > uid) | ||
3307 | node = node->rb_left; | ||
3308 | else if (tlink->tl_uid < uid) | ||
3309 | node = node->rb_right; | ||
3310 | else | ||
3311 | return tlink; | ||
3312 | } | ||
3313 | return NULL; | ||
3314 | } | ||
3315 | |||
3316 | /* insert a tcon_link into the tree */ | ||
3317 | static void | ||
3318 | tlink_rb_insert(struct rb_root *root, struct tcon_link *new_tlink) | ||
3319 | { | ||
3320 | struct rb_node **new = &(root->rb_node), *parent = NULL; | ||
3321 | struct tcon_link *tlink; | ||
3322 | |||
3323 | while (*new) { | ||
3324 | tlink = rb_entry(*new, struct tcon_link, tl_rbnode); | ||
3325 | parent = *new; | ||
3326 | |||
3327 | if (tlink->tl_uid > new_tlink->tl_uid) | ||
3328 | new = &((*new)->rb_left); | ||
3329 | else | ||
3330 | new = &((*new)->rb_right); | ||
3331 | } | ||
3332 | |||
3333 | rb_link_node(&new_tlink->tl_rbnode, parent, new); | ||
3334 | rb_insert_color(&new_tlink->tl_rbnode, root); | ||
3335 | } | ||
3336 | |||
3305 | /* | 3337 | /* |
3306 | * Find or construct an appropriate tcon given a cifs_sb and the fsuid of the | 3338 | * Find or construct an appropriate tcon given a cifs_sb and the fsuid of the |
3307 | * current task. | 3339 | * current task. |
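tlink_rb_search() and tlink_rb_insert() replace the old radix tree with an rbtree keyed on tl_uid: search descends left when the node's uid is larger than the target and right when it is smaller, and insert walks the same way to find the empty parent link. The userspace analogue below uses a plain (unbalanced) binary search tree, so it omits the rebalancing that rb_insert_color() performs; the names are illustrative.

    #include <stddef.h>
    #include <sys/types.h>

    struct tlink {
        uid_t uid;
        struct tlink *left, *right;
    };

    /* Same walk as tlink_rb_search(): follow left/right by uid comparison. */
    struct tlink *tlink_search(struct tlink *node, uid_t uid)
    {
        while (node) {
            if (node->uid > uid)
                node = node->left;
            else if (node->uid < uid)
                node = node->right;
            else
                return node;
        }
        return NULL;
    }

    /* Same walk as tlink_rb_insert(): find the empty link, then attach. */
    void tlink_insert(struct tlink **root, struct tlink *new_node)
    {
        struct tlink **link = root;

        while (*link) {
            if ((*link)->uid > new_node->uid)
                link = &(*link)->left;
            else
                link = &(*link)->right;
        }
        new_node->left = new_node->right = NULL;
        *link = new_node;
    }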
@@ -3309,7 +3341,7 @@ cifs_sb_tcon_pending_wait(void *unused) | |||
3309 | * If the superblock doesn't refer to a multiuser mount, then just return | 3341 | * If the superblock doesn't refer to a multiuser mount, then just return |
3310 | * the master tcon for the mount. | 3342 | * the master tcon for the mount. |
3311 | * | 3343 | * |
3312 | * First, search the radix tree for an existing tcon for this fsuid. If one | 3344 | * First, search the rbtree for an existing tcon for this fsuid. If one |
3313 | * exists, then check to see if it's pending construction. If it is then wait | 3345 | * exists, then check to see if it's pending construction. If it is then wait |
3314 | * for construction to complete. Once it's no longer pending, check to see if | 3346 | * for construction to complete. Once it's no longer pending, check to see if |
3315 | * it failed and either return an error or retry construction, depending on | 3347 | * it failed and either return an error or retry construction, depending on |
@@ -3322,14 +3354,14 @@ struct tcon_link * | |||
3322 | cifs_sb_tlink(struct cifs_sb_info *cifs_sb) | 3354 | cifs_sb_tlink(struct cifs_sb_info *cifs_sb) |
3323 | { | 3355 | { |
3324 | int ret; | 3356 | int ret; |
3325 | unsigned long fsuid = (unsigned long) current_fsuid(); | 3357 | uid_t fsuid = current_fsuid(); |
3326 | struct tcon_link *tlink, *newtlink; | 3358 | struct tcon_link *tlink, *newtlink; |
3327 | 3359 | ||
3328 | if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER)) | 3360 | if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER)) |
3329 | return cifs_get_tlink(cifs_sb_master_tlink(cifs_sb)); | 3361 | return cifs_get_tlink(cifs_sb_master_tlink(cifs_sb)); |
3330 | 3362 | ||
3331 | spin_lock(&cifs_sb->tlink_tree_lock); | 3363 | spin_lock(&cifs_sb->tlink_tree_lock); |
3332 | tlink = radix_tree_lookup(&cifs_sb->tlink_tree, fsuid); | 3364 | tlink = tlink_rb_search(&cifs_sb->tlink_tree, fsuid); |
3333 | if (tlink) | 3365 | if (tlink) |
3334 | cifs_get_tlink(tlink); | 3366 | cifs_get_tlink(tlink); |
3335 | spin_unlock(&cifs_sb->tlink_tree_lock); | 3367 | spin_unlock(&cifs_sb->tlink_tree_lock); |
@@ -3338,36 +3370,24 @@ cifs_sb_tlink(struct cifs_sb_info *cifs_sb) | |||
3338 | newtlink = kzalloc(sizeof(*tlink), GFP_KERNEL); | 3370 | newtlink = kzalloc(sizeof(*tlink), GFP_KERNEL); |
3339 | if (newtlink == NULL) | 3371 | if (newtlink == NULL) |
3340 | return ERR_PTR(-ENOMEM); | 3372 | return ERR_PTR(-ENOMEM); |
3341 | newtlink->tl_index = fsuid; | 3373 | newtlink->tl_uid = fsuid; |
3342 | newtlink->tl_tcon = ERR_PTR(-EACCES); | 3374 | newtlink->tl_tcon = ERR_PTR(-EACCES); |
3343 | set_bit(TCON_LINK_PENDING, &newtlink->tl_flags); | 3375 | set_bit(TCON_LINK_PENDING, &newtlink->tl_flags); |
3344 | set_bit(TCON_LINK_IN_TREE, &newtlink->tl_flags); | 3376 | set_bit(TCON_LINK_IN_TREE, &newtlink->tl_flags); |
3345 | cifs_get_tlink(newtlink); | 3377 | cifs_get_tlink(newtlink); |
3346 | 3378 | ||
3347 | ret = radix_tree_preload(GFP_KERNEL); | ||
3348 | if (ret != 0) { | ||
3349 | kfree(newtlink); | ||
3350 | return ERR_PTR(ret); | ||
3351 | } | ||
3352 | |||
3353 | spin_lock(&cifs_sb->tlink_tree_lock); | 3379 | spin_lock(&cifs_sb->tlink_tree_lock); |
3354 | /* was one inserted after previous search? */ | 3380 | /* was one inserted after previous search? */ |
3355 | tlink = radix_tree_lookup(&cifs_sb->tlink_tree, fsuid); | 3381 | tlink = tlink_rb_search(&cifs_sb->tlink_tree, fsuid); |
3356 | if (tlink) { | 3382 | if (tlink) { |
3357 | cifs_get_tlink(tlink); | 3383 | cifs_get_tlink(tlink); |
3358 | spin_unlock(&cifs_sb->tlink_tree_lock); | 3384 | spin_unlock(&cifs_sb->tlink_tree_lock); |
3359 | radix_tree_preload_end(); | ||
3360 | kfree(newtlink); | 3385 | kfree(newtlink); |
3361 | goto wait_for_construction; | 3386 | goto wait_for_construction; |
3362 | } | 3387 | } |
3363 | ret = radix_tree_insert(&cifs_sb->tlink_tree, fsuid, newtlink); | ||
3364 | spin_unlock(&cifs_sb->tlink_tree_lock); | ||
3365 | radix_tree_preload_end(); | ||
3366 | if (ret) { | ||
3367 | kfree(newtlink); | ||
3368 | return ERR_PTR(ret); | ||
3369 | } | ||
3370 | tlink = newtlink; | 3388 | tlink = newtlink; |
3389 | tlink_rb_insert(&cifs_sb->tlink_tree, tlink); | ||
3390 | spin_unlock(&cifs_sb->tlink_tree_lock); | ||
3371 | } else { | 3391 | } else { |
3372 | wait_for_construction: | 3392 | wait_for_construction: |
3373 | ret = wait_on_bit(&tlink->tl_flags, TCON_LINK_PENDING, | 3393 | ret = wait_on_bit(&tlink->tl_flags, TCON_LINK_PENDING, |
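With the radix-tree preload gone, cifs_sb_tlink() follows a plain optimistic pattern: look up under the spinlock, allocate a candidate outside the lock, then re-search under the lock and either reuse an entry that raced in (freeing the candidate) or link the new one into the tree. A pthread-mutex sketch of the same shape, with a toy list standing in for the rbtree; all names are illustrative.

    #include <pthread.h>
    #include <stdlib.h>

    struct entry { int key; struct entry *next; };

    static struct entry *table;
    static pthread_mutex_t table_lock = PTHREAD_MUTEX_INITIALIZER;

    static struct entry *lookup_locked(int key)
    {
        struct entry *e;

        for (e = table; e; e = e->next)
            if (e->key == key)
                return e;
        return NULL;
    }

    /* Find-or-create: allocate outside the lock, then re-check for a racing
     * insertion before linking the new entry in. */
    struct entry *get_entry(int key)
    {
        struct entry *e, *fresh;

        pthread_mutex_lock(&table_lock);
        e = lookup_locked(key);
        pthread_mutex_unlock(&table_lock);
        if (e)
            return e;

        fresh = calloc(1, sizeof(*fresh));
        if (!fresh)
            return NULL;
        fresh->key = key;

        pthread_mutex_lock(&table_lock);
        e = lookup_locked(key);        /* was one inserted after the search? */
        if (e) {
            pthread_mutex_unlock(&table_lock);
            free(fresh);
            return e;
        }
        fresh->next = table;
        table = fresh;
        pthread_mutex_unlock(&table_lock);
        return fresh;
    }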
@@ -3413,39 +3433,39 @@ cifs_prune_tlinks(struct work_struct *work) | |||
3413 | { | 3433 | { |
3414 | struct cifs_sb_info *cifs_sb = container_of(work, struct cifs_sb_info, | 3434 | struct cifs_sb_info *cifs_sb = container_of(work, struct cifs_sb_info, |
3415 | prune_tlinks.work); | 3435 | prune_tlinks.work); |
3416 | struct tcon_link *tlink[8]; | 3436 | struct rb_root *root = &cifs_sb->tlink_tree; |
3417 | unsigned long now = jiffies; | 3437 | struct rb_node *node = rb_first(root); |
3418 | unsigned long index = 0; | 3438 | struct rb_node *tmp; |
3419 | int i, ret; | 3439 | struct tcon_link *tlink; |
3420 | 3440 | ||
3421 | do { | 3441 | /* |
3422 | spin_lock(&cifs_sb->tlink_tree_lock); | 3442 | * Because we drop the spinlock in the loop in order to put the tlink |
3423 | ret = radix_tree_gang_lookup(&cifs_sb->tlink_tree, | 3443 | * it's not guarded against removal of links from the tree. The only |
3424 | (void **)tlink, index, | 3444 | * places that remove entries from the tree are this function and |
3425 | ARRAY_SIZE(tlink)); | 3445 | * umounts. Because this function is non-reentrant and is canceled |
3426 | /* increment index for next pass */ | 3446 | * before umount can proceed, this is safe. |
3427 | if (ret > 0) | 3447 | */ |
3428 | index = tlink[ret - 1]->tl_index + 1; | 3448 | spin_lock(&cifs_sb->tlink_tree_lock); |
3429 | for (i = 0; i < ret; i++) { | 3449 | node = rb_first(root); |
3430 | if (test_bit(TCON_LINK_MASTER, &tlink[i]->tl_flags) || | 3450 | while (node != NULL) { |
3431 | atomic_read(&tlink[i]->tl_count) != 0 || | 3451 | tmp = node; |
3432 | time_after(tlink[i]->tl_time + TLINK_IDLE_EXPIRE, | 3452 | node = rb_next(tmp); |
3433 | now)) { | 3453 | tlink = rb_entry(tmp, struct tcon_link, tl_rbnode); |
3434 | tlink[i] = NULL; | 3454 | |
3435 | continue; | 3455 | if (test_bit(TCON_LINK_MASTER, &tlink->tl_flags) || |
3436 | } | 3456 | atomic_read(&tlink->tl_count) != 0 || |
3437 | cifs_get_tlink(tlink[i]); | 3457 | time_after(tlink->tl_time + TLINK_IDLE_EXPIRE, jiffies)) |
3438 | clear_bit(TCON_LINK_IN_TREE, &tlink[i]->tl_flags); | 3458 | continue; |
3439 | radix_tree_delete(&cifs_sb->tlink_tree, | ||
3440 | tlink[i]->tl_index); | ||
3441 | } | ||
3442 | spin_unlock(&cifs_sb->tlink_tree_lock); | ||
3443 | 3459 | ||
3444 | for (i = 0; i < ret; i++) { | 3460 | cifs_get_tlink(tlink); |
3445 | if (tlink[i] != NULL) | 3461 | clear_bit(TCON_LINK_IN_TREE, &tlink->tl_flags); |
3446 | cifs_put_tlink(tlink[i]); | 3462 | rb_erase(tmp, root); |
3447 | } | 3463 | |
3448 | } while (ret != 0); | 3464 | spin_unlock(&cifs_sb->tlink_tree_lock); |
3465 | cifs_put_tlink(tlink); | ||
3466 | spin_lock(&cifs_sb->tlink_tree_lock); | ||
3467 | } | ||
3468 | spin_unlock(&cifs_sb->tlink_tree_lock); | ||
3449 | 3469 | ||
3450 | queue_delayed_work(system_nrt_wq, &cifs_sb->prune_tlinks, | 3470 | queue_delayed_work(system_nrt_wq, &cifs_sb->prune_tlinks, |
3451 | TLINK_IDLE_EXPIRE); | 3471 | TLINK_IDLE_EXPIRE); |
diff --git a/fs/cifs/dns_resolve.c b/fs/cifs/dns_resolve.c index 0eb87026cad3..548f06230a6d 100644 --- a/fs/cifs/dns_resolve.c +++ b/fs/cifs/dns_resolve.c | |||
@@ -66,7 +66,7 @@ dns_resolve_server_name_to_ip(const char *unc, char **ip_addr) | |||
66 | /* Search for server name delimiter */ | 66 | /* Search for server name delimiter */ |
67 | sep = memchr(hostname, '\\', len); | 67 | sep = memchr(hostname, '\\', len); |
68 | if (sep) | 68 | if (sep) |
69 | len = sep - unc; | 69 | len = sep - hostname; |
70 | else | 70 | else |
71 | cFYI(1, "%s: probably server name is whole unc: %s", | 71 | cFYI(1, "%s: probably server name is whole unc: %s", |
72 | __func__, unc); | 72 | __func__, unc); |
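The one-line dns_resolve change measures the hostname length relative to hostname rather than the start of the UNC, so the leading "\\" no longer inflates the length handed to the resolver. A small standalone illustration of the corrected extraction:

    #include <stdio.h>
    #include <string.h>

    /* Extract the server component from a UNC such as \\server\share.
     * The length is measured from 'hostname', not from the start of the
     * UNC, which is exactly the fix above. */
    size_t unc_hostname(const char *unc, const char **hostname)
    {
        size_t len;
        const char *sep;

        *hostname = unc + strspn(unc, "\\");   /* skip leading backslashes */
        len = strlen(*hostname);
        sep = memchr(*hostname, '\\', len);
        if (sep)
            len = (size_t)(sep - *hostname);   /* was: sep - unc */
        return len;
    }

    int main(void)
    {
        const char *host;
        size_t len = unc_hostname("\\\\server\\share", &host);

        printf("%.*s\n", (int)len, host);      /* prints "server" */
        return 0;
    }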
diff --git a/fs/cifs/file.c b/fs/cifs/file.c index ae82159cf7fa..5a28660ca2b5 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c | |||
@@ -146,12 +146,7 @@ client_can_cache: | |||
146 | rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb, | 146 | rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb, |
147 | xid, NULL); | 147 | xid, NULL); |
148 | 148 | ||
149 | if ((oplock & 0xF) == OPLOCK_EXCLUSIVE) { | 149 | cifs_set_oplock_level(pCifsInode, oplock); |
150 | pCifsInode->clientCanCacheAll = true; | ||
151 | pCifsInode->clientCanCacheRead = true; | ||
152 | cFYI(1, "Exclusive Oplock granted on inode %p", inode); | ||
153 | } else if ((oplock & 0xF) == OPLOCK_READ) | ||
154 | pCifsInode->clientCanCacheRead = true; | ||
155 | 150 | ||
156 | return rc; | 151 | return rc; |
157 | } | 152 | } |
@@ -253,12 +248,7 @@ cifs_new_fileinfo(__u16 fileHandle, struct file *file, | |||
253 | list_add_tail(&pCifsFile->flist, &pCifsInode->openFileList); | 248 | list_add_tail(&pCifsFile->flist, &pCifsInode->openFileList); |
254 | spin_unlock(&cifs_file_list_lock); | 249 | spin_unlock(&cifs_file_list_lock); |
255 | 250 | ||
256 | if ((oplock & 0xF) == OPLOCK_EXCLUSIVE) { | 251 | cifs_set_oplock_level(pCifsInode, oplock); |
257 | pCifsInode->clientCanCacheAll = true; | ||
258 | pCifsInode->clientCanCacheRead = true; | ||
259 | cFYI(1, "Exclusive Oplock inode %p", inode); | ||
260 | } else if ((oplock & 0xF) == OPLOCK_READ) | ||
261 | pCifsInode->clientCanCacheRead = true; | ||
262 | 252 | ||
263 | file->private_data = pCifsFile; | 253 | file->private_data = pCifsFile; |
264 | return pCifsFile; | 254 | return pCifsFile; |
@@ -271,8 +261,9 @@ cifs_new_fileinfo(__u16 fileHandle, struct file *file, | |||
271 | */ | 261 | */ |
272 | void cifsFileInfo_put(struct cifsFileInfo *cifs_file) | 262 | void cifsFileInfo_put(struct cifsFileInfo *cifs_file) |
273 | { | 263 | { |
264 | struct inode *inode = cifs_file->dentry->d_inode; | ||
274 | struct cifsTconInfo *tcon = tlink_tcon(cifs_file->tlink); | 265 | struct cifsTconInfo *tcon = tlink_tcon(cifs_file->tlink); |
275 | struct cifsInodeInfo *cifsi = CIFS_I(cifs_file->dentry->d_inode); | 266 | struct cifsInodeInfo *cifsi = CIFS_I(inode); |
276 | struct cifsLockInfo *li, *tmp; | 267 | struct cifsLockInfo *li, *tmp; |
277 | 268 | ||
278 | spin_lock(&cifs_file_list_lock); | 269 | spin_lock(&cifs_file_list_lock); |
@@ -288,8 +279,7 @@ void cifsFileInfo_put(struct cifsFileInfo *cifs_file) | |||
288 | if (list_empty(&cifsi->openFileList)) { | 279 | if (list_empty(&cifsi->openFileList)) { |
289 | cFYI(1, "closing last open instance for inode %p", | 280 | cFYI(1, "closing last open instance for inode %p", |
290 | cifs_file->dentry->d_inode); | 281 | cifs_file->dentry->d_inode); |
291 | cifsi->clientCanCacheRead = false; | 282 | cifs_set_oplock_level(cifsi, 0); |
292 | cifsi->clientCanCacheAll = false; | ||
293 | } | 283 | } |
294 | spin_unlock(&cifs_file_list_lock); | 284 | spin_unlock(&cifs_file_list_lock); |
295 | 285 | ||
@@ -607,8 +597,6 @@ reopen_success: | |||
607 | rc = filemap_write_and_wait(inode->i_mapping); | 597 | rc = filemap_write_and_wait(inode->i_mapping); |
608 | mapping_set_error(inode->i_mapping, rc); | 598 | mapping_set_error(inode->i_mapping, rc); |
609 | 599 | ||
610 | pCifsInode->clientCanCacheAll = false; | ||
611 | pCifsInode->clientCanCacheRead = false; | ||
612 | if (tcon->unix_ext) | 600 | if (tcon->unix_ext) |
613 | rc = cifs_get_inode_info_unix(&inode, | 601 | rc = cifs_get_inode_info_unix(&inode, |
614 | full_path, inode->i_sb, xid); | 602 | full_path, inode->i_sb, xid); |
@@ -622,18 +610,9 @@ reopen_success: | |||
622 | invalidate the current end of file on the server | 610 | invalidate the current end of file on the server |
623 | we can not go to the server to get the new inod | 611 | we can not go to the server to get the new inod |
624 | info */ | 612 | info */ |
625 | if ((oplock & 0xF) == OPLOCK_EXCLUSIVE) { | 613 | |
626 | pCifsInode->clientCanCacheAll = true; | 614 | cifs_set_oplock_level(pCifsInode, oplock); |
627 | pCifsInode->clientCanCacheRead = true; | 615 | |
628 | cFYI(1, "Exclusive Oplock granted on inode %p", | ||
629 | pCifsFile->dentry->d_inode); | ||
630 | } else if ((oplock & 0xF) == OPLOCK_READ) { | ||
631 | pCifsInode->clientCanCacheRead = true; | ||
632 | pCifsInode->clientCanCacheAll = false; | ||
633 | } else { | ||
634 | pCifsInode->clientCanCacheRead = false; | ||
635 | pCifsInode->clientCanCacheAll = false; | ||
636 | } | ||
637 | cifs_relock_file(pCifsFile); | 616 | cifs_relock_file(pCifsFile); |
638 | 617 | ||
639 | reopen_error_exit: | 618 | reopen_error_exit: |
@@ -775,12 +754,6 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock) | |||
775 | 754 | ||
776 | cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); | 755 | cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); |
777 | tcon = tlink_tcon(((struct cifsFileInfo *)file->private_data)->tlink); | 756 | tcon = tlink_tcon(((struct cifsFileInfo *)file->private_data)->tlink); |
778 | |||
779 | if (file->private_data == NULL) { | ||
780 | rc = -EBADF; | ||
781 | FreeXid(xid); | ||
782 | return rc; | ||
783 | } | ||
784 | netfid = ((struct cifsFileInfo *)file->private_data)->netfid; | 757 | netfid = ((struct cifsFileInfo *)file->private_data)->netfid; |
785 | 758 | ||
786 | if ((tcon->ses->capabilities & CAP_UNIX) && | 759 | if ((tcon->ses->capabilities & CAP_UNIX) && |
@@ -956,6 +929,7 @@ cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset, | |||
956 | ssize_t cifs_user_write(struct file *file, const char __user *write_data, | 929 | ssize_t cifs_user_write(struct file *file, const char __user *write_data, |
957 | size_t write_size, loff_t *poffset) | 930 | size_t write_size, loff_t *poffset) |
958 | { | 931 | { |
932 | struct inode *inode = file->f_path.dentry->d_inode; | ||
959 | int rc = 0; | 933 | int rc = 0; |
960 | unsigned int bytes_written = 0; | 934 | unsigned int bytes_written = 0; |
961 | unsigned int total_written; | 935 | unsigned int total_written; |
@@ -963,7 +937,7 @@ ssize_t cifs_user_write(struct file *file, const char __user *write_data, | |||
963 | struct cifsTconInfo *pTcon; | 937 | struct cifsTconInfo *pTcon; |
964 | int xid, long_op; | 938 | int xid, long_op; |
965 | struct cifsFileInfo *open_file; | 939 | struct cifsFileInfo *open_file; |
966 | struct cifsInodeInfo *cifsi = CIFS_I(file->f_path.dentry->d_inode); | 940 | struct cifsInodeInfo *cifsi = CIFS_I(inode); |
967 | 941 | ||
968 | cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); | 942 | cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); |
969 | 943 | ||
@@ -1029,21 +1003,17 @@ ssize_t cifs_user_write(struct file *file, const char __user *write_data, | |||
1029 | 1003 | ||
1030 | cifs_stats_bytes_written(pTcon, total_written); | 1004 | cifs_stats_bytes_written(pTcon, total_written); |
1031 | 1005 | ||
1032 | /* since the write may have blocked check these pointers again */ | ||
1033 | if ((file->f_path.dentry) && (file->f_path.dentry->d_inode)) { | ||
1034 | struct inode *inode = file->f_path.dentry->d_inode; | ||
1035 | /* Do not update local mtime - server will set its actual value on write | 1006 | /* Do not update local mtime - server will set its actual value on write |
1036 | * inode->i_ctime = inode->i_mtime = | 1007 | * inode->i_ctime = inode->i_mtime = |
1037 | * current_fs_time(inode->i_sb);*/ | 1008 | * current_fs_time(inode->i_sb);*/ |
1038 | if (total_written > 0) { | 1009 | if (total_written > 0) { |
1039 | spin_lock(&inode->i_lock); | 1010 | spin_lock(&inode->i_lock); |
1040 | if (*poffset > file->f_path.dentry->d_inode->i_size) | 1011 | if (*poffset > inode->i_size) |
1041 | i_size_write(file->f_path.dentry->d_inode, | 1012 | i_size_write(inode, *poffset); |
1042 | *poffset); | 1013 | spin_unlock(&inode->i_lock); |
1043 | spin_unlock(&inode->i_lock); | ||
1044 | } | ||
1045 | mark_inode_dirty_sync(file->f_path.dentry->d_inode); | ||
1046 | } | 1014 | } |
1015 | mark_inode_dirty_sync(inode); | ||
1016 | |||
1047 | FreeXid(xid); | 1017 | FreeXid(xid); |
1048 | return total_written; | 1018 | return total_written; |
1049 | } | 1019 | } |
@@ -1138,7 +1108,6 @@ static ssize_t cifs_write(struct cifsFileInfo *open_file, | |||
1138 | return total_written; | 1108 | return total_written; |
1139 | } | 1109 | } |
1140 | 1110 | ||
1141 | #ifdef CONFIG_CIFS_EXPERIMENTAL | ||
1142 | struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode, | 1111 | struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode, |
1143 | bool fsuid_only) | 1112 | bool fsuid_only) |
1144 | { | 1113 | { |
@@ -1172,13 +1141,12 @@ struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode, | |||
1172 | spin_unlock(&cifs_file_list_lock); | 1141 | spin_unlock(&cifs_file_list_lock); |
1173 | return NULL; | 1142 | return NULL; |
1174 | } | 1143 | } |
1175 | #endif | ||
1176 | 1144 | ||
1177 | struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode, | 1145 | struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode, |
1178 | bool fsuid_only) | 1146 | bool fsuid_only) |
1179 | { | 1147 | { |
1180 | struct cifsFileInfo *open_file; | 1148 | struct cifsFileInfo *open_file; |
1181 | struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb); | 1149 | struct cifs_sb_info *cifs_sb; |
1182 | bool any_available = false; | 1150 | bool any_available = false; |
1183 | int rc; | 1151 | int rc; |
1184 | 1152 | ||
@@ -1192,6 +1160,8 @@ struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode, | |||
1192 | return NULL; | 1160 | return NULL; |
1193 | } | 1161 | } |
1194 | 1162 | ||
1163 | cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb); | ||
1164 | |||
1195 | /* only filter by fsuid on multiuser mounts */ | 1165 | /* only filter by fsuid on multiuser mounts */ |
1196 | if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER)) | 1166 | if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER)) |
1197 | fsuid_only = false; | 1167 | fsuid_only = false; |
@@ -2299,8 +2269,10 @@ void cifs_oplock_break_get(struct cifsFileInfo *cfile) | |||
2299 | 2269 | ||
2300 | void cifs_oplock_break_put(struct cifsFileInfo *cfile) | 2270 | void cifs_oplock_break_put(struct cifsFileInfo *cfile) |
2301 | { | 2271 | { |
2272 | struct super_block *sb = cfile->dentry->d_sb; | ||
2273 | |||
2302 | cifsFileInfo_put(cfile); | 2274 | cifsFileInfo_put(cfile); |
2303 | cifs_sb_deactive(cfile->dentry->d_sb); | 2275 | cifs_sb_deactive(sb); |
2304 | } | 2276 | } |
2305 | 2277 | ||
2306 | const struct address_space_operations cifs_addr_ops = { | 2278 | const struct address_space_operations cifs_addr_ops = { |
diff --git a/fs/cifs/fscache.c b/fs/cifs/fscache.c index a2ad94efcfe6..297a43d0ff7f 100644 --- a/fs/cifs/fscache.c +++ b/fs/cifs/fscache.c | |||
@@ -2,7 +2,7 @@ | |||
2 | * fs/cifs/fscache.c - CIFS filesystem cache interface | 2 | * fs/cifs/fscache.c - CIFS filesystem cache interface |
3 | * | 3 | * |
4 | * Copyright (c) 2010 Novell, Inc. | 4 | * Copyright (c) 2010 Novell, Inc. |
5 | * Author(s): Suresh Jayaraman (sjayaraman@suse.de> | 5 | * Author(s): Suresh Jayaraman <sjayaraman@suse.de> |
6 | * | 6 | * |
7 | * This library is free software; you can redistribute it and/or modify | 7 | * This library is free software; you can redistribute it and/or modify |
8 | * it under the terms of the GNU Lesser General Public License as published | 8 | * it under the terms of the GNU Lesser General Public License as published |
@@ -67,10 +67,12 @@ static void cifs_fscache_enable_inode_cookie(struct inode *inode) | |||
67 | if (cifsi->fscache) | 67 | if (cifsi->fscache) |
68 | return; | 68 | return; |
69 | 69 | ||
70 | cifsi->fscache = fscache_acquire_cookie(tcon->fscache, | 70 | if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_FSCACHE) { |
71 | cifsi->fscache = fscache_acquire_cookie(tcon->fscache, | ||
71 | &cifs_fscache_inode_object_def, cifsi); | 72 | &cifs_fscache_inode_object_def, cifsi); |
72 | cFYI(1, "CIFS: got FH cookie (0x%p/0x%p)", tcon->fscache, | 73 | cFYI(1, "CIFS: got FH cookie (0x%p/0x%p)", tcon->fscache, |
73 | cifsi->fscache); | 74 | cifsi->fscache); |
75 | } | ||
74 | } | 76 | } |
75 | 77 | ||
76 | void cifs_fscache_release_inode_cookie(struct inode *inode) | 78 | void cifs_fscache_release_inode_cookie(struct inode *inode) |
@@ -101,10 +103,8 @@ void cifs_fscache_set_inode_cookie(struct inode *inode, struct file *filp) | |||
101 | { | 103 | { |
102 | if ((filp->f_flags & O_ACCMODE) != O_RDONLY) | 104 | if ((filp->f_flags & O_ACCMODE) != O_RDONLY) |
103 | cifs_fscache_disable_inode_cookie(inode); | 105 | cifs_fscache_disable_inode_cookie(inode); |
104 | else { | 106 | else |
105 | cifs_fscache_enable_inode_cookie(inode); | 107 | cifs_fscache_enable_inode_cookie(inode); |
106 | cFYI(1, "CIFS: fscache inode cookie set"); | ||
107 | } | ||
108 | } | 108 | } |
109 | 109 | ||
110 | void cifs_fscache_reset_inode_cookie(struct inode *inode) | 110 | void cifs_fscache_reset_inode_cookie(struct inode *inode) |
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 39869c3c3efb..589f3e3f6e00 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c | |||
@@ -686,13 +686,18 @@ int cifs_get_inode_info(struct inode **pinode, | |||
686 | cFYI(1, "cifs_sfu_type failed: %d", tmprc); | 686 | cFYI(1, "cifs_sfu_type failed: %d", tmprc); |
687 | } | 687 | } |
688 | 688 | ||
689 | #ifdef CONFIG_CIFS_EXPERIMENTAL | 689 | #ifdef CONFIG_CIFS_ACL |
690 | /* fill in 0777 bits from ACL */ | 690 | /* fill in 0777 bits from ACL */ |
691 | if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) { | 691 | if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) { |
692 | cFYI(1, "Getting mode bits from ACL"); | 692 | rc = cifs_acl_to_fattr(cifs_sb, &fattr, *pinode, full_path, |
693 | cifs_acl_to_fattr(cifs_sb, &fattr, *pinode, full_path, pfid); | 693 | pfid); |
694 | if (rc) { | ||
695 | cFYI(1, "%s: Getting ACL failed with error: %d", | ||
696 | __func__, rc); | ||
697 | goto cgii_exit; | ||
698 | } | ||
694 | } | 699 | } |
695 | #endif | 700 | #endif /* CONFIG_CIFS_ACL */ |
696 | 701 | ||
697 | /* fill in remaining high mode bits e.g. SUID, VTX */ | 702 | /* fill in remaining high mode bits e.g. SUID, VTX */ |
698 | if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL) | 703 | if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL) |
@@ -723,12 +728,12 @@ static const struct inode_operations cifs_ipc_inode_ops = { | |||
723 | .lookup = cifs_lookup, | 728 | .lookup = cifs_lookup, |
724 | }; | 729 | }; |
725 | 730 | ||
726 | char *cifs_build_path_to_root(struct cifs_sb_info *cifs_sb) | 731 | char *cifs_build_path_to_root(struct cifs_sb_info *cifs_sb, |
732 | struct cifsTconInfo *tcon) | ||
727 | { | 733 | { |
728 | int pplen = cifs_sb->prepathlen; | 734 | int pplen = cifs_sb->prepathlen; |
729 | int dfsplen; | 735 | int dfsplen; |
730 | char *full_path = NULL; | 736 | char *full_path = NULL; |
731 | struct cifsTconInfo *tcon = cifs_sb_master_tcon(cifs_sb); | ||
732 | 737 | ||
733 | /* if no prefix path, simply set path to the root of share to "" */ | 738 | /* if no prefix path, simply set path to the root of share to "" */ |
734 | if (pplen == 0) { | 739 | if (pplen == 0) { |
@@ -870,7 +875,7 @@ struct inode *cifs_root_iget(struct super_block *sb, unsigned long ino) | |||
870 | char *full_path; | 875 | char *full_path; |
871 | struct cifsTconInfo *tcon = cifs_sb_master_tcon(cifs_sb); | 876 | struct cifsTconInfo *tcon = cifs_sb_master_tcon(cifs_sb); |
872 | 877 | ||
873 | full_path = cifs_build_path_to_root(cifs_sb); | 878 | full_path = cifs_build_path_to_root(cifs_sb, tcon); |
874 | if (full_path == NULL) | 879 | if (full_path == NULL) |
875 | return ERR_PTR(-ENOMEM); | 880 | return ERR_PTR(-ENOMEM); |
876 | 881 | ||
@@ -881,8 +886,10 @@ struct inode *cifs_root_iget(struct super_block *sb, unsigned long ino) | |||
881 | rc = cifs_get_inode_info(&inode, full_path, NULL, sb, | 886 | rc = cifs_get_inode_info(&inode, full_path, NULL, sb, |
882 | xid, NULL); | 887 | xid, NULL); |
883 | 888 | ||
884 | if (!inode) | 889 | if (!inode) { |
885 | return ERR_PTR(rc); | 890 | inode = ERR_PTR(rc); |
891 | goto out; | ||
892 | } | ||
886 | 893 | ||
887 | #ifdef CONFIG_CIFS_FSCACHE | 894 | #ifdef CONFIG_CIFS_FSCACHE |
888 | /* populate tcon->resource_id */ | 895 | /* populate tcon->resource_id */ |
@@ -898,13 +905,11 @@ struct inode *cifs_root_iget(struct super_block *sb, unsigned long ino) | |||
898 | inode->i_uid = cifs_sb->mnt_uid; | 905 | inode->i_uid = cifs_sb->mnt_uid; |
899 | inode->i_gid = cifs_sb->mnt_gid; | 906 | inode->i_gid = cifs_sb->mnt_gid; |
900 | } else if (rc) { | 907 | } else if (rc) { |
901 | kfree(full_path); | ||
902 | _FreeXid(xid); | ||
903 | iget_failed(inode); | 908 | iget_failed(inode); |
904 | return ERR_PTR(rc); | 909 | inode = ERR_PTR(rc); |
905 | } | 910 | } |
906 | 911 | ||
907 | 912 | out: | |
908 | kfree(full_path); | 913 | kfree(full_path); |
909 | /* can not call macro FreeXid here since in a void func | 914 | /* can not call macro FreeXid here since in a void func |
910 | * TODO: This is no longer true | 915 | * TODO: This is no longer true |
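Both cifs_root_iget() fixes above convert early returns into a single exit label so that full_path is freed (and the xid released) on every path. The goto-cleanup idiom in a tiny standalone form; the names and error value are illustrative.

    #include <stdio.h>
    #include <stdlib.h>

    /* Single-exit cleanup: every failure path funnels through 'out', so the
     * allocation is released exactly once instead of leaking on early return. */
    int do_lookup(const char *name)
    {
        int rc = 0;
        char *full_path = malloc(256);

        if (!full_path)
            return -1;

        if (snprintf(full_path, 256, "/srv/%s", name) >= 256) {
            rc = -1;                /* name too long for the buffer */
            goto out;
        }
        /* ... use full_path ... */
    out:
        free(full_path);
        return rc;
    }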
@@ -1648,6 +1653,7 @@ static bool | |||
1648 | cifs_inode_needs_reval(struct inode *inode) | 1653 | cifs_inode_needs_reval(struct inode *inode) |
1649 | { | 1654 | { |
1650 | struct cifsInodeInfo *cifs_i = CIFS_I(inode); | 1655 | struct cifsInodeInfo *cifs_i = CIFS_I(inode); |
1656 | struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); | ||
1651 | 1657 | ||
1652 | if (cifs_i->clientCanCacheRead) | 1658 | if (cifs_i->clientCanCacheRead) |
1653 | return false; | 1659 | return false; |
@@ -1658,19 +1664,21 @@ cifs_inode_needs_reval(struct inode *inode) | |||
1658 | if (cifs_i->time == 0) | 1664 | if (cifs_i->time == 0) |
1659 | return true; | 1665 | return true; |
1660 | 1666 | ||
1661 | /* FIXME: the actimeo should be tunable */ | 1667 | if (!time_in_range(jiffies, cifs_i->time, |
1662 | if (time_after_eq(jiffies, cifs_i->time + HZ)) | 1668 | cifs_i->time + cifs_sb->actimeo)) |
1663 | return true; | 1669 | return true; |
1664 | 1670 | ||
1665 | /* hardlinked files w/ noserverino get "special" treatment */ | 1671 | /* hardlinked files w/ noserverino get "special" treatment */ |
1666 | if (!(CIFS_SB(inode->i_sb)->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) && | 1672 | if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) && |
1667 | S_ISREG(inode->i_mode) && inode->i_nlink != 1) | 1673 | S_ISREG(inode->i_mode) && inode->i_nlink != 1) |
1668 | return true; | 1674 | return true; |
1669 | 1675 | ||
1670 | return false; | 1676 | return false; |
1671 | } | 1677 | } |
1672 | 1678 | ||
1673 | /* check invalid_mapping flag and zap the cache if it's set */ | 1679 | /* |
1680 | * Zap the cache. Called when invalid_mapping flag is set. | ||
1681 | */ | ||
1674 | static void | 1682 | static void |
1675 | cifs_invalidate_mapping(struct inode *inode) | 1683 | cifs_invalidate_mapping(struct inode *inode) |
1676 | { | 1684 | { |
@@ -2114,11 +2122,16 @@ cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs) | |||
2114 | 2122 | ||
2115 | if (attrs->ia_valid & ATTR_MODE) { | 2123 | if (attrs->ia_valid & ATTR_MODE) { |
2116 | rc = 0; | 2124 | rc = 0; |
2117 | #ifdef CONFIG_CIFS_EXPERIMENTAL | 2125 | #ifdef CONFIG_CIFS_ACL |
2118 | if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) | 2126 | if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) { |
2119 | rc = mode_to_acl(inode, full_path, mode); | 2127 | rc = mode_to_cifs_acl(inode, full_path, mode); |
2120 | else | 2128 | if (rc) { |
2121 | #endif | 2129 | cFYI(1, "%s: Setting ACL failed with error: %d", |
2130 | __func__, rc); | ||
2131 | goto cifs_setattr_exit; | ||
2132 | } | ||
2133 | } else | ||
2134 | #endif /* CONFIG_CIFS_ACL */ | ||
2122 | if (((mode & S_IWUGO) == 0) && | 2135 | if (((mode & S_IWUGO) == 0) && |
2123 | (cifsInode->cifsAttrs & ATTR_READONLY) == 0) { | 2136 | (cifsInode->cifsAttrs & ATTR_READONLY) == 0) { |
2124 | 2137 | ||
@@ -2177,7 +2190,6 @@ cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs) | |||
2177 | 2190 | ||
2178 | setattr_copy(inode, attrs); | 2191 | setattr_copy(inode, attrs); |
2179 | mark_inode_dirty(inode); | 2192 | mark_inode_dirty(inode); |
2180 | return 0; | ||
2181 | 2193 | ||
2182 | cifs_setattr_exit: | 2194 | cifs_setattr_exit: |
2183 | kfree(full_path); | 2195 | kfree(full_path); |
diff --git a/fs/cifs/ioctl.c b/fs/cifs/ioctl.c index 077bf756f342..0c98672d0122 100644 --- a/fs/cifs/ioctl.c +++ b/fs/cifs/ioctl.c | |||
@@ -38,10 +38,10 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg) | |||
38 | struct cifs_sb_info *cifs_sb; | 38 | struct cifs_sb_info *cifs_sb; |
39 | #ifdef CONFIG_CIFS_POSIX | 39 | #ifdef CONFIG_CIFS_POSIX |
40 | struct cifsFileInfo *pSMBFile = filep->private_data; | 40 | struct cifsFileInfo *pSMBFile = filep->private_data; |
41 | struct cifsTconInfo *tcon = tlink_tcon(pSMBFile->tlink); | 41 | struct cifsTconInfo *tcon; |
42 | __u64 ExtAttrBits = 0; | 42 | __u64 ExtAttrBits = 0; |
43 | __u64 ExtAttrMask = 0; | 43 | __u64 ExtAttrMask = 0; |
44 | __u64 caps = le64_to_cpu(tcon->fsUnixInfo.Capability); | 44 | __u64 caps; |
45 | #endif /* CONFIG_CIFS_POSIX */ | 45 | #endif /* CONFIG_CIFS_POSIX */ |
46 | 46 | ||
47 | xid = GetXid(); | 47 | xid = GetXid(); |
@@ -62,9 +62,11 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg) | |||
62 | break; | 62 | break; |
63 | #ifdef CONFIG_CIFS_POSIX | 63 | #ifdef CONFIG_CIFS_POSIX |
64 | case FS_IOC_GETFLAGS: | 64 | case FS_IOC_GETFLAGS: |
65 | if (pSMBFile == NULL) | ||
66 | break; | ||
67 | tcon = tlink_tcon(pSMBFile->tlink); | ||
68 | caps = le64_to_cpu(tcon->fsUnixInfo.Capability); | ||
65 | if (CIFS_UNIX_EXTATTR_CAP & caps) { | 69 | if (CIFS_UNIX_EXTATTR_CAP & caps) { |
66 | if (pSMBFile == NULL) | ||
67 | break; | ||
68 | rc = CIFSGetExtAttr(xid, tcon, pSMBFile->netfid, | 70 | rc = CIFSGetExtAttr(xid, tcon, pSMBFile->netfid, |
69 | &ExtAttrBits, &ExtAttrMask); | 71 | &ExtAttrBits, &ExtAttrMask); |
70 | if (rc == 0) | 72 | if (rc == 0) |
@@ -75,13 +77,15 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg) | |||
75 | break; | 77 | break; |
76 | 78 | ||
77 | case FS_IOC_SETFLAGS: | 79 | case FS_IOC_SETFLAGS: |
80 | if (pSMBFile == NULL) | ||
81 | break; | ||
82 | tcon = tlink_tcon(pSMBFile->tlink); | ||
83 | caps = le64_to_cpu(tcon->fsUnixInfo.Capability); | ||
78 | if (CIFS_UNIX_EXTATTR_CAP & caps) { | 84 | if (CIFS_UNIX_EXTATTR_CAP & caps) { |
79 | if (get_user(ExtAttrBits, (int __user *)arg)) { | 85 | if (get_user(ExtAttrBits, (int __user *)arg)) { |
80 | rc = -EFAULT; | 86 | rc = -EFAULT; |
81 | break; | 87 | break; |
82 | } | 88 | } |
83 | if (pSMBFile == NULL) | ||
84 | break; | ||
85 | /* rc= CIFSGetExtAttr(xid,tcon,pSMBFile->netfid, | 89 | /* rc= CIFSGetExtAttr(xid,tcon,pSMBFile->netfid, |
86 | extAttrBits, &ExtAttrMask);*/ | 90 | extAttrBits, &ExtAttrMask);*/ |
87 | } | 91 | } |
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c index c4e296fe3518..43f10281bc19 100644 --- a/fs/cifs/misc.c +++ b/fs/cifs/misc.c | |||
@@ -569,10 +569,9 @@ is_valid_oplock_break(struct smb_hdr *buf, struct TCP_Server_Info *srv) | |||
569 | 569 | ||
570 | cFYI(1, "file id match, oplock break"); | 570 | cFYI(1, "file id match, oplock break"); |
571 | pCifsInode = CIFS_I(netfile->dentry->d_inode); | 571 | pCifsInode = CIFS_I(netfile->dentry->d_inode); |
572 | pCifsInode->clientCanCacheAll = false; | ||
573 | if (pSMB->OplockLevel == 0) | ||
574 | pCifsInode->clientCanCacheRead = false; | ||
575 | 572 | ||
573 | cifs_set_oplock_level(pCifsInode, | ||
574 | pSMB->OplockLevel); | ||
576 | /* | 575 | /* |
577 | * cifs_oplock_break_put() can't be called | 576 | * cifs_oplock_break_put() can't be called |
578 | * from here. Get reference after queueing | 577 | * from here. Get reference after queueing |
@@ -722,3 +721,23 @@ cifs_autodisable_serverino(struct cifs_sb_info *cifs_sb) | |||
722 | cifs_sb_master_tcon(cifs_sb)->treeName); | 721 | cifs_sb_master_tcon(cifs_sb)->treeName); |
723 | } | 722 | } |
724 | } | 723 | } |
724 | |||
725 | void cifs_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock) | ||
726 | { | ||
727 | oplock &= 0xF; | ||
728 | |||
729 | if (oplock == OPLOCK_EXCLUSIVE) { | ||
730 | cinode->clientCanCacheAll = true; | ||
731 | cinode->clientCanCacheRead = true; | ||
732 | cFYI(1, "Exclusive Oplock granted on inode %p", | ||
733 | &cinode->vfs_inode); | ||
734 | } else if (oplock == OPLOCK_READ) { | ||
735 | cinode->clientCanCacheAll = false; | ||
736 | cinode->clientCanCacheRead = true; | ||
737 | cFYI(1, "Level II Oplock granted on inode %p", | ||
738 | &cinode->vfs_inode); | ||
739 | } else { | ||
740 | cinode->clientCanCacheAll = false; | ||
741 | cinode->clientCanCacheRead = false; | ||
742 | } | ||
743 | } | ||
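The new cifs_set_oplock_level() centralizes the oplock-to-cache-flag mapping that file.c and misc.c previously open-coded at each call site: an exclusive oplock allows caching reads and writes, a level II (read) oplock allows caching reads only, and anything else disables client-side caching. A standalone restatement of that mapping; the enum here is illustrative, whereas the kernel works on the raw oplock byte masked with 0xF.

    #include <stdbool.h>

    enum oplock_level { OPLOCK_LEVEL_NONE, OPLOCK_LEVEL_READ, OPLOCK_LEVEL_EXCLUSIVE };

    struct cache_state { bool can_cache_all; bool can_cache_read; };

    void set_oplock_level(struct cache_state *s, enum oplock_level level)
    {
        switch (level) {
        case OPLOCK_LEVEL_EXCLUSIVE:    /* cache reads and writes */
            s->can_cache_all = true;
            s->can_cache_read = true;
            break;
        case OPLOCK_LEVEL_READ:         /* level II: cache reads only */
            s->can_cache_all = false;
            s->can_cache_read = true;
            break;
        default:                        /* no oplock: no client caching */
            s->can_cache_all = false;
            s->can_cache_read = false;
            break;
        }
    }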
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c index ef7bb7b50f58..a73eb9f4bdaf 100644 --- a/fs/cifs/readdir.c +++ b/fs/cifs/readdir.c | |||
@@ -226,26 +226,29 @@ static int initiate_cifs_search(const int xid, struct file *file) | |||
226 | char *full_path = NULL; | 226 | char *full_path = NULL; |
227 | struct cifsFileInfo *cifsFile; | 227 | struct cifsFileInfo *cifsFile; |
228 | struct cifs_sb_info *cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); | 228 | struct cifs_sb_info *cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); |
229 | struct tcon_link *tlink; | 229 | struct tcon_link *tlink = NULL; |
230 | struct cifsTconInfo *pTcon; | 230 | struct cifsTconInfo *pTcon; |
231 | 231 | ||
232 | tlink = cifs_sb_tlink(cifs_sb); | ||
233 | if (IS_ERR(tlink)) | ||
234 | return PTR_ERR(tlink); | ||
235 | pTcon = tlink_tcon(tlink); | ||
236 | |||
237 | if (file->private_data == NULL) | ||
238 | file->private_data = | ||
239 | kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL); | ||
240 | if (file->private_data == NULL) { | 232 | if (file->private_data == NULL) { |
241 | rc = -ENOMEM; | 233 | tlink = cifs_sb_tlink(cifs_sb); |
242 | goto error_exit; | 234 | if (IS_ERR(tlink)) |
235 | return PTR_ERR(tlink); | ||
236 | |||
237 | cifsFile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL); | ||
238 | if (cifsFile == NULL) { | ||
239 | rc = -ENOMEM; | ||
240 | goto error_exit; | ||
241 | } | ||
242 | file->private_data = cifsFile; | ||
243 | cifsFile->tlink = cifs_get_tlink(tlink); | ||
244 | pTcon = tlink_tcon(tlink); | ||
245 | } else { | ||
246 | cifsFile = file->private_data; | ||
247 | pTcon = tlink_tcon(cifsFile->tlink); | ||
243 | } | 248 | } |
244 | 249 | ||
245 | cifsFile = file->private_data; | ||
246 | cifsFile->invalidHandle = true; | 250 | cifsFile->invalidHandle = true; |
247 | cifsFile->srch_inf.endOfSearch = false; | 251 | cifsFile->srch_inf.endOfSearch = false; |
248 | cifsFile->tlink = cifs_get_tlink(tlink); | ||
249 | 252 | ||
250 | full_path = build_path_from_dentry(file->f_path.dentry); | 253 | full_path = build_path_from_dentry(file->f_path.dentry); |
251 | if (full_path == NULL) { | 254 | if (full_path == NULL) { |
@@ -756,18 +759,6 @@ static int cifs_filldir(char *pfindEntry, struct file *file, filldir_t filldir, | |||
756 | rc = filldir(direntry, qstring.name, qstring.len, file->f_pos, | 759 | rc = filldir(direntry, qstring.name, qstring.len, file->f_pos, |
757 | ino, fattr.cf_dtype); | 760 | ino, fattr.cf_dtype); |
758 | 761 | ||
759 | /* | ||
760 | * we can not return filldir errors to the caller since they are | ||
761 | * "normal" when the stat blocksize is too small - we return remapped | ||
762 | * error instead | ||
763 | * | ||
764 | * FIXME: This looks bogus. filldir returns -EOVERFLOW in the above | ||
765 | * case already. Why should we be clobbering other errors from it? | ||
766 | */ | ||
767 | if (rc) { | ||
768 | cFYI(1, "filldir rc = %d", rc); | ||
769 | rc = -EOVERFLOW; | ||
770 | } | ||
771 | dput(tmp_dentry); | 762 | dput(tmp_dentry); |
772 | return rc; | 763 | return rc; |
773 | } | 764 | } |
diff --git a/fs/cifs/xattr.c b/fs/cifs/xattr.c index a264b744bb41..eae2a1491608 100644 --- a/fs/cifs/xattr.c +++ b/fs/cifs/xattr.c | |||
@@ -30,10 +30,11 @@ | |||
30 | 30 | ||
31 | #define MAX_EA_VALUE_SIZE 65535 | 31 | #define MAX_EA_VALUE_SIZE 65535 |
32 | #define CIFS_XATTR_DOS_ATTRIB "user.DosAttrib" | 32 | #define CIFS_XATTR_DOS_ATTRIB "user.DosAttrib" |
33 | #define CIFS_XATTR_CIFS_ACL "system.cifs_acl" | ||
33 | #define CIFS_XATTR_USER_PREFIX "user." | 34 | #define CIFS_XATTR_USER_PREFIX "user." |
34 | #define CIFS_XATTR_SYSTEM_PREFIX "system." | 35 | #define CIFS_XATTR_SYSTEM_PREFIX "system." |
35 | #define CIFS_XATTR_OS2_PREFIX "os2." | 36 | #define CIFS_XATTR_OS2_PREFIX "os2." |
36 | #define CIFS_XATTR_SECURITY_PREFIX ".security" | 37 | #define CIFS_XATTR_SECURITY_PREFIX "security." |
37 | #define CIFS_XATTR_TRUSTED_PREFIX "trusted." | 38 | #define CIFS_XATTR_TRUSTED_PREFIX "trusted." |
38 | #define XATTR_TRUSTED_PREFIX_LEN 8 | 39 | #define XATTR_TRUSTED_PREFIX_LEN 8 |
39 | #define XATTR_SECURITY_PREFIX_LEN 9 | 40 | #define XATTR_SECURITY_PREFIX_LEN 9 |
@@ -277,29 +278,8 @@ ssize_t cifs_getxattr(struct dentry *direntry, const char *ea_name, | |||
277 | cifs_sb->local_nls, | 278 | cifs_sb->local_nls, |
278 | cifs_sb->mnt_cifs_flags & | 279 | cifs_sb->mnt_cifs_flags & |
279 | CIFS_MOUNT_MAP_SPECIAL_CHR); | 280 | CIFS_MOUNT_MAP_SPECIAL_CHR); |
280 | #ifdef CONFIG_CIFS_EXPERIMENTAL | ||
281 | else if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) { | ||
282 | __u16 fid; | ||
283 | int oplock = 0; | ||
284 | struct cifs_ntsd *pacl = NULL; | ||
285 | __u32 buflen = 0; | ||
286 | if (experimEnabled) | ||
287 | rc = CIFSSMBOpen(xid, pTcon, full_path, | ||
288 | FILE_OPEN, GENERIC_READ, 0, &fid, | ||
289 | &oplock, NULL, cifs_sb->local_nls, | ||
290 | cifs_sb->mnt_cifs_flags & | ||
291 | CIFS_MOUNT_MAP_SPECIAL_CHR); | ||
292 | /* else rc is EOPNOTSUPP from above */ | ||
293 | |||
294 | if (rc == 0) { | ||
295 | rc = CIFSSMBGetCIFSACL(xid, pTcon, fid, &pacl, | ||
296 | &buflen); | ||
297 | CIFSSMBClose(xid, pTcon, fid); | ||
298 | } | ||
299 | } | ||
300 | #endif /* EXPERIMENTAL */ | ||
301 | #else | 281 | #else |
302 | cFYI(1, "query POSIX ACL not supported yet"); | 282 | cFYI(1, "Query POSIX ACL not supported yet"); |
303 | #endif /* CONFIG_CIFS_POSIX */ | 283 | #endif /* CONFIG_CIFS_POSIX */ |
304 | } else if (strncmp(ea_name, POSIX_ACL_XATTR_DEFAULT, | 284 | } else if (strncmp(ea_name, POSIX_ACL_XATTR_DEFAULT, |
305 | strlen(POSIX_ACL_XATTR_DEFAULT)) == 0) { | 285 | strlen(POSIX_ACL_XATTR_DEFAULT)) == 0) { |
@@ -311,8 +291,33 @@ ssize_t cifs_getxattr(struct dentry *direntry, const char *ea_name, | |||
311 | cifs_sb->mnt_cifs_flags & | 291 | cifs_sb->mnt_cifs_flags & |
312 | CIFS_MOUNT_MAP_SPECIAL_CHR); | 292 | CIFS_MOUNT_MAP_SPECIAL_CHR); |
313 | #else | 293 | #else |
314 | cFYI(1, "query POSIX default ACL not supported yet"); | 294 | cFYI(1, "Query POSIX default ACL not supported yet"); |
315 | #endif | 295 | #endif /* CONFIG_CIFS_POSIX */ |
296 | } else if (strncmp(ea_name, CIFS_XATTR_CIFS_ACL, | ||
297 | strlen(CIFS_XATTR_CIFS_ACL)) == 0) { | ||
298 | #ifdef CONFIG_CIFS_ACL | ||
299 | u32 acllen; | ||
300 | struct cifs_ntsd *pacl; | ||
301 | |||
302 | pacl = get_cifs_acl(cifs_sb, direntry->d_inode, | ||
303 | full_path, &acllen); | ||
304 | if (IS_ERR(pacl)) { | ||
305 | rc = PTR_ERR(pacl); | ||
306 | cERROR(1, "%s: error %zd getting sec desc", | ||
307 | __func__, rc); | ||
308 | } else { | ||
309 | if (ea_value) { | ||
310 | if (acllen > buf_size) | ||
311 | acllen = -ERANGE; | ||
312 | else | ||
313 | memcpy(ea_value, pacl, acllen); | ||
314 | } | ||
315 | rc = acllen; | ||
316 | kfree(pacl); | ||
317 | } | ||
318 | #else | ||
319 | cFYI(1, "Query CIFS ACL not supported yet"); | ||
320 | #endif /* CONFIG_CIFS_ACL */ | ||
316 | } else if (strncmp(ea_name, | 321 | } else if (strncmp(ea_name, |
317 | CIFS_XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) == 0) { | 322 | CIFS_XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) == 0) { |
318 | cFYI(1, "Trusted xattr namespace not supported yet"); | 323 | cFYI(1, "Trusted xattr namespace not supported yet"); |
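The new CIFS_XATTR_CIFS_ACL branch exposes the raw NT security descriptor through getxattr(2) with the usual xattr sizing convention: the return value is the descriptor length, a NULL buffer performs a pure size query, and -ERANGE is returned when the supplied buffer is too small. A hedged user-space sketch, assuming a CIFS mount with CONFIG_CIFS_ACL enabled:

    /* Sketch: probe the descriptor size, then fetch it. */
    #include <stdio.h>
    #include <stdlib.h>
    #include <sys/xattr.h>

    int main(int argc, char **argv)
    {
            ssize_t len;
            void *sd;

            if (argc < 2)
                    return 1;

            len = getxattr(argv[1], "system.cifs_acl", NULL, 0);
            if (len < 0) { perror("getxattr(size)"); return 1; }

            sd = malloc(len);
            if (!sd)
                    return 1;
            len = getxattr(argv[1], "system.cifs_acl", sd, len);
            if (len < 0) { perror("getxattr(fetch)"); return 1; }

            printf("security descriptor: %zd bytes\n", len);
            free(sd);
            return 0;
    }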
diff --git a/fs/compat.c b/fs/compat.c index c580c322fa6b..eb1740ac8c0a 100644 --- a/fs/compat.c +++ b/fs/compat.c | |||
@@ -1350,6 +1350,10 @@ static int compat_count(compat_uptr_t __user *argv, int max) | |||
1350 | argv++; | 1350 | argv++; |
1351 | if (i++ >= max) | 1351 | if (i++ >= max) |
1352 | return -E2BIG; | 1352 | return -E2BIG; |
1353 | |||
1354 | if (fatal_signal_pending(current)) | ||
1355 | return -ERESTARTNOHAND; | ||
1356 | cond_resched(); | ||
1353 | } | 1357 | } |
1354 | } | 1358 | } |
1355 | return i; | 1359 | return i; |
@@ -1391,6 +1395,12 @@ static int compat_copy_strings(int argc, compat_uptr_t __user *argv, | |||
1391 | while (len > 0) { | 1395 | while (len > 0) { |
1392 | int offset, bytes_to_copy; | 1396 | int offset, bytes_to_copy; |
1393 | 1397 | ||
1398 | if (fatal_signal_pending(current)) { | ||
1399 | ret = -ERESTARTNOHAND; | ||
1400 | goto out; | ||
1401 | } | ||
1402 | cond_resched(); | ||
1403 | |||
1394 | offset = pos % PAGE_SIZE; | 1404 | offset = pos % PAGE_SIZE; |
1395 | if (offset == 0) | 1405 | if (offset == 0) |
1396 | offset = PAGE_SIZE; | 1406 | offset = PAGE_SIZE; |
@@ -1407,18 +1417,8 @@ static int compat_copy_strings(int argc, compat_uptr_t __user *argv, | |||
1407 | if (!kmapped_page || kpos != (pos & PAGE_MASK)) { | 1417 | if (!kmapped_page || kpos != (pos & PAGE_MASK)) { |
1408 | struct page *page; | 1418 | struct page *page; |
1409 | 1419 | ||
1410 | #ifdef CONFIG_STACK_GROWSUP | 1420 | page = get_arg_page(bprm, pos, 1); |
1411 | ret = expand_stack_downwards(bprm->vma, pos); | 1421 | if (!page) { |
1412 | if (ret < 0) { | ||
1413 | /* We've exceed the stack rlimit. */ | ||
1414 | ret = -E2BIG; | ||
1415 | goto out; | ||
1416 | } | ||
1417 | #endif | ||
1418 | ret = get_user_pages(current, bprm->mm, pos, | ||
1419 | 1, 1, 1, &page, NULL); | ||
1420 | if (ret <= 0) { | ||
1421 | /* We've exceed the stack rlimit. */ | ||
1422 | ret = -E2BIG; | 1422 | ret = -E2BIG; |
1423 | goto out; | 1423 | goto out; |
1424 | } | 1424 | } |
@@ -1539,8 +1539,10 @@ int compat_do_execve(char * filename, | |||
1539 | return retval; | 1539 | return retval; |
1540 | 1540 | ||
1541 | out: | 1541 | out: |
1542 | if (bprm->mm) | 1542 | if (bprm->mm) { |
1543 | acct_arg_size(bprm, 0); | ||
1543 | mmput(bprm->mm); | 1544 | mmput(bprm->mm); |
1545 | } | ||
1544 | 1546 | ||
1545 | out_file: | 1547 | out_file: |
1546 | if (bprm->file) { | 1548 | if (bprm->file) { |
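Both compat_count() and compat_copy_strings() gain the same guard the native execve path uses: a long, user-controlled loop periodically checks for a fatal signal and yields, so a task that has been killed while copying a huge argv does not keep burning CPU in the kernel. The shape of the pattern, as a hedged kernel-context fragment (do_one_chunk() and work_left are placeholders):

    /* Sketch: check-and-yield inside a long copy loop. */
    while (work_left) {
            if (fatal_signal_pending(current))
                    return -ERESTARTNOHAND;  /* let the fatal signal win */
            cond_resched();                  /* give the scheduler a chance */

            do_one_chunk();
            work_left--;
    }

The restart code is never visible to user space in practice, because the fatal signal terminates the task before the syscall returns.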
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c index 410ed188faa1..a60579b007b0 100644 --- a/fs/compat_ioctl.c +++ b/fs/compat_ioctl.c | |||
@@ -19,7 +19,6 @@ | |||
19 | #include <linux/compiler.h> | 19 | #include <linux/compiler.h> |
20 | #include <linux/sched.h> | 20 | #include <linux/sched.h> |
21 | #include <linux/smp.h> | 21 | #include <linux/smp.h> |
22 | #include <linux/smp_lock.h> | ||
23 | #include <linux/ioctl.h> | 22 | #include <linux/ioctl.h> |
24 | #include <linux/if.h> | 23 | #include <linux/if.h> |
25 | #include <linux/if_bridge.h> | 24 | #include <linux/if_bridge.h> |
diff --git a/fs/ecryptfs/super.c b/fs/ecryptfs/super.c index 253732382d37..2720178b7718 100644 --- a/fs/ecryptfs/super.c +++ b/fs/ecryptfs/super.c | |||
@@ -28,7 +28,6 @@ | |||
28 | #include <linux/key.h> | 28 | #include <linux/key.h> |
29 | #include <linux/slab.h> | 29 | #include <linux/slab.h> |
30 | #include <linux/seq_file.h> | 30 | #include <linux/seq_file.h> |
31 | #include <linux/smp_lock.h> | ||
32 | #include <linux/file.h> | 31 | #include <linux/file.h> |
33 | #include <linux/crypto.h> | 32 | #include <linux/crypto.h> |
34 | #include "ecryptfs_kernel.h" | 33 | #include "ecryptfs_kernel.h" |
diff --git a/fs/exec.c b/fs/exec.c --- a/fs/exec.c +++ b/fs/exec.c | |||
@@ -164,7 +164,26 @@ out: | |||
164 | 164 | ||
165 | #ifdef CONFIG_MMU | 165 | #ifdef CONFIG_MMU |
166 | 166 | ||
167 | static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos, | 167 | void acct_arg_size(struct linux_binprm *bprm, unsigned long pages) |
168 | { | ||
169 | struct mm_struct *mm = current->mm; | ||
170 | long diff = (long)(pages - bprm->vma_pages); | ||
171 | |||
172 | if (!mm || !diff) | ||
173 | return; | ||
174 | |||
175 | bprm->vma_pages = pages; | ||
176 | |||
177 | #ifdef SPLIT_RSS_COUNTING | ||
178 | add_mm_counter(mm, MM_ANONPAGES, diff); | ||
179 | #else | ||
180 | spin_lock(&mm->page_table_lock); | ||
181 | add_mm_counter(mm, MM_ANONPAGES, diff); | ||
182 | spin_unlock(&mm->page_table_lock); | ||
183 | #endif | ||
184 | } | ||
185 | |||
186 | struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos, | ||
168 | int write) | 187 | int write) |
169 | { | 188 | { |
170 | struct page *page; | 189 | struct page *page; |
@@ -186,6 +205,8 @@ static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos, | |||
186 | unsigned long size = bprm->vma->vm_end - bprm->vma->vm_start; | 205 | unsigned long size = bprm->vma->vm_end - bprm->vma->vm_start; |
187 | struct rlimit *rlim; | 206 | struct rlimit *rlim; |
188 | 207 | ||
208 | acct_arg_size(bprm, size / PAGE_SIZE); | ||
209 | |||
189 | /* | 210 | /* |
190 | * We've historically supported up to 32 pages (ARG_MAX) | 211 | * We've historically supported up to 32 pages (ARG_MAX) |
191 | * of argument strings even with small stacks | 212 | * of argument strings even with small stacks |
@@ -254,6 +275,11 @@ static int __bprm_mm_init(struct linux_binprm *bprm) | |||
254 | vma->vm_flags = VM_STACK_FLAGS | VM_STACK_INCOMPLETE_SETUP; | 275 | vma->vm_flags = VM_STACK_FLAGS | VM_STACK_INCOMPLETE_SETUP; |
255 | vma->vm_page_prot = vm_get_page_prot(vma->vm_flags); | 276 | vma->vm_page_prot = vm_get_page_prot(vma->vm_flags); |
256 | INIT_LIST_HEAD(&vma->anon_vma_chain); | 277 | INIT_LIST_HEAD(&vma->anon_vma_chain); |
278 | |||
279 | err = security_file_mmap(NULL, 0, 0, 0, vma->vm_start, 1); | ||
280 | if (err) | ||
281 | goto err; | ||
282 | |||
257 | err = insert_vm_struct(mm, vma); | 283 | err = insert_vm_struct(mm, vma); |
258 | if (err) | 284 | if (err) |
259 | goto err; | 285 | goto err; |
@@ -276,7 +302,11 @@ static bool valid_arg_len(struct linux_binprm *bprm, long len) | |||
276 | 302 | ||
277 | #else | 303 | #else |
278 | 304 | ||
279 | static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos, | 305 | void acct_arg_size(struct linux_binprm *bprm, unsigned long pages) |
306 | { | ||
307 | } | ||
308 | |||
309 | struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos, | ||
280 | int write) | 310 | int write) |
281 | { | 311 | { |
282 | struct page *page; | 312 | struct page *page; |
@@ -1003,6 +1033,7 @@ int flush_old_exec(struct linux_binprm * bprm) | |||
1003 | /* | 1033 | /* |
1004 | * Release all of the old mmap stuff | 1034 | * Release all of the old mmap stuff |
1005 | */ | 1035 | */ |
1036 | acct_arg_size(bprm, 0); | ||
1006 | retval = exec_mmap(bprm->mm); | 1037 | retval = exec_mmap(bprm->mm); |
1007 | if (retval) | 1038 | if (retval) |
1008 | goto out; | 1039 | goto out; |
@@ -1426,8 +1457,10 @@ int do_execve(const char * filename, | |||
1426 | return retval; | 1457 | return retval; |
1427 | 1458 | ||
1428 | out: | 1459 | out: |
1429 | if (bprm->mm) | 1460 | if (bprm->mm) { |
1430 | mmput (bprm->mm); | 1461 | acct_arg_size(bprm, 0); |
1462 | mmput(bprm->mm); | ||
1463 | } | ||
1431 | 1464 | ||
1432 | out_file: | 1465 | out_file: |
1433 | if (bprm->file) { | 1466 | if (bprm->file) { |
diff --git a/fs/ext3/super.c b/fs/ext3/super.c index 2fedaf8b5012..acf8695fa8f0 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c | |||
@@ -27,7 +27,6 @@ | |||
27 | #include <linux/init.h> | 27 | #include <linux/init.h> |
28 | #include <linux/blkdev.h> | 28 | #include <linux/blkdev.h> |
29 | #include <linux/parser.h> | 29 | #include <linux/parser.h> |
30 | #include <linux/smp_lock.h> | ||
31 | #include <linux/buffer_head.h> | 30 | #include <linux/buffer_head.h> |
32 | #include <linux/exportfs.h> | 31 | #include <linux/exportfs.h> |
33 | #include <linux/vfs.h> | 32 | #include <linux/vfs.h> |
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 47162de0b957..1f253a9a141d 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h | |||
@@ -177,7 +177,7 @@ struct mpage_da_data { | |||
177 | 177 | ||
178 | struct ext4_io_page { | 178 | struct ext4_io_page { |
179 | struct page *p_page; | 179 | struct page *p_page; |
180 | int p_count; | 180 | atomic_t p_count; |
181 | }; | 181 | }; |
182 | 182 | ||
183 | #define MAX_IO_PAGES 128 | 183 | #define MAX_IO_PAGES 128 |
@@ -858,6 +858,7 @@ struct ext4_inode_info { | |||
858 | spinlock_t i_completed_io_lock; | 858 | spinlock_t i_completed_io_lock; |
859 | /* current io_end structure for async DIO write*/ | 859 | /* current io_end structure for async DIO write*/ |
860 | ext4_io_end_t *cur_aio_dio; | 860 | ext4_io_end_t *cur_aio_dio; |
861 | atomic_t i_ioend_count; /* Number of outstanding io_end structs */ | ||
861 | 862 | ||
862 | /* | 863 | /* |
863 | * Transactions that contain inode's metadata needed to complete | 864 | * Transactions that contain inode's metadata needed to complete |
@@ -909,6 +910,7 @@ struct ext4_inode_info { | |||
909 | #define EXT4_MOUNT_JOURNAL_CHECKSUM 0x800000 /* Journal checksums */ | 910 | #define EXT4_MOUNT_JOURNAL_CHECKSUM 0x800000 /* Journal checksums */ |
910 | #define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT 0x1000000 /* Journal Async Commit */ | 911 | #define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT 0x1000000 /* Journal Async Commit */ |
911 | #define EXT4_MOUNT_I_VERSION 0x2000000 /* i_version support */ | 912 | #define EXT4_MOUNT_I_VERSION 0x2000000 /* i_version support */ |
913 | #define EXT4_MOUNT_MBLK_IO_SUBMIT 0x4000000 /* multi-block io submits */ | ||
912 | #define EXT4_MOUNT_DELALLOC 0x8000000 /* Delalloc support */ | 914 | #define EXT4_MOUNT_DELALLOC 0x8000000 /* Delalloc support */ |
913 | #define EXT4_MOUNT_DATA_ERR_ABORT 0x10000000 /* Abort on file data write */ | 915 | #define EXT4_MOUNT_DATA_ERR_ABORT 0x10000000 /* Abort on file data write */ |
914 | #define EXT4_MOUNT_BLOCK_VALIDITY 0x20000000 /* Block validity checking */ | 916 | #define EXT4_MOUNT_BLOCK_VALIDITY 0x20000000 /* Block validity checking */ |
@@ -2060,6 +2062,7 @@ extern int ext4_move_extents(struct file *o_filp, struct file *d_filp, | |||
2060 | /* page-io.c */ | 2062 | /* page-io.c */ |
2061 | extern int __init ext4_init_pageio(void); | 2063 | extern int __init ext4_init_pageio(void); |
2062 | extern void ext4_exit_pageio(void); | 2064 | extern void ext4_exit_pageio(void); |
2065 | extern void ext4_ioend_wait(struct inode *); | ||
2063 | extern void ext4_free_io_end(ext4_io_end_t *io); | 2066 | extern void ext4_free_io_end(ext4_io_end_t *io); |
2064 | extern ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags); | 2067 | extern ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags); |
2065 | extern int ext4_end_io_nolock(ext4_io_end_t *io); | 2068 | extern int ext4_end_io_nolock(ext4_io_end_t *io); |
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index b6a4b41d7e14..ef9d5be0b2a8 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c | |||
@@ -53,6 +53,7 @@ | |||
53 | static inline int ext4_begin_ordered_truncate(struct inode *inode, | 53 | static inline int ext4_begin_ordered_truncate(struct inode *inode, |
54 | loff_t new_size) | 54 | loff_t new_size) |
55 | { | 55 | { |
56 | trace_ext4_begin_ordered_truncate(inode, new_size); | ||
56 | return jbd2_journal_begin_ordered_truncate( | 57 | return jbd2_journal_begin_ordered_truncate( |
57 | EXT4_SB(inode->i_sb)->s_journal, | 58 | EXT4_SB(inode->i_sb)->s_journal, |
58 | &EXT4_I(inode)->jinode, | 59 | &EXT4_I(inode)->jinode, |
@@ -178,6 +179,7 @@ void ext4_evict_inode(struct inode *inode) | |||
178 | handle_t *handle; | 179 | handle_t *handle; |
179 | int err; | 180 | int err; |
180 | 181 | ||
182 | trace_ext4_evict_inode(inode); | ||
181 | if (inode->i_nlink) { | 183 | if (inode->i_nlink) { |
182 | truncate_inode_pages(&inode->i_data, 0); | 184 | truncate_inode_pages(&inode->i_data, 0); |
183 | goto no_delete; | 185 | goto no_delete; |
@@ -2123,9 +2125,12 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd, | |||
2123 | */ | 2125 | */ |
2124 | if (unlikely(journal_data && PageChecked(page))) | 2126 | if (unlikely(journal_data && PageChecked(page))) |
2125 | err = __ext4_journalled_writepage(page, len); | 2127 | err = __ext4_journalled_writepage(page, len); |
2126 | else | 2128 | else if (test_opt(inode->i_sb, MBLK_IO_SUBMIT)) |
2127 | err = ext4_bio_write_page(&io_submit, page, | 2129 | err = ext4_bio_write_page(&io_submit, page, |
2128 | len, mpd->wbc); | 2130 | len, mpd->wbc); |
2131 | else | ||
2132 | err = block_write_full_page(page, | ||
2133 | noalloc_get_block_write, mpd->wbc); | ||
2129 | 2134 | ||
2130 | if (!err) | 2135 | if (!err) |
2131 | mpd->pages_written++; | 2136 | mpd->pages_written++; |
@@ -5410,9 +5415,7 @@ int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry, | |||
5410 | * will return the blocks that include the delayed allocation | 5415 | * will return the blocks that include the delayed allocation |
5411 | * blocks for this file. | 5416 | * blocks for this file. |
5412 | */ | 5417 | */ |
5413 | spin_lock(&EXT4_I(inode)->i_block_reservation_lock); | ||
5414 | delalloc_blocks = EXT4_I(inode)->i_reserved_data_blocks; | 5418 | delalloc_blocks = EXT4_I(inode)->i_reserved_data_blocks; |
5415 | spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); | ||
5416 | 5419 | ||
5417 | stat->blocks += (delalloc_blocks << inode->i_sb->s_blocksize_bits)>>9; | 5420 | stat->blocks += (delalloc_blocks << inode->i_sb->s_blocksize_bits)>>9; |
5418 | return 0; | 5421 | return 0; |
@@ -5649,6 +5652,7 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode) | |||
5649 | int err, ret; | 5652 | int err, ret; |
5650 | 5653 | ||
5651 | might_sleep(); | 5654 | might_sleep(); |
5655 | trace_ext4_mark_inode_dirty(inode, _RET_IP_); | ||
5652 | err = ext4_reserve_inode_write(handle, inode, &iloc); | 5656 | err = ext4_reserve_inode_write(handle, inode, &iloc); |
5653 | if (ext4_handle_valid(handle) && | 5657 | if (ext4_handle_valid(handle) && |
5654 | EXT4_I(inode)->i_extra_isize < sbi->s_want_extra_isize && | 5658 | EXT4_I(inode)->i_extra_isize < sbi->s_want_extra_isize && |
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index bf5ae883b1bd..eb3bc2fe647e 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c | |||
@@ -331,6 +331,30 @@ mext_out: | |||
331 | return err; | 331 | return err; |
332 | } | 332 | } |
333 | 333 | ||
334 | case FITRIM: | ||
335 | { | ||
336 | struct super_block *sb = inode->i_sb; | ||
337 | struct fstrim_range range; | ||
338 | int ret = 0; | ||
339 | |||
340 | if (!capable(CAP_SYS_ADMIN)) | ||
341 | return -EPERM; | ||
342 | |||
343 | if (copy_from_user(&range, (struct fstrim_range *)arg, | ||
344 | sizeof(range))) | ||
345 | return -EFAULT; | ||
346 | |||
347 | ret = ext4_trim_fs(sb, &range); | ||
348 | if (ret < 0) | ||
349 | return ret; | ||
350 | |||
351 | if (copy_to_user((struct fstrim_range *)arg, &range, | ||
352 | sizeof(range))) | ||
353 | return -EFAULT; | ||
354 | |||
355 | return 0; | ||
356 | } | ||
357 | |||
334 | default: | 358 | default: |
335 | return -ENOTTY; | 359 | return -ENOTTY; |
336 | } | 360 | } |
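The new FITRIM case connects ext4_trim_fs() to the generic discard ioctl: user space passes a struct fstrim_range describing the region and the minimum extent length, and the structure is copied back afterwards (ext4 updates it, notably len, with what was actually processed). A hedged user-space sketch of the call, which is essentially what the fstrim(8) utility does:

    #include <fcntl.h>
    #include <limits.h>
    #include <stdio.h>
    #include <string.h>
    #include <sys/ioctl.h>
    #include <linux/fs.h>           /* FITRIM, struct fstrim_range */

    int main(int argc, char **argv)
    {
            struct fstrim_range range;
            int fd;

            if (argc < 2)
                    return 1;
            fd = open(argv[1], O_RDONLY);   /* any path on the filesystem */
            if (fd < 0) { perror("open"); return 1; }

            memset(&range, 0, sizeof(range));
            range.len = ULLONG_MAX;          /* trim the whole filesystem */
            range.minlen = 0;

            if (ioctl(fd, FITRIM, &range) < 0) { perror("FITRIM"); return 1; }
            printf("trimmed %llu bytes\n", (unsigned long long)range.len);
            return 0;
    }

CAP_SYS_ADMIN is required, matching the capable() check in the hunk above.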
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index c58eba34724a..5b4d4e3a4d58 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c | |||
@@ -4640,8 +4640,6 @@ do_more: | |||
4640 | * with group lock held. generate_buddy look at | 4640 | * with group lock held. generate_buddy look at |
4641 | * them with group lock_held | 4641 | * them with group lock_held |
4642 | */ | 4642 | */ |
4643 | if (test_opt(sb, DISCARD)) | ||
4644 | ext4_issue_discard(sb, block_group, bit, count); | ||
4645 | ext4_lock_group(sb, block_group); | 4643 | ext4_lock_group(sb, block_group); |
4646 | mb_clear_bits(bitmap_bh->b_data, bit, count); | 4644 | mb_clear_bits(bitmap_bh->b_data, bit, count); |
4647 | mb_free_blocks(inode, &e4b, bit, count); | 4645 | mb_free_blocks(inode, &e4b, bit, count); |
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 92203b8a099f..dc40e75cba88 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c | |||
@@ -872,7 +872,7 @@ static struct buffer_head * ext4_find_entry (struct inode *dir, | |||
872 | if (namelen > EXT4_NAME_LEN) | 872 | if (namelen > EXT4_NAME_LEN) |
873 | return NULL; | 873 | return NULL; |
874 | if ((namelen <= 2) && (name[0] == '.') && | 874 | if ((namelen <= 2) && (name[0] == '.') && |
875 | (name[1] == '.' || name[1] == '0')) { | 875 | (name[1] == '.' || name[1] == '\0')) { |
876 | /* | 876 | /* |
877 | * "." or ".." will only be in the first block | 877 | * "." or ".." will only be in the first block |
878 | * NFS may look up ".."; "." should be handled by the VFS | 878 | * NFS may look up ".."; "." should be handled by the VFS |
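The one-byte namei.c fix is easy to miss but real: the old test compared name[1] against the character '0', so a directory entry literally named ".0" satisfied the "this is . or .." fast path and was only searched for in the first directory block, while a bare "." (whose second byte is the terminating NUL) did not match at all. Comparing against '\0' restores the intended check. The corrected predicate in isolation, as a hedged stand-alone sketch (dentry names are NUL terminated and at least one byte long):

    #include <stdbool.h>
    #include <stddef.h>

    /* Mirrors the fixed ext4_find_entry() test. */
    static bool is_dot_or_dotdot(const char *name, size_t namelen)
    {
            return namelen <= 2 && name[0] == '.' &&
                   (name[1] == '.' || name[1] == '\0');
    }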
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c index 46a7d6a9d976..beacce11ac50 100644 --- a/fs/ext4/page-io.c +++ b/fs/ext4/page-io.c | |||
@@ -32,8 +32,14 @@ | |||
32 | 32 | ||
33 | static struct kmem_cache *io_page_cachep, *io_end_cachep; | 33 | static struct kmem_cache *io_page_cachep, *io_end_cachep; |
34 | 34 | ||
35 | #define WQ_HASH_SZ 37 | ||
36 | #define to_ioend_wq(v) (&ioend_wq[((unsigned long)v) % WQ_HASH_SZ]) | ||
37 | static wait_queue_head_t ioend_wq[WQ_HASH_SZ]; | ||
38 | |||
35 | int __init ext4_init_pageio(void) | 39 | int __init ext4_init_pageio(void) |
36 | { | 40 | { |
41 | int i; | ||
42 | |||
37 | io_page_cachep = KMEM_CACHE(ext4_io_page, SLAB_RECLAIM_ACCOUNT); | 43 | io_page_cachep = KMEM_CACHE(ext4_io_page, SLAB_RECLAIM_ACCOUNT); |
38 | if (io_page_cachep == NULL) | 44 | if (io_page_cachep == NULL) |
39 | return -ENOMEM; | 45 | return -ENOMEM; |
@@ -42,6 +48,8 @@ int __init ext4_init_pageio(void) | |||
42 | kmem_cache_destroy(io_page_cachep); | 48 | kmem_cache_destroy(io_page_cachep); |
43 | return -ENOMEM; | 49 | return -ENOMEM; |
44 | } | 50 | } |
51 | for (i = 0; i < WQ_HASH_SZ; i++) | ||
52 | init_waitqueue_head(&ioend_wq[i]); | ||
45 | 53 | ||
46 | return 0; | 54 | return 0; |
47 | } | 55 | } |
@@ -52,24 +60,37 @@ void ext4_exit_pageio(void) | |||
52 | kmem_cache_destroy(io_page_cachep); | 60 | kmem_cache_destroy(io_page_cachep); |
53 | } | 61 | } |
54 | 62 | ||
63 | void ext4_ioend_wait(struct inode *inode) | ||
64 | { | ||
65 | wait_queue_head_t *wq = to_ioend_wq(inode); | ||
66 | |||
67 | wait_event(*wq, (atomic_read(&EXT4_I(inode)->i_ioend_count) == 0)); | ||
68 | } | ||
69 | |||
70 | static void put_io_page(struct ext4_io_page *io_page) | ||
71 | { | ||
72 | if (atomic_dec_and_test(&io_page->p_count)) { | ||
73 | end_page_writeback(io_page->p_page); | ||
74 | put_page(io_page->p_page); | ||
75 | kmem_cache_free(io_page_cachep, io_page); | ||
76 | } | ||
77 | } | ||
78 | |||
55 | void ext4_free_io_end(ext4_io_end_t *io) | 79 | void ext4_free_io_end(ext4_io_end_t *io) |
56 | { | 80 | { |
57 | int i; | 81 | int i; |
82 | wait_queue_head_t *wq; | ||
58 | 83 | ||
59 | BUG_ON(!io); | 84 | BUG_ON(!io); |
60 | if (io->page) | 85 | if (io->page) |
61 | put_page(io->page); | 86 | put_page(io->page); |
62 | for (i = 0; i < io->num_io_pages; i++) { | 87 | for (i = 0; i < io->num_io_pages; i++) |
63 | if (--io->pages[i]->p_count == 0) { | 88 | put_io_page(io->pages[i]); |
64 | struct page *page = io->pages[i]->p_page; | ||
65 | |||
66 | end_page_writeback(page); | ||
67 | put_page(page); | ||
68 | kmem_cache_free(io_page_cachep, io->pages[i]); | ||
69 | } | ||
70 | } | ||
71 | io->num_io_pages = 0; | 89 | io->num_io_pages = 0; |
72 | iput(io->inode); | 90 | wq = to_ioend_wq(io->inode); |
91 | if (atomic_dec_and_test(&EXT4_I(io->inode)->i_ioend_count) && | ||
92 | waitqueue_active(wq)) | ||
93 | wake_up_all(wq); | ||
73 | kmem_cache_free(io_end_cachep, io); | 94 | kmem_cache_free(io_end_cachep, io); |
74 | } | 95 | } |
75 | 96 | ||
@@ -142,8 +163,8 @@ ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags) | |||
142 | io = kmem_cache_alloc(io_end_cachep, flags); | 163 | io = kmem_cache_alloc(io_end_cachep, flags); |
143 | if (io) { | 164 | if (io) { |
144 | memset(io, 0, sizeof(*io)); | 165 | memset(io, 0, sizeof(*io)); |
145 | io->inode = igrab(inode); | 166 | atomic_inc(&EXT4_I(inode)->i_ioend_count); |
146 | BUG_ON(!io->inode); | 167 | io->inode = inode; |
147 | INIT_WORK(&io->work, ext4_end_io_work); | 168 | INIT_WORK(&io->work, ext4_end_io_work); |
148 | INIT_LIST_HEAD(&io->list); | 169 | INIT_LIST_HEAD(&io->list); |
149 | } | 170 | } |
@@ -171,35 +192,15 @@ static void ext4_end_bio(struct bio *bio, int error) | |||
171 | struct workqueue_struct *wq; | 192 | struct workqueue_struct *wq; |
172 | struct inode *inode; | 193 | struct inode *inode; |
173 | unsigned long flags; | 194 | unsigned long flags; |
174 | ext4_fsblk_t err_block; | ||
175 | int i; | 195 | int i; |
176 | 196 | ||
177 | BUG_ON(!io_end); | 197 | BUG_ON(!io_end); |
178 | inode = io_end->inode; | ||
179 | bio->bi_private = NULL; | 198 | bio->bi_private = NULL; |
180 | bio->bi_end_io = NULL; | 199 | bio->bi_end_io = NULL; |
181 | if (test_bit(BIO_UPTODATE, &bio->bi_flags)) | 200 | if (test_bit(BIO_UPTODATE, &bio->bi_flags)) |
182 | error = 0; | 201 | error = 0; |
183 | err_block = bio->bi_sector >> (inode->i_blkbits - 9); | ||
184 | bio_put(bio); | 202 | bio_put(bio); |
185 | 203 | ||
186 | if (!(inode->i_sb->s_flags & MS_ACTIVE)) { | ||
187 | pr_err("sb umounted, discard end_io request for inode %lu\n", | ||
188 | io_end->inode->i_ino); | ||
189 | ext4_free_io_end(io_end); | ||
190 | return; | ||
191 | } | ||
192 | |||
193 | if (error) { | ||
194 | io_end->flag |= EXT4_IO_END_ERROR; | ||
195 | ext4_warning(inode->i_sb, "I/O error writing to inode %lu " | ||
196 | "(offset %llu size %ld starting block %llu)", | ||
197 | inode->i_ino, | ||
198 | (unsigned long long) io_end->offset, | ||
199 | (long) io_end->size, | ||
200 | (unsigned long long) err_block); | ||
201 | } | ||
202 | |||
203 | for (i = 0; i < io_end->num_io_pages; i++) { | 204 | for (i = 0; i < io_end->num_io_pages; i++) { |
204 | struct page *page = io_end->pages[i]->p_page; | 205 | struct page *page = io_end->pages[i]->p_page; |
205 | struct buffer_head *bh, *head; | 206 | struct buffer_head *bh, *head; |
@@ -236,14 +237,6 @@ static void ext4_end_bio(struct bio *bio, int error) | |||
236 | } while (bh != head); | 237 | } while (bh != head); |
237 | } | 238 | } |
238 | 239 | ||
239 | if (--io_end->pages[i]->p_count == 0) { | ||
240 | struct page *page = io_end->pages[i]->p_page; | ||
241 | |||
242 | end_page_writeback(page); | ||
243 | put_page(page); | ||
244 | kmem_cache_free(io_page_cachep, io_end->pages[i]); | ||
245 | } | ||
246 | |||
247 | /* | 240 | /* |
248 | * If this is a partial write which happened to make | 241 | * If this is a partial write which happened to make |
249 | * all buffers uptodate then we can optimize away a | 242 | * all buffers uptodate then we can optimize away a |
@@ -253,9 +246,22 @@ static void ext4_end_bio(struct bio *bio, int error) | |||
253 | */ | 246 | */ |
254 | if (!partial_write) | 247 | if (!partial_write) |
255 | SetPageUptodate(page); | 248 | SetPageUptodate(page); |
256 | } | ||
257 | 249 | ||
250 | put_io_page(io_end->pages[i]); | ||
251 | } | ||
258 | io_end->num_io_pages = 0; | 252 | io_end->num_io_pages = 0; |
253 | inode = io_end->inode; | ||
254 | |||
255 | if (error) { | ||
256 | io_end->flag |= EXT4_IO_END_ERROR; | ||
257 | ext4_warning(inode->i_sb, "I/O error writing to inode %lu " | ||
258 | "(offset %llu size %ld starting block %llu)", | ||
259 | inode->i_ino, | ||
260 | (unsigned long long) io_end->offset, | ||
261 | (long) io_end->size, | ||
262 | (unsigned long long) | ||
263 | bio->bi_sector >> (inode->i_blkbits - 9)); | ||
264 | } | ||
259 | 265 | ||
260 | /* Add the io_end to per-inode completed io list*/ | 266 | /* Add the io_end to per-inode completed io list*/ |
261 | spin_lock_irqsave(&EXT4_I(inode)->i_completed_io_lock, flags); | 267 | spin_lock_irqsave(&EXT4_I(inode)->i_completed_io_lock, flags); |
@@ -305,7 +311,6 @@ static int io_submit_init(struct ext4_io_submit *io, | |||
305 | bio->bi_private = io->io_end = io_end; | 311 | bio->bi_private = io->io_end = io_end; |
306 | bio->bi_end_io = ext4_end_bio; | 312 | bio->bi_end_io = ext4_end_bio; |
307 | 313 | ||
308 | io_end->inode = inode; | ||
309 | io_end->offset = (page->index << PAGE_CACHE_SHIFT) + bh_offset(bh); | 314 | io_end->offset = (page->index << PAGE_CACHE_SHIFT) + bh_offset(bh); |
310 | 315 | ||
311 | io->io_bio = bio; | 316 | io->io_bio = bio; |
@@ -360,7 +365,7 @@ submit_and_retry: | |||
360 | if ((io_end->num_io_pages == 0) || | 365 | if ((io_end->num_io_pages == 0) || |
361 | (io_end->pages[io_end->num_io_pages-1] != io_page)) { | 366 | (io_end->pages[io_end->num_io_pages-1] != io_page)) { |
362 | io_end->pages[io_end->num_io_pages++] = io_page; | 367 | io_end->pages[io_end->num_io_pages++] = io_page; |
363 | io_page->p_count++; | 368 | atomic_inc(&io_page->p_count); |
364 | } | 369 | } |
365 | return 0; | 370 | return 0; |
366 | } | 371 | } |
@@ -389,7 +394,7 @@ int ext4_bio_write_page(struct ext4_io_submit *io, | |||
389 | return -ENOMEM; | 394 | return -ENOMEM; |
390 | } | 395 | } |
391 | io_page->p_page = page; | 396 | io_page->p_page = page; |
392 | io_page->p_count = 0; | 397 | atomic_set(&io_page->p_count, 1); |
393 | get_page(page); | 398 | get_page(page); |
394 | 399 | ||
395 | for (bh = head = page_buffers(page), block_start = 0; | 400 | for (bh = head = page_buffers(page), block_start = 0; |
@@ -421,10 +426,6 @@ int ext4_bio_write_page(struct ext4_io_submit *io, | |||
421 | * PageWriteback bit from the page to prevent the system from | 426 | * PageWriteback bit from the page to prevent the system from |
422 | * wedging later on. | 427 | * wedging later on. |
423 | */ | 428 | */ |
424 | if (io_page->p_count == 0) { | 429 | put_io_page(io_page); |
425 | put_page(page); | ||
426 | end_page_writeback(page); | ||
427 | kmem_cache_free(io_page_cachep, io_page); | ||
428 | } | ||
429 | return ret; | 430 | return ret; |
430 | } | 431 | } |
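Taken together, the page-io.c hunks make two structural changes: io_page refcounting moves from a bare int to an atomic_t with a single put_io_page() helper (so ext4_end_bio(), ext4_free_io_end() and ext4_bio_write_page() all drop references the same way), and the per-io_end igrab()/iput() pair is replaced by a per-inode i_ioend_count plus a small hash of wait queues that ext4_destroy_inode() sleeps on via ext4_ioend_wait(). A hedged, generic sketch of that counter-plus-hashed-waitqueue idiom, using illustrative names (struct foo, obj_io_*) rather than the ext4 ones; it is a kernel-context fragment, not a standalone module:

    #define WQ_HASH_SZ 37
    static wait_queue_head_t wq_hash[WQ_HASH_SZ];  /* init_waitqueue_head() at init */
    #define obj_wq(obj) (&wq_hash[(unsigned long)(obj) % WQ_HASH_SZ])

    struct foo {
            atomic_t io_count;
    };

    static void obj_io_start(struct foo *obj)
    {
            atomic_inc(&obj->io_count);     /* one per in-flight completion */
    }

    static void obj_io_done(struct foo *obj)
    {
            wait_queue_head_t *wq = obj_wq(obj);

            if (atomic_dec_and_test(&obj->io_count) && waitqueue_active(wq))
                    wake_up_all(wq);
    }

    static void obj_wait_for_io(struct foo *obj)
    {
            wait_event(*obj_wq(obj), atomic_read(&obj->io_count) == 0);
    }

Hashing many objects onto a few wait queues keeps the footprint constant; a spurious wake-up of an unrelated waiter is harmless because wait_event() re-checks its condition.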
diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 40131b777af6..fb15c9c0be74 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c | |||
@@ -828,12 +828,22 @@ static struct inode *ext4_alloc_inode(struct super_block *sb) | |||
828 | ei->cur_aio_dio = NULL; | 828 | ei->cur_aio_dio = NULL; |
829 | ei->i_sync_tid = 0; | 829 | ei->i_sync_tid = 0; |
830 | ei->i_datasync_tid = 0; | 830 | ei->i_datasync_tid = 0; |
831 | atomic_set(&ei->i_ioend_count, 0); | ||
831 | 832 | ||
832 | return &ei->vfs_inode; | 833 | return &ei->vfs_inode; |
833 | } | 834 | } |
834 | 835 | ||
836 | static int ext4_drop_inode(struct inode *inode) | ||
837 | { | ||
838 | int drop = generic_drop_inode(inode); | ||
839 | |||
840 | trace_ext4_drop_inode(inode, drop); | ||
841 | return drop; | ||
842 | } | ||
843 | |||
835 | static void ext4_destroy_inode(struct inode *inode) | 844 | static void ext4_destroy_inode(struct inode *inode) |
836 | { | 845 | { |
846 | ext4_ioend_wait(inode); | ||
837 | if (!list_empty(&(EXT4_I(inode)->i_orphan))) { | 847 | if (!list_empty(&(EXT4_I(inode)->i_orphan))) { |
838 | ext4_msg(inode->i_sb, KERN_ERR, | 848 | ext4_msg(inode->i_sb, KERN_ERR, |
839 | "Inode %lu (%p): orphan list check failed!", | 849 | "Inode %lu (%p): orphan list check failed!", |
@@ -1016,6 +1026,8 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs) | |||
1016 | !(def_mount_opts & EXT4_DEFM_NODELALLOC)) | 1026 | !(def_mount_opts & EXT4_DEFM_NODELALLOC)) |
1017 | seq_puts(seq, ",nodelalloc"); | 1027 | seq_puts(seq, ",nodelalloc"); |
1018 | 1028 | ||
1029 | if (test_opt(sb, MBLK_IO_SUBMIT)) | ||
1030 | seq_puts(seq, ",mblk_io_submit"); | ||
1019 | if (sbi->s_stripe) | 1031 | if (sbi->s_stripe) |
1020 | seq_printf(seq, ",stripe=%lu", sbi->s_stripe); | 1032 | seq_printf(seq, ",stripe=%lu", sbi->s_stripe); |
1021 | /* | 1033 | /* |
@@ -1173,6 +1185,7 @@ static const struct super_operations ext4_sops = { | |||
1173 | .destroy_inode = ext4_destroy_inode, | 1185 | .destroy_inode = ext4_destroy_inode, |
1174 | .write_inode = ext4_write_inode, | 1186 | .write_inode = ext4_write_inode, |
1175 | .dirty_inode = ext4_dirty_inode, | 1187 | .dirty_inode = ext4_dirty_inode, |
1188 | .drop_inode = ext4_drop_inode, | ||
1176 | .evict_inode = ext4_evict_inode, | 1189 | .evict_inode = ext4_evict_inode, |
1177 | .put_super = ext4_put_super, | 1190 | .put_super = ext4_put_super, |
1178 | .sync_fs = ext4_sync_fs, | 1191 | .sync_fs = ext4_sync_fs, |
@@ -1186,7 +1199,6 @@ static const struct super_operations ext4_sops = { | |||
1186 | .quota_write = ext4_quota_write, | 1199 | .quota_write = ext4_quota_write, |
1187 | #endif | 1200 | #endif |
1188 | .bdev_try_to_free_page = bdev_try_to_free_page, | 1201 | .bdev_try_to_free_page = bdev_try_to_free_page, |
1189 | .trim_fs = ext4_trim_fs | ||
1190 | }; | 1202 | }; |
1191 | 1203 | ||
1192 | static const struct super_operations ext4_nojournal_sops = { | 1204 | static const struct super_operations ext4_nojournal_sops = { |
@@ -1194,6 +1206,7 @@ static const struct super_operations ext4_nojournal_sops = { | |||
1194 | .destroy_inode = ext4_destroy_inode, | 1206 | .destroy_inode = ext4_destroy_inode, |
1195 | .write_inode = ext4_write_inode, | 1207 | .write_inode = ext4_write_inode, |
1196 | .dirty_inode = ext4_dirty_inode, | 1208 | .dirty_inode = ext4_dirty_inode, |
1209 | .drop_inode = ext4_drop_inode, | ||
1197 | .evict_inode = ext4_evict_inode, | 1210 | .evict_inode = ext4_evict_inode, |
1198 | .write_super = ext4_write_super, | 1211 | .write_super = ext4_write_super, |
1199 | .put_super = ext4_put_super, | 1212 | .put_super = ext4_put_super, |
@@ -1228,8 +1241,8 @@ enum { | |||
1228 | Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_jqfmt_vfsv1, Opt_quota, | 1241 | Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_jqfmt_vfsv1, Opt_quota, |
1229 | Opt_noquota, Opt_ignore, Opt_barrier, Opt_nobarrier, Opt_err, | 1242 | Opt_noquota, Opt_ignore, Opt_barrier, Opt_nobarrier, Opt_err, |
1230 | Opt_resize, Opt_usrquota, Opt_grpquota, Opt_i_version, | 1243 | Opt_resize, Opt_usrquota, Opt_grpquota, Opt_i_version, |
1231 | Opt_stripe, Opt_delalloc, Opt_nodelalloc, | 1244 | Opt_stripe, Opt_delalloc, Opt_nodelalloc, Opt_mblk_io_submit, |
1232 | Opt_block_validity, Opt_noblock_validity, | 1245 | Opt_nomblk_io_submit, Opt_block_validity, Opt_noblock_validity, |
1233 | Opt_inode_readahead_blks, Opt_journal_ioprio, | 1246 | Opt_inode_readahead_blks, Opt_journal_ioprio, |
1234 | Opt_dioread_nolock, Opt_dioread_lock, | 1247 | Opt_dioread_nolock, Opt_dioread_lock, |
1235 | Opt_discard, Opt_nodiscard, | 1248 | Opt_discard, Opt_nodiscard, |
@@ -1293,6 +1306,8 @@ static const match_table_t tokens = { | |||
1293 | {Opt_resize, "resize"}, | 1306 | {Opt_resize, "resize"}, |
1294 | {Opt_delalloc, "delalloc"}, | 1307 | {Opt_delalloc, "delalloc"}, |
1295 | {Opt_nodelalloc, "nodelalloc"}, | 1308 | {Opt_nodelalloc, "nodelalloc"}, |
1309 | {Opt_mblk_io_submit, "mblk_io_submit"}, | ||
1310 | {Opt_nomblk_io_submit, "nomblk_io_submit"}, | ||
1296 | {Opt_block_validity, "block_validity"}, | 1311 | {Opt_block_validity, "block_validity"}, |
1297 | {Opt_noblock_validity, "noblock_validity"}, | 1312 | {Opt_noblock_validity, "noblock_validity"}, |
1298 | {Opt_inode_readahead_blks, "inode_readahead_blks=%u"}, | 1313 | {Opt_inode_readahead_blks, "inode_readahead_blks=%u"}, |
@@ -1714,6 +1729,12 @@ set_qf_format: | |||
1714 | case Opt_nodelalloc: | 1729 | case Opt_nodelalloc: |
1715 | clear_opt(sbi->s_mount_opt, DELALLOC); | 1730 | clear_opt(sbi->s_mount_opt, DELALLOC); |
1716 | break; | 1731 | break; |
1732 | case Opt_mblk_io_submit: | ||
1733 | set_opt(sbi->s_mount_opt, MBLK_IO_SUBMIT); | ||
1734 | break; | ||
1735 | case Opt_nomblk_io_submit: | ||
1736 | clear_opt(sbi->s_mount_opt, MBLK_IO_SUBMIT); | ||
1737 | break; | ||
1717 | case Opt_stripe: | 1738 | case Opt_stripe: |
1718 | if (match_int(&args[0], &option)) | 1739 | if (match_int(&args[0], &option)) |
1719 | return 0; | 1740 | return 0; |
@@ -2699,7 +2720,6 @@ static int ext4_lazyinit_thread(void *arg) | |||
2699 | struct ext4_li_request *elr; | 2720 | struct ext4_li_request *elr; |
2700 | unsigned long next_wakeup; | 2721 | unsigned long next_wakeup; |
2701 | DEFINE_WAIT(wait); | 2722 | DEFINE_WAIT(wait); |
2702 | int ret; | ||
2703 | 2723 | ||
2704 | BUG_ON(NULL == eli); | 2724 | BUG_ON(NULL == eli); |
2705 | 2725 | ||
@@ -2723,13 +2743,12 @@ cont_thread: | |||
2723 | elr = list_entry(pos, struct ext4_li_request, | 2743 | elr = list_entry(pos, struct ext4_li_request, |
2724 | lr_request); | 2744 | lr_request); |
2725 | 2745 | ||
2726 | if (time_after_eq(jiffies, elr->lr_next_sched)) | 2746 | if (time_after_eq(jiffies, elr->lr_next_sched)) { |
2727 | ret = ext4_run_li_request(elr); | 2747 | if (ext4_run_li_request(elr) != 0) { |
2728 | 2748 | /* error, remove the lazy_init job */ | |
2729 | if (ret) { | 2749 | ext4_remove_li_request(elr); |
2730 | ret = 0; | 2750 | continue; |
2731 | ext4_remove_li_request(elr); | 2751 | } |
2732 | continue; | ||
2733 | } | 2752 | } |
2734 | 2753 | ||
2735 | if (time_before(elr->lr_next_sched, next_wakeup)) | 2754 | if (time_before(elr->lr_next_sched, next_wakeup)) |
@@ -2740,7 +2759,8 @@ cont_thread: | |||
2740 | if (freezing(current)) | 2759 | if (freezing(current)) |
2741 | refrigerator(); | 2760 | refrigerator(); |
2742 | 2761 | ||
2743 | if (time_after_eq(jiffies, next_wakeup)) { | 2762 | if ((time_after_eq(jiffies, next_wakeup)) || |
2763 | (MAX_JIFFY_OFFSET == next_wakeup)) { | ||
2744 | cond_resched(); | 2764 | cond_resched(); |
2745 | continue; | 2765 | continue; |
2746 | } | 2766 | } |
@@ -2788,9 +2808,6 @@ static void ext4_clear_request_list(void) | |||
2788 | struct ext4_li_request *elr; | 2808 | struct ext4_li_request *elr; |
2789 | 2809 | ||
2790 | mutex_lock(&ext4_li_info->li_list_mtx); | 2810 | mutex_lock(&ext4_li_info->li_list_mtx); |
2791 | if (list_empty(&ext4_li_info->li_request_list)) | ||
2792 | return; | ||
2793 | |||
2794 | list_for_each_safe(pos, n, &ext4_li_info->li_request_list) { | 2811 | list_for_each_safe(pos, n, &ext4_li_info->li_request_list) { |
2795 | elr = list_entry(pos, struct ext4_li_request, | 2812 | elr = list_entry(pos, struct ext4_li_request, |
2796 | lr_request); | 2813 | lr_request); |
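The removed list_empty() short-circuit in ext4_clear_request_list() returned with li_list_mtx still held, a classic early-return-with-the-lock-held leak; with the check gone, an empty list simply falls through the loop to the existing mutex_unlock(). The anti-pattern and the usual fix, as a hedged fragment:

    /* Buggy shape (illustrative): */
    mutex_lock(&lock);
    if (list_empty(&list))
            return;                 /* BUG: the mutex is never released */
    list_for_each_safe(pos, n, &list) {
            /* ... */
    }
    mutex_unlock(&lock);

    /* Fixed shape: the empty case just iterates zero times. */
    mutex_lock(&lock);
    list_for_each_safe(pos, n, &list) {
            /* ... */
    }
    mutex_unlock(&lock);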
@@ -3257,13 +3274,14 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
3257 | * Test whether we have more sectors than will fit in sector_t, | 3274 | * Test whether we have more sectors than will fit in sector_t, |
3258 | * and whether the max offset is addressable by the page cache. | 3275 | * and whether the max offset is addressable by the page cache. |
3259 | */ | 3276 | */ |
3260 | ret = generic_check_addressable(sb->s_blocksize_bits, | 3277 | err = generic_check_addressable(sb->s_blocksize_bits, |
3261 | ext4_blocks_count(es)); | 3278 | ext4_blocks_count(es)); |
3262 | if (ret) { | 3279 | if (err) { |
3263 | ext4_msg(sb, KERN_ERR, "filesystem" | 3280 | ext4_msg(sb, KERN_ERR, "filesystem" |
3264 | " too large to mount safely on this system"); | 3281 | " too large to mount safely on this system"); |
3265 | if (sizeof(sector_t) < 8) | 3282 | if (sizeof(sector_t) < 8) |
3266 | ext4_msg(sb, KERN_WARNING, "CONFIG_LBDAF not enabled"); | 3283 | ext4_msg(sb, KERN_WARNING, "CONFIG_LBDAF not enabled"); |
3284 | ret = err; | ||
3267 | goto failed_mount; | 3285 | goto failed_mount; |
3268 | } | 3286 | } |
3269 | 3287 | ||
@@ -3348,6 +3366,24 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
3348 | get_random_bytes(&sbi->s_next_generation, sizeof(u32)); | 3366 | get_random_bytes(&sbi->s_next_generation, sizeof(u32)); |
3349 | spin_lock_init(&sbi->s_next_gen_lock); | 3367 | spin_lock_init(&sbi->s_next_gen_lock); |
3350 | 3368 | ||
3369 | err = percpu_counter_init(&sbi->s_freeblocks_counter, | ||
3370 | ext4_count_free_blocks(sb)); | ||
3371 | if (!err) { | ||
3372 | err = percpu_counter_init(&sbi->s_freeinodes_counter, | ||
3373 | ext4_count_free_inodes(sb)); | ||
3374 | } | ||
3375 | if (!err) { | ||
3376 | err = percpu_counter_init(&sbi->s_dirs_counter, | ||
3377 | ext4_count_dirs(sb)); | ||
3378 | } | ||
3379 | if (!err) { | ||
3380 | err = percpu_counter_init(&sbi->s_dirtyblocks_counter, 0); | ||
3381 | } | ||
3382 | if (err) { | ||
3383 | ext4_msg(sb, KERN_ERR, "insufficient memory"); | ||
3384 | goto failed_mount3; | ||
3385 | } | ||
3386 | |||
3351 | sbi->s_stripe = ext4_get_stripe_size(sbi); | 3387 | sbi->s_stripe = ext4_get_stripe_size(sbi); |
3352 | sbi->s_max_writeback_mb_bump = 128; | 3388 | sbi->s_max_writeback_mb_bump = 128; |
3353 | 3389 | ||
@@ -3446,22 +3482,19 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
3446 | } | 3482 | } |
3447 | set_task_ioprio(sbi->s_journal->j_task, journal_ioprio); | 3483 | set_task_ioprio(sbi->s_journal->j_task, journal_ioprio); |
3448 | 3484 | ||
3449 | no_journal: | 3485 | /* |
3450 | err = percpu_counter_init(&sbi->s_freeblocks_counter, | 3486 | * The journal may have updated the bg summary counts, so we |
3451 | ext4_count_free_blocks(sb)); | 3487 | * need to update the global counters. |
3452 | if (!err) | 3488 | */ |
3453 | err = percpu_counter_init(&sbi->s_freeinodes_counter, | 3489 | percpu_counter_set(&sbi->s_freeblocks_counter, |
3454 | ext4_count_free_inodes(sb)); | 3490 | ext4_count_free_blocks(sb)); |
3455 | if (!err) | 3491 | percpu_counter_set(&sbi->s_freeinodes_counter, |
3456 | err = percpu_counter_init(&sbi->s_dirs_counter, | 3492 | ext4_count_free_inodes(sb)); |
3457 | ext4_count_dirs(sb)); | 3493 | percpu_counter_set(&sbi->s_dirs_counter, |
3458 | if (!err) | 3494 | ext4_count_dirs(sb)); |
3459 | err = percpu_counter_init(&sbi->s_dirtyblocks_counter, 0); | 3495 | percpu_counter_set(&sbi->s_dirtyblocks_counter, 0); |
3460 | if (err) { | ||
3461 | ext4_msg(sb, KERN_ERR, "insufficient memory"); | ||
3462 | goto failed_mount_wq; | ||
3463 | } | ||
3464 | 3496 | ||
3497 | no_journal: | ||
3465 | EXT4_SB(sb)->dio_unwritten_wq = create_workqueue("ext4-dio-unwritten"); | 3498 | EXT4_SB(sb)->dio_unwritten_wq = create_workqueue("ext4-dio-unwritten"); |
3466 | if (!EXT4_SB(sb)->dio_unwritten_wq) { | 3499 | if (!EXT4_SB(sb)->dio_unwritten_wq) { |
3467 | printk(KERN_ERR "EXT4-fs: failed to create DIO workqueue\n"); | 3500 | printk(KERN_ERR "EXT4-fs: failed to create DIO workqueue\n"); |
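The mount-path reshuffle initializes the four percpu counters earlier, before the journal is set up, so an allocation failure can unwind through failed_mount3; once journal recovery has possibly changed the on-disk summary counts, the values are refreshed with percpu_counter_set() instead of being initialized late. This is also what lets ext4_commit_super() drop its percpu_counter_initialized() guards below. The lifecycle in miniature, as a hedged kernel-context fragment (count_free_blocks() stands in for the ext4 counting helpers):

    /* Sketch of the percpu_counter lifecycle on the mount path. */
    struct percpu_counter free_blocks;      /* <linux/percpu_counter.h> */
    int err;

    err = percpu_counter_init(&free_blocks, count_free_blocks(sb));
    if (err)
            goto failed_mount;              /* -ENOMEM: nothing to undo yet */

    /* ... journal replay may change the on-disk summary counts ... */
    percpu_counter_set(&free_blocks, count_free_blocks(sb));

    /* error unwind / unmount */
    percpu_counter_destroy(&free_blocks);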
@@ -3611,10 +3644,6 @@ failed_mount_wq: | |||
3611 | jbd2_journal_destroy(sbi->s_journal); | 3644 | jbd2_journal_destroy(sbi->s_journal); |
3612 | sbi->s_journal = NULL; | 3645 | sbi->s_journal = NULL; |
3613 | } | 3646 | } |
3614 | percpu_counter_destroy(&sbi->s_freeblocks_counter); | ||
3615 | percpu_counter_destroy(&sbi->s_freeinodes_counter); | ||
3616 | percpu_counter_destroy(&sbi->s_dirs_counter); | ||
3617 | percpu_counter_destroy(&sbi->s_dirtyblocks_counter); | ||
3618 | failed_mount3: | 3647 | failed_mount3: |
3619 | if (sbi->s_flex_groups) { | 3648 | if (sbi->s_flex_groups) { |
3620 | if (is_vmalloc_addr(sbi->s_flex_groups)) | 3649 | if (is_vmalloc_addr(sbi->s_flex_groups)) |
@@ -3622,6 +3651,10 @@ failed_mount3: | |||
3622 | else | 3651 | else |
3623 | kfree(sbi->s_flex_groups); | 3652 | kfree(sbi->s_flex_groups); |
3624 | } | 3653 | } |
3654 | percpu_counter_destroy(&sbi->s_freeblocks_counter); | ||
3655 | percpu_counter_destroy(&sbi->s_freeinodes_counter); | ||
3656 | percpu_counter_destroy(&sbi->s_dirs_counter); | ||
3657 | percpu_counter_destroy(&sbi->s_dirtyblocks_counter); | ||
3625 | failed_mount2: | 3658 | failed_mount2: |
3626 | for (i = 0; i < db_count; i++) | 3659 | for (i = 0; i < db_count; i++) |
3627 | brelse(sbi->s_group_desc[i]); | 3660 | brelse(sbi->s_group_desc[i]); |
@@ -3949,13 +3982,11 @@ static int ext4_commit_super(struct super_block *sb, int sync) | |||
3949 | else | 3982 | else |
3950 | es->s_kbytes_written = | 3983 | es->s_kbytes_written = |
3951 | cpu_to_le64(EXT4_SB(sb)->s_kbytes_written); | 3984 | cpu_to_le64(EXT4_SB(sb)->s_kbytes_written); |
3952 | if (percpu_counter_initialized(&EXT4_SB(sb)->s_freeblocks_counter)) | 3985 | ext4_free_blocks_count_set(es, percpu_counter_sum_positive( |
3953 | ext4_free_blocks_count_set(es, percpu_counter_sum_positive( | 3986 | &EXT4_SB(sb)->s_freeblocks_counter)); |
3954 | &EXT4_SB(sb)->s_freeblocks_counter)); | 3987 | es->s_free_inodes_count = |
3955 | if (percpu_counter_initialized(&EXT4_SB(sb)->s_freeinodes_counter)) | 3988 | cpu_to_le32(percpu_counter_sum_positive( |
3956 | es->s_free_inodes_count = | 3989 | &EXT4_SB(sb)->s_freeinodes_counter)); |
3957 | cpu_to_le32(percpu_counter_sum_positive( | ||
3958 | &EXT4_SB(sb)->s_freeinodes_counter)); | ||
3959 | sb->s_dirt = 0; | 3990 | sb->s_dirt = 0; |
3960 | BUFFER_TRACE(sbh, "marking dirty"); | 3991 | BUFFER_TRACE(sbh, "marking dirty"); |
3961 | mark_buffer_dirty(sbh); | 3992 | mark_buffer_dirty(sbh); |
@@ -4556,12 +4587,10 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id, | |||
4556 | 4587 | ||
4557 | static int ext4_quota_off(struct super_block *sb, int type) | 4588 | static int ext4_quota_off(struct super_block *sb, int type) |
4558 | { | 4589 | { |
4559 | /* Force all delayed allocation blocks to be allocated */ | 4590 | /* Force all delayed allocation blocks to be allocated. |
4560 | if (test_opt(sb, DELALLOC)) { | 4591 | * Caller already holds s_umount sem */ |
4561 | down_read(&sb->s_umount); | 4592 | if (test_opt(sb, DELALLOC)) |
4562 | sync_filesystem(sb); | 4593 | sync_filesystem(sb); |
4563 | up_read(&sb->s_umount); | ||
4564 | } | ||
4565 | 4594 | ||
4566 | return dquot_quota_off(sb, type); | 4595 | return dquot_quota_off(sb, type); |
4567 | } | 4596 | } |
diff --git a/fs/fuse/file.c b/fs/fuse/file.c index c8224587123f..8b984a2cebbd 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c | |||
@@ -13,6 +13,7 @@ | |||
13 | #include <linux/kernel.h> | 13 | #include <linux/kernel.h> |
14 | #include <linux/sched.h> | 14 | #include <linux/sched.h> |
15 | #include <linux/module.h> | 15 | #include <linux/module.h> |
16 | #include <linux/compat.h> | ||
16 | 17 | ||
17 | static const struct file_operations fuse_direct_io_file_operations; | 18 | static const struct file_operations fuse_direct_io_file_operations; |
18 | 19 | ||
@@ -134,6 +135,7 @@ EXPORT_SYMBOL_GPL(fuse_do_open); | |||
134 | void fuse_finish_open(struct inode *inode, struct file *file) | 135 | void fuse_finish_open(struct inode *inode, struct file *file) |
135 | { | 136 | { |
136 | struct fuse_file *ff = file->private_data; | 137 | struct fuse_file *ff = file->private_data; |
138 | struct fuse_conn *fc = get_fuse_conn(inode); | ||
137 | 139 | ||
138 | if (ff->open_flags & FOPEN_DIRECT_IO) | 140 | if (ff->open_flags & FOPEN_DIRECT_IO) |
139 | file->f_op = &fuse_direct_io_file_operations; | 141 | file->f_op = &fuse_direct_io_file_operations; |
@@ -141,6 +143,15 @@ void fuse_finish_open(struct inode *inode, struct file *file) | |||
141 | invalidate_inode_pages2(inode->i_mapping); | 143 | invalidate_inode_pages2(inode->i_mapping); |
142 | if (ff->open_flags & FOPEN_NONSEEKABLE) | 144 | if (ff->open_flags & FOPEN_NONSEEKABLE) |
143 | nonseekable_open(inode, file); | 145 | nonseekable_open(inode, file); |
146 | if (fc->atomic_o_trunc && (file->f_flags & O_TRUNC)) { | ||
147 | struct fuse_inode *fi = get_fuse_inode(inode); | ||
148 | |||
149 | spin_lock(&fc->lock); | ||
150 | fi->attr_version = ++fc->attr_version; | ||
151 | i_size_write(inode, 0); | ||
152 | spin_unlock(&fc->lock); | ||
153 | fuse_invalidate_attr(inode); | ||
154 | } | ||
144 | } | 155 | } |
145 | 156 | ||
146 | int fuse_open_common(struct inode *inode, struct file *file, bool isdir) | 157 | int fuse_open_common(struct inode *inode, struct file *file, bool isdir) |
@@ -1618,6 +1629,58 @@ static int fuse_ioctl_copy_user(struct page **pages, struct iovec *iov, | |||
1618 | } | 1629 | } |
1619 | 1630 | ||
1620 | /* | 1631 | /* |
1632 | * CUSE servers compiled on 32bit broke on 64bit kernels because the | ||
1633 | * ABI was defined to be 'struct iovec' which is different on 32bit | ||
1634 | * and 64bit. Fortunately we can determine which structure the server | ||
1635 | * used from the size of the reply. | ||
1636 | */ | ||
1637 | static int fuse_copy_ioctl_iovec(struct iovec *dst, void *src, | ||
1638 | size_t transferred, unsigned count, | ||
1639 | bool is_compat) | ||
1640 | { | ||
1641 | #ifdef CONFIG_COMPAT | ||
1642 | if (count * sizeof(struct compat_iovec) == transferred) { | ||
1643 | struct compat_iovec *ciov = src; | ||
1644 | unsigned i; | ||
1645 | |||
1646 | /* | ||
1647 | * With this interface a 32bit server cannot support | ||
1648 | * non-compat (i.e. ones coming from 64bit apps) ioctl | ||
1649 | * requests | ||
1650 | */ | ||
1651 | if (!is_compat) | ||
1652 | return -EINVAL; | ||
1653 | |||
1654 | for (i = 0; i < count; i++) { | ||
1655 | dst[i].iov_base = compat_ptr(ciov[i].iov_base); | ||
1656 | dst[i].iov_len = ciov[i].iov_len; | ||
1657 | } | ||
1658 | return 0; | ||
1659 | } | ||
1660 | #endif | ||
1661 | |||
1662 | if (count * sizeof(struct iovec) != transferred) | ||
1663 | return -EIO; | ||
1664 | |||
1665 | memcpy(dst, src, transferred); | ||
1666 | return 0; | ||
1667 | } | ||
1668 | |||
1669 | /* Make sure iov_length() won't overflow */ | ||
1670 | static int fuse_verify_ioctl_iov(struct iovec *iov, size_t count) | ||
1671 | { | ||
1672 | size_t n; | ||
1673 | u32 max = FUSE_MAX_PAGES_PER_REQ << PAGE_SHIFT; | ||
1674 | |||
1675 | for (n = 0; n < count; n++) { | ||
1676 | if (iov->iov_len > (size_t) max) | ||
1677 | return -ENOMEM; | ||
1678 | max -= iov->iov_len; | ||
1679 | } | ||
1680 | return 0; | ||
1681 | } | ||
1682 | |||
1683 | /* | ||
1621 | * For ioctls, there is no generic way to determine how much memory | 1684 | * For ioctls, there is no generic way to determine how much memory |
1622 | * needs to be read and/or written. Furthermore, ioctls are allowed | 1685 | * needs to be read and/or written. Furthermore, ioctls are allowed |
1623 | * to dereference the passed pointer, so the parameter requires deep | 1686 | * to dereference the passed pointer, so the parameter requires deep |
@@ -1798,18 +1861,25 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg, | |||
1798 | in_iovs + out_iovs > FUSE_IOCTL_MAX_IOV) | 1861 | in_iovs + out_iovs > FUSE_IOCTL_MAX_IOV) |
1799 | goto out; | 1862 | goto out; |
1800 | 1863 | ||
1801 | err = -EIO; | ||
1802 | if ((in_iovs + out_iovs) * sizeof(struct iovec) != transferred) | ||
1803 | goto out; | ||
1804 | |||
1805 | /* okay, copy in iovs and retry */ | ||
1806 | vaddr = kmap_atomic(pages[0], KM_USER0); | 1864 | vaddr = kmap_atomic(pages[0], KM_USER0); |
1807 | memcpy(page_address(iov_page), vaddr, transferred); | 1865 | err = fuse_copy_ioctl_iovec(page_address(iov_page), vaddr, |
1866 | transferred, in_iovs + out_iovs, | ||
1867 | (flags & FUSE_IOCTL_COMPAT) != 0); | ||
1808 | kunmap_atomic(vaddr, KM_USER0); | 1868 | kunmap_atomic(vaddr, KM_USER0); |
1869 | if (err) | ||
1870 | goto out; | ||
1809 | 1871 | ||
1810 | in_iov = page_address(iov_page); | 1872 | in_iov = page_address(iov_page); |
1811 | out_iov = in_iov + in_iovs; | 1873 | out_iov = in_iov + in_iovs; |
1812 | 1874 | ||
1875 | err = fuse_verify_ioctl_iov(in_iov, in_iovs); | ||
1876 | if (err) | ||
1877 | goto out; | ||
1878 | |||
1879 | err = fuse_verify_ioctl_iov(out_iov, out_iovs); | ||
1880 | if (err) | ||
1881 | goto out; | ||
1882 | |||
1813 | goto retry; | 1883 | goto retry; |
1814 | } | 1884 | } |
1815 | 1885 | ||
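fuse_copy_ioctl_iovec() leans on a size difference to stay compatible with 32-bit CUSE servers: on a 64-bit kernel struct iovec is 16 bytes while struct compat_iovec is 8, so with in_iovs + out_iovs = 3 a 24-byte reply (3 x 8) must be compat iovecs and a 48-byte reply (3 x 16) native ones; any other length is -EIO, and a compat-sized reply is only accepted when the originating ioctl was itself a compat call. The core of the check as a hedged fragment (convert_from_compat() is a placeholder for the copy loop in the hunk above):

    /* 64-bit kernel, CONFIG_COMPAT: which layout did the server reply with? */
    size_t count = in_iovs + out_iovs;

    if (count * sizeof(struct compat_iovec) == transferred) {
            if (!is_compat)
                    return -EINVAL;          /* 32-bit server, 64-bit caller */
            convert_from_compat();           /* widen each iov_base/iov_len */
    } else if (count * sizeof(struct iovec) == transferred) {
            memcpy(dst, src, transferred);   /* layouts already match */
    } else {
            return -EIO;                     /* reply size fits neither ABI */
    }

fuse_verify_ioctl_iov() then bounds each iov_len so iov_length() cannot overflow the request's page budget.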
diff --git a/fs/gfs2/export.c b/fs/gfs2/export.c index 06d582732d34..5ab3839dfcb9 100644 --- a/fs/gfs2/export.c +++ b/fs/gfs2/export.c | |||
@@ -138,10 +138,8 @@ static struct dentry *gfs2_get_dentry(struct super_block *sb, | |||
138 | struct gfs2_inum_host *inum) | 138 | struct gfs2_inum_host *inum) |
139 | { | 139 | { |
140 | struct gfs2_sbd *sdp = sb->s_fs_info; | 140 | struct gfs2_sbd *sdp = sb->s_fs_info; |
141 | struct gfs2_holder i_gh; | ||
142 | struct inode *inode; | 141 | struct inode *inode; |
143 | struct dentry *dentry; | 142 | struct dentry *dentry; |
144 | int error; | ||
145 | 143 | ||
146 | inode = gfs2_ilookup(sb, inum->no_addr); | 144 | inode = gfs2_ilookup(sb, inum->no_addr); |
147 | if (inode) { | 145 | if (inode) { |
@@ -152,52 +150,16 @@ static struct dentry *gfs2_get_dentry(struct super_block *sb, | |||
152 | goto out_inode; | 150 | goto out_inode; |
153 | } | 151 | } |
154 | 152 | ||
155 | error = gfs2_glock_nq_num(sdp, inum->no_addr, &gfs2_inode_glops, | 153 | inode = gfs2_lookup_by_inum(sdp, inum->no_addr, &inum->no_formal_ino, |
156 | LM_ST_SHARED, LM_FLAG_ANY, &i_gh); | 154 | GFS2_BLKST_DINODE); |
157 | if (error) | 155 | if (IS_ERR(inode)) |
158 | return ERR_PTR(error); | 156 | return ERR_CAST(inode); |
159 | |||
160 | error = gfs2_check_blk_type(sdp, inum->no_addr, GFS2_BLKST_DINODE); | ||
161 | if (error) | ||
162 | goto fail; | ||
163 | |||
164 | inode = gfs2_inode_lookup(sb, DT_UNKNOWN, inum->no_addr, 0); | ||
165 | if (IS_ERR(inode)) { | ||
166 | error = PTR_ERR(inode); | ||
167 | goto fail; | ||
168 | } | ||
169 | |||
170 | error = gfs2_inode_refresh(GFS2_I(inode)); | ||
171 | if (error) { | ||
172 | iput(inode); | ||
173 | goto fail; | ||
174 | } | ||
175 | |||
176 | /* Pick up the works we bypass in gfs2_inode_lookup */ | ||
177 | if (inode->i_state & I_NEW) | ||
178 | gfs2_set_iop(inode); | ||
179 | |||
180 | if (GFS2_I(inode)->i_no_formal_ino != inum->no_formal_ino) { | ||
181 | iput(inode); | ||
182 | goto fail; | ||
183 | } | ||
184 | |||
185 | error = -EIO; | ||
186 | if (GFS2_I(inode)->i_diskflags & GFS2_DIF_SYSTEM) { | ||
187 | iput(inode); | ||
188 | goto fail; | ||
189 | } | ||
190 | |||
191 | gfs2_glock_dq_uninit(&i_gh); | ||
192 | 157 | ||
193 | out_inode: | 158 | out_inode: |
194 | dentry = d_obtain_alias(inode); | 159 | dentry = d_obtain_alias(inode); |
195 | if (!IS_ERR(dentry)) | 160 | if (!IS_ERR(dentry)) |
196 | dentry->d_op = &gfs2_dops; | 161 | dentry->d_op = &gfs2_dops; |
197 | return dentry; | 162 | return dentry; |
198 | fail: | ||
199 | gfs2_glock_dq_uninit(&i_gh); | ||
200 | return ERR_PTR(error); | ||
201 | } | 163 | } |
202 | 164 | ||
203 | static struct dentry *gfs2_fh_to_dentry(struct super_block *sb, struct fid *fid, | 165 | static struct dentry *gfs2_fh_to_dentry(struct super_block *sb, struct fid *fid, |
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 87778857f099..f92c17704169 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c | |||
@@ -686,21 +686,20 @@ static void delete_work_func(struct work_struct *work) | |||
686 | { | 686 | { |
687 | struct gfs2_glock *gl = container_of(work, struct gfs2_glock, gl_delete); | 687 | struct gfs2_glock *gl = container_of(work, struct gfs2_glock, gl_delete); |
688 | struct gfs2_sbd *sdp = gl->gl_sbd; | 688 | struct gfs2_sbd *sdp = gl->gl_sbd; |
689 | struct gfs2_inode *ip = NULL; | 689 | struct gfs2_inode *ip; |
690 | struct inode *inode; | 690 | struct inode *inode; |
691 | u64 no_addr = 0; | 691 | u64 no_addr = gl->gl_name.ln_number; |
692 | |||
693 | ip = gl->gl_object; | ||
694 | /* Note: Unsafe to dereference ip as we don't hold right refs/locks */ | ||
692 | 695 | ||
693 | spin_lock(&gl->gl_spin); | ||
694 | ip = (struct gfs2_inode *)gl->gl_object; | ||
695 | if (ip) | 696 | if (ip) |
696 | no_addr = ip->i_no_addr; | ||
697 | spin_unlock(&gl->gl_spin); | ||
698 | if (ip) { | ||
699 | inode = gfs2_ilookup(sdp->sd_vfs, no_addr); | 697 | inode = gfs2_ilookup(sdp->sd_vfs, no_addr); |
700 | if (inode) { | 698 | else |
701 | d_prune_aliases(inode); | 699 | inode = gfs2_lookup_by_inum(sdp, no_addr, NULL, GFS2_BLKST_UNLINKED); |
702 | iput(inode); | 700 | if (inode && !IS_ERR(inode)) { |
703 | } | 701 | d_prune_aliases(inode); |
702 | iput(inode); | ||
704 | } | 703 | } |
705 | gfs2_glock_put(gl); | 704 | gfs2_glock_put(gl); |
706 | } | 705 | } |
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 06370f8bd8cf..e1213f7f9217 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c | |||
@@ -73,49 +73,6 @@ static struct inode *gfs2_iget(struct super_block *sb, u64 no_addr) | |||
73 | return iget5_locked(sb, hash, iget_test, iget_set, &no_addr); | 73 | return iget5_locked(sb, hash, iget_test, iget_set, &no_addr); |
74 | } | 74 | } |
75 | 75 | ||
76 | struct gfs2_skip_data { | ||
77 | u64 no_addr; | ||
78 | int skipped; | ||
79 | }; | ||
80 | |||
81 | static int iget_skip_test(struct inode *inode, void *opaque) | ||
82 | { | ||
83 | struct gfs2_inode *ip = GFS2_I(inode); | ||
84 | struct gfs2_skip_data *data = opaque; | ||
85 | |||
86 | if (ip->i_no_addr == data->no_addr) { | ||
87 | if (inode->i_state & (I_FREEING|I_WILL_FREE)){ | ||
88 | data->skipped = 1; | ||
89 | return 0; | ||
90 | } | ||
91 | return 1; | ||
92 | } | ||
93 | return 0; | ||
94 | } | ||
95 | |||
96 | static int iget_skip_set(struct inode *inode, void *opaque) | ||
97 | { | ||
98 | struct gfs2_inode *ip = GFS2_I(inode); | ||
99 | struct gfs2_skip_data *data = opaque; | ||
100 | |||
101 | if (data->skipped) | ||
102 | return 1; | ||
103 | inode->i_ino = (unsigned long)(data->no_addr); | ||
104 | ip->i_no_addr = data->no_addr; | ||
105 | return 0; | ||
106 | } | ||
107 | |||
108 | static struct inode *gfs2_iget_skip(struct super_block *sb, | ||
109 | u64 no_addr) | ||
110 | { | ||
111 | struct gfs2_skip_data data; | ||
112 | unsigned long hash = (unsigned long)no_addr; | ||
113 | |||
114 | data.no_addr = no_addr; | ||
115 | data.skipped = 0; | ||
116 | return iget5_locked(sb, hash, iget_skip_test, iget_skip_set, &data); | ||
117 | } | ||
118 | |||
119 | /** | 76 | /** |
120 | * GFS2 lookup code fills in vfs inode contents based on info obtained | 77 | * GFS2 lookup code fills in vfs inode contents based on info obtained |
121 | * from directory entry inside gfs2_inode_lookup(). This has caused issues | 78 | * from directory entry inside gfs2_inode_lookup(). This has caused issues |
@@ -243,93 +200,54 @@ fail: | |||
243 | return ERR_PTR(error); | 200 | return ERR_PTR(error); |
244 | } | 201 | } |
245 | 202 | ||
246 | /** | 203 | struct inode *gfs2_lookup_by_inum(struct gfs2_sbd *sdp, u64 no_addr, |
247 | * gfs2_process_unlinked_inode - Lookup an unlinked inode for reclamation | 204 | u64 *no_formal_ino, unsigned int blktype) |
248 | * and try to reclaim it by doing iput. | ||
249 | * | ||
250 | * This function assumes no rgrp locks are currently held. | ||
251 | * | ||
252 | * @sb: The super block | ||
253 | * no_addr: The inode number | ||
254 | * | ||
255 | */ | ||
256 | |||
257 | void gfs2_process_unlinked_inode(struct super_block *sb, u64 no_addr) | ||
258 | { | 205 | { |
259 | struct gfs2_sbd *sdp; | 206 | struct super_block *sb = sdp->sd_vfs; |
260 | struct gfs2_inode *ip; | 207 | struct gfs2_holder i_gh; |
261 | struct gfs2_glock *io_gl = NULL; | ||
262 | int error; | ||
263 | struct gfs2_holder gh; | ||
264 | struct inode *inode; | 208 | struct inode *inode; |
209 | int error; | ||
265 | 210 | ||
266 | inode = gfs2_iget_skip(sb, no_addr); | 211 | error = gfs2_glock_nq_num(sdp, no_addr, &gfs2_inode_glops, |
267 | 212 | LM_ST_SHARED, LM_FLAG_ANY, &i_gh); | |
268 | if (!inode) | 213 | if (error) |
269 | return; | 214 | return ERR_PTR(error); |
270 | |||
271 | /* If it's not a new inode, someone's using it, so leave it alone. */ | ||
272 | if (!(inode->i_state & I_NEW)) { | ||
273 | iput(inode); | ||
274 | return; | ||
275 | } | ||
276 | |||
277 | ip = GFS2_I(inode); | ||
278 | sdp = GFS2_SB(inode); | ||
279 | ip->i_no_formal_ino = -1; | ||
280 | 215 | ||
281 | error = gfs2_glock_get(sdp, no_addr, &gfs2_inode_glops, CREATE, &ip->i_gl); | 216 | error = gfs2_check_blk_type(sdp, no_addr, blktype); |
282 | if (unlikely(error)) | 217 | if (error) |
283 | goto fail; | 218 | goto fail; |
284 | ip->i_gl->gl_object = ip; | ||
285 | 219 | ||
286 | error = gfs2_glock_get(sdp, no_addr, &gfs2_iopen_glops, CREATE, &io_gl); | 220 | inode = gfs2_inode_lookup(sb, DT_UNKNOWN, no_addr, 0); |
287 | if (unlikely(error)) | 221 | if (IS_ERR(inode)) |
288 | goto fail_put; | 222 | goto fail; |
289 | |||
290 | set_bit(GIF_INVALID, &ip->i_flags); | ||
291 | error = gfs2_glock_nq_init(io_gl, LM_ST_SHARED, LM_FLAG_TRY | GL_EXACT, | ||
292 | &ip->i_iopen_gh); | ||
293 | if (unlikely(error)) | ||
294 | goto fail_iopen; | ||
295 | 223 | ||
296 | ip->i_iopen_gh.gh_gl->gl_object = ip; | 224 | error = gfs2_inode_refresh(GFS2_I(inode)); |
297 | gfs2_glock_put(io_gl); | 225 | if (error) |
298 | io_gl = NULL; | 226 | goto fail_iput; |
299 | 227 | ||
300 | inode->i_mode = DT2IF(DT_UNKNOWN); | 228 | /* Pick up the works we bypass in gfs2_inode_lookup */ |
229 | if (inode->i_state & I_NEW) | ||
230 | gfs2_set_iop(inode); | ||
301 | 231 | ||
302 | /* | 232 | /* Two extra checks for NFS only */ |
303 | * We must read the inode in order to work out its type in | 233 | if (no_formal_ino) { |
304 | * this case. Note that this doesn't happen often as we normally | 234 | error = -ESTALE; |
305 | * know the type beforehand. This code path only occurs during | 235 | if (GFS2_I(inode)->i_no_formal_ino != *no_formal_ino) |
306 | * unlinked inode recovery (where it is safe to do this glock, | 236 | goto fail_iput; |
307 | * which is not true in the general case). | ||
308 | */ | ||
309 | error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, LM_FLAG_TRY, | ||
310 | &gh); | ||
311 | if (unlikely(error)) | ||
312 | goto fail_glock; | ||
313 | 237 | ||
314 | /* Inode is now uptodate */ | 238 | error = -EIO; |
315 | gfs2_glock_dq_uninit(&gh); | 239 | if (GFS2_I(inode)->i_diskflags & GFS2_DIF_SYSTEM) |
316 | gfs2_set_iop(inode); | 240 | goto fail_iput; |
317 | 241 | ||
318 | /* The iput will cause it to be deleted. */ | 242 | error = 0; |
319 | iput(inode); | 243 | } |
320 | return; | ||
321 | 244 | ||
322 | fail_glock: | ||
323 | gfs2_glock_dq(&ip->i_iopen_gh); | ||
324 | fail_iopen: | ||
325 | if (io_gl) | ||
326 | gfs2_glock_put(io_gl); | ||
327 | fail_put: | ||
328 | ip->i_gl->gl_object = NULL; | ||
329 | gfs2_glock_put(ip->i_gl); | ||
330 | fail: | 245 | fail: |
331 | iget_failed(inode); | 246 | gfs2_glock_dq_uninit(&i_gh); |
332 | return; | 247 | return error ? ERR_PTR(error) : inode; |
248 | fail_iput: | ||
249 | iput(inode); | ||
250 | goto fail; | ||
333 | } | 251 | } |
334 | 252 | ||
335 | static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf) | 253 | static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf) |
diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h index 6720d7d5fbc6..d8499fadcc53 100644 --- a/fs/gfs2/inode.h +++ b/fs/gfs2/inode.h | |||
@@ -99,7 +99,9 @@ err: | |||
99 | extern void gfs2_set_iop(struct inode *inode); | 99 | extern void gfs2_set_iop(struct inode *inode); |
100 | extern struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned type, | 100 | extern struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned type, |
101 | u64 no_addr, u64 no_formal_ino); | 101 | u64 no_addr, u64 no_formal_ino); |
102 | extern void gfs2_process_unlinked_inode(struct super_block *sb, u64 no_addr); | 102 | extern struct inode *gfs2_lookup_by_inum(struct gfs2_sbd *sdp, u64 no_addr, |
103 | u64 *no_formal_ino, | ||
104 | unsigned int blktype); | ||
103 | extern struct inode *gfs2_ilookup(struct super_block *sb, u64 no_addr); | 105 | extern struct inode *gfs2_ilookup(struct super_block *sb, u64 no_addr); |
104 | 106 | ||
105 | extern int gfs2_inode_refresh(struct gfs2_inode *ip); | 107 | extern int gfs2_inode_refresh(struct gfs2_inode *ip); |
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index 58a9b9998b42..f606baf9ba72 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c | |||
@@ -631,6 +631,7 @@ static int gfs2_adjust_quota(struct gfs2_inode *ip, loff_t loc, | |||
631 | struct fs_disk_quota *fdq) | 631 | struct fs_disk_quota *fdq) |
632 | { | 632 | { |
633 | struct inode *inode = &ip->i_inode; | 633 | struct inode *inode = &ip->i_inode; |
634 | struct gfs2_sbd *sdp = GFS2_SB(inode); | ||
634 | struct address_space *mapping = inode->i_mapping; | 635 | struct address_space *mapping = inode->i_mapping; |
635 | unsigned long index = loc >> PAGE_CACHE_SHIFT; | 636 | unsigned long index = loc >> PAGE_CACHE_SHIFT; |
636 | unsigned offset = loc & (PAGE_CACHE_SIZE - 1); | 637 | unsigned offset = loc & (PAGE_CACHE_SIZE - 1); |
@@ -658,11 +659,11 @@ static int gfs2_adjust_quota(struct gfs2_inode *ip, loff_t loc, | |||
658 | qd->qd_qb.qb_value = qp->qu_value; | 659 | qd->qd_qb.qb_value = qp->qu_value; |
659 | if (fdq) { | 660 | if (fdq) { |
660 | if (fdq->d_fieldmask & FS_DQ_BSOFT) { | 661 | if (fdq->d_fieldmask & FS_DQ_BSOFT) { |
661 | qp->qu_warn = cpu_to_be64(fdq->d_blk_softlimit); | 662 | qp->qu_warn = cpu_to_be64(fdq->d_blk_softlimit >> sdp->sd_fsb2bb_shift); |
662 | qd->qd_qb.qb_warn = qp->qu_warn; | 663 | qd->qd_qb.qb_warn = qp->qu_warn; |
663 | } | 664 | } |
664 | if (fdq->d_fieldmask & FS_DQ_BHARD) { | 665 | if (fdq->d_fieldmask & FS_DQ_BHARD) { |
665 | qp->qu_limit = cpu_to_be64(fdq->d_blk_hardlimit); | 666 | qp->qu_limit = cpu_to_be64(fdq->d_blk_hardlimit >> sdp->sd_fsb2bb_shift); |
666 | qd->qd_qb.qb_limit = qp->qu_limit; | 667 | qd->qd_qb.qb_limit = qp->qu_limit; |
667 | } | 668 | } |
668 | } | 669 | } |
@@ -1497,9 +1498,9 @@ static int gfs2_get_dqblk(struct super_block *sb, int type, qid_t id, | |||
1497 | fdq->d_version = FS_DQUOT_VERSION; | 1498 | fdq->d_version = FS_DQUOT_VERSION; |
1498 | fdq->d_flags = (type == QUOTA_USER) ? FS_USER_QUOTA : FS_GROUP_QUOTA; | 1499 | fdq->d_flags = (type == QUOTA_USER) ? FS_USER_QUOTA : FS_GROUP_QUOTA; |
1499 | fdq->d_id = id; | 1500 | fdq->d_id = id; |
1500 | fdq->d_blk_hardlimit = be64_to_cpu(qlvb->qb_limit); | 1501 | fdq->d_blk_hardlimit = be64_to_cpu(qlvb->qb_limit) << sdp->sd_fsb2bb_shift; |
1501 | fdq->d_blk_softlimit = be64_to_cpu(qlvb->qb_warn); | 1502 | fdq->d_blk_softlimit = be64_to_cpu(qlvb->qb_warn) << sdp->sd_fsb2bb_shift; |
1502 | fdq->d_bcount = be64_to_cpu(qlvb->qb_value); | 1503 | fdq->d_bcount = be64_to_cpu(qlvb->qb_value) << sdp->sd_fsb2bb_shift; |
1503 | 1504 | ||
1504 | gfs2_glock_dq_uninit(&q_gh); | 1505 | gfs2_glock_dq_uninit(&q_gh); |
1505 | out: | 1506 | out: |
@@ -1566,10 +1567,10 @@ static int gfs2_set_dqblk(struct super_block *sb, int type, qid_t id, | |||
1566 | 1567 | ||
1567 | /* If nothing has changed, this is a no-op */ | 1568 | /* If nothing has changed, this is a no-op */ |
1568 | if ((fdq->d_fieldmask & FS_DQ_BSOFT) && | 1569 | if ((fdq->d_fieldmask & FS_DQ_BSOFT) && |
1569 | (fdq->d_blk_softlimit == be64_to_cpu(qd->qd_qb.qb_warn))) | 1570 | ((fdq->d_blk_softlimit >> sdp->sd_fsb2bb_shift) == be64_to_cpu(qd->qd_qb.qb_warn))) |
1570 | fdq->d_fieldmask ^= FS_DQ_BSOFT; | 1571 | fdq->d_fieldmask ^= FS_DQ_BSOFT; |
1571 | if ((fdq->d_fieldmask & FS_DQ_BHARD) && | 1572 | if ((fdq->d_fieldmask & FS_DQ_BHARD) && |
1572 | (fdq->d_blk_hardlimit == be64_to_cpu(qd->qd_qb.qb_limit))) | 1573 | ((fdq->d_blk_hardlimit >> sdp->sd_fsb2bb_shift) == be64_to_cpu(qd->qd_qb.qb_limit))) |
1573 | fdq->d_fieldmask ^= FS_DQ_BHARD; | 1574 | fdq->d_fieldmask ^= FS_DQ_BHARD; |
1574 | if (fdq->d_fieldmask == 0) | 1575 | if (fdq->d_fieldmask == 0) |
1575 | goto out_i; | 1576 | goto out_i; |
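The qb_warn/qb_limit values kept in the GFS2 quota LVB are in filesystem blocks, while the fs_disk_quota interface works in 512-byte basic blocks; the sd_fsb2bb_shift conversions above shift one way when reporting limits and the other way when accepting them. A minimal userspace sketch of that conversion follows; the 4KiB filesystem block size is an assumption for illustration, not something taken from the patch.

/* Sketch of the fs-block <-> 512-byte basic-block conversion performed by
 * sd_fsb2bb_shift; the 4KiB block size is an assumed example value. */
#include <stdint.h>
#include <stdio.h>

#define BASIC_BLOCK_SHIFT 9               /* 512-byte units used by fs_disk_quota */

int main(void)
{
	unsigned int bsize_shift = 12;                        /* 4096-byte fs blocks */
	unsigned int fsb2bb_shift = bsize_shift - BASIC_BLOCK_SHIFT;

	uint64_t limit_fs_blocks = 1000;                      /* as stored in qb_limit */
	uint64_t limit_bb = limit_fs_blocks << fsb2bb_shift;  /* as reported to userspace */
	uint64_t back_fs_blocks = limit_bb >> fsb2bb_shift;   /* as written back by a set request */

	printf("%llu fs blocks = %llu basic blocks (round trip %llu)\n",
	       (unsigned long long)limit_fs_blocks,
	       (unsigned long long)limit_bb,
	       (unsigned long long)back_fs_blocks);
	return 0;
}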
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index bef3ab6cf5c1..33c8407b876f 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c | |||
@@ -963,17 +963,18 @@ static int try_rgrp_fit(struct gfs2_rgrpd *rgd, struct gfs2_alloc *al) | |||
963 | * The inode, if one has been found, in inode. | 963 | * The inode, if one has been found, in inode. |
964 | */ | 964 | */ |
965 | 965 | ||
966 | static u64 try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, | 966 | static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip) |
967 | u64 skip) | ||
968 | { | 967 | { |
969 | u32 goal = 0, block; | 968 | u32 goal = 0, block; |
970 | u64 no_addr; | 969 | u64 no_addr; |
971 | struct gfs2_sbd *sdp = rgd->rd_sbd; | 970 | struct gfs2_sbd *sdp = rgd->rd_sbd; |
972 | unsigned int n; | 971 | unsigned int n; |
972 | struct gfs2_glock *gl; | ||
973 | struct gfs2_inode *ip; | ||
974 | int error; | ||
975 | int found = 0; | ||
973 | 976 | ||
974 | for(;;) { | 977 | while (goal < rgd->rd_data) { |
975 | if (goal >= rgd->rd_data) | ||
976 | break; | ||
977 | down_write(&sdp->sd_log_flush_lock); | 978 | down_write(&sdp->sd_log_flush_lock); |
978 | n = 1; | 979 | n = 1; |
979 | block = rgblk_search(rgd, goal, GFS2_BLKST_UNLINKED, | 980 | block = rgblk_search(rgd, goal, GFS2_BLKST_UNLINKED, |
@@ -990,11 +991,32 @@ static u64 try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, | |||
990 | if (no_addr == skip) | 991 | if (no_addr == skip) |
991 | continue; | 992 | continue; |
992 | *last_unlinked = no_addr; | 993 | *last_unlinked = no_addr; |
993 | return no_addr; | 994 | |
995 | error = gfs2_glock_get(sdp, no_addr, &gfs2_inode_glops, CREATE, &gl); | ||
996 | if (error) | ||
997 | continue; | ||
998 | |||
999 | /* If the inode is already in cache, we can ignore it here | ||
1000 | * because the existing inode disposal code will deal with | ||
1001 | * it when all refs have gone away. Accessing gl_object like | ||
1002 | * this is not safe in general. Here it is ok because we do | ||
1003 | * not dereference the pointer, and we only need an approx | ||
1004 | * answer to whether it is NULL or not. | ||
1005 | */ | ||
1006 | ip = gl->gl_object; | ||
1007 | |||
1008 | if (ip || queue_work(gfs2_delete_workqueue, &gl->gl_delete) == 0) | ||
1009 | gfs2_glock_put(gl); | ||
1010 | else | ||
1011 | found++; | ||
1012 | |||
1013 | /* Limit reclaim to sensible number of tasks */ | ||
1014 | if (found > 2*NR_CPUS) | ||
1015 | return; | ||
994 | } | 1016 | } |
995 | 1017 | ||
996 | rgd->rd_flags &= ~GFS2_RDF_CHECK; | 1018 | rgd->rd_flags &= ~GFS2_RDF_CHECK; |
997 | return 0; | 1019 | return; |
998 | } | 1020 | } |
999 | 1021 | ||
1000 | /** | 1022 | /** |
@@ -1075,11 +1097,9 @@ static void forward_rgrp_set(struct gfs2_sbd *sdp, struct gfs2_rgrpd *rgd) | |||
1075 | * Try to acquire rgrp in way which avoids contending with others. | 1097 | * Try to acquire rgrp in way which avoids contending with others. |
1076 | * | 1098 | * |
1077 | * Returns: errno | 1099 | * Returns: errno |
1078 | * unlinked: the block address of an unlinked block to be reclaimed | ||
1079 | */ | 1100 | */ |
1080 | 1101 | ||
1081 | static int get_local_rgrp(struct gfs2_inode *ip, u64 *unlinked, | 1102 | static int get_local_rgrp(struct gfs2_inode *ip, u64 *last_unlinked) |
1082 | u64 *last_unlinked) | ||
1083 | { | 1103 | { |
1084 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); | 1104 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); |
1085 | struct gfs2_rgrpd *rgd, *begin = NULL; | 1105 | struct gfs2_rgrpd *rgd, *begin = NULL; |
@@ -1089,7 +1109,6 @@ static int get_local_rgrp(struct gfs2_inode *ip, u64 *unlinked, | |||
1089 | int loops = 0; | 1109 | int loops = 0; |
1090 | int error, rg_locked; | 1110 | int error, rg_locked; |
1091 | 1111 | ||
1092 | *unlinked = 0; | ||
1093 | rgd = gfs2_blk2rgrpd(sdp, ip->i_goal); | 1112 | rgd = gfs2_blk2rgrpd(sdp, ip->i_goal); |
1094 | 1113 | ||
1095 | while (rgd) { | 1114 | while (rgd) { |
@@ -1106,17 +1125,10 @@ static int get_local_rgrp(struct gfs2_inode *ip, u64 *unlinked, | |||
1106 | case 0: | 1125 | case 0: |
1107 | if (try_rgrp_fit(rgd, al)) | 1126 | if (try_rgrp_fit(rgd, al)) |
1108 | goto out; | 1127 | goto out; |
1109 | /* If the rg came in already locked, there's no | 1128 | if (rgd->rd_flags & GFS2_RDF_CHECK) |
1110 | way we can recover from a failed try_rgrp_unlink | 1129 | try_rgrp_unlink(rgd, last_unlinked, ip->i_no_addr); |
1111 | because that would require an iput which can only | ||
1112 | happen after the rgrp is unlocked. */ | ||
1113 | if (!rg_locked && rgd->rd_flags & GFS2_RDF_CHECK) | ||
1114 | *unlinked = try_rgrp_unlink(rgd, last_unlinked, | ||
1115 | ip->i_no_addr); | ||
1116 | if (!rg_locked) | 1130 | if (!rg_locked) |
1117 | gfs2_glock_dq_uninit(&al->al_rgd_gh); | 1131 | gfs2_glock_dq_uninit(&al->al_rgd_gh); |
1118 | if (*unlinked) | ||
1119 | return -EAGAIN; | ||
1120 | /* fall through */ | 1132 | /* fall through */ |
1121 | case GLR_TRYFAILED: | 1133 | case GLR_TRYFAILED: |
1122 | rgd = recent_rgrp_next(rgd); | 1134 | rgd = recent_rgrp_next(rgd); |
@@ -1145,13 +1157,10 @@ static int get_local_rgrp(struct gfs2_inode *ip, u64 *unlinked, | |||
1145 | case 0: | 1157 | case 0: |
1146 | if (try_rgrp_fit(rgd, al)) | 1158 | if (try_rgrp_fit(rgd, al)) |
1147 | goto out; | 1159 | goto out; |
1148 | if (!rg_locked && rgd->rd_flags & GFS2_RDF_CHECK) | 1160 | if (rgd->rd_flags & GFS2_RDF_CHECK) |
1149 | *unlinked = try_rgrp_unlink(rgd, last_unlinked, | 1161 | try_rgrp_unlink(rgd, last_unlinked, ip->i_no_addr); |
1150 | ip->i_no_addr); | ||
1151 | if (!rg_locked) | 1162 | if (!rg_locked) |
1152 | gfs2_glock_dq_uninit(&al->al_rgd_gh); | 1163 | gfs2_glock_dq_uninit(&al->al_rgd_gh); |
1153 | if (*unlinked) | ||
1154 | return -EAGAIN; | ||
1155 | break; | 1164 | break; |
1156 | 1165 | ||
1157 | case GLR_TRYFAILED: | 1166 | case GLR_TRYFAILED: |
@@ -1204,12 +1213,12 @@ int gfs2_inplace_reserve_i(struct gfs2_inode *ip, int hold_rindex, | |||
1204 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); | 1213 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); |
1205 | struct gfs2_alloc *al = ip->i_alloc; | 1214 | struct gfs2_alloc *al = ip->i_alloc; |
1206 | int error = 0; | 1215 | int error = 0; |
1207 | u64 last_unlinked = NO_BLOCK, unlinked; | 1216 | u64 last_unlinked = NO_BLOCK; |
1217 | int tries = 0; | ||
1208 | 1218 | ||
1209 | if (gfs2_assert_warn(sdp, al->al_requested)) | 1219 | if (gfs2_assert_warn(sdp, al->al_requested)) |
1210 | return -EINVAL; | 1220 | return -EINVAL; |
1211 | 1221 | ||
1212 | try_again: | ||
1213 | if (hold_rindex) { | 1222 | if (hold_rindex) { |
1214 | /* We need to hold the rindex unless the inode we're using is | 1223 | /* We need to hold the rindex unless the inode we're using is |
1215 | the rindex itself, in which case it's already held. */ | 1224 | the rindex itself, in which case it's already held. */ |
@@ -1218,31 +1227,23 @@ try_again: | |||
1218 | else if (!sdp->sd_rgrps) /* We may not have the rindex read | 1227 | else if (!sdp->sd_rgrps) /* We may not have the rindex read |
1219 | in, so: */ | 1228 | in, so: */ |
1220 | error = gfs2_ri_update_special(ip); | 1229 | error = gfs2_ri_update_special(ip); |
1230 | if (error) | ||
1231 | return error; | ||
1221 | } | 1232 | } |
1222 | 1233 | ||
1223 | if (error) | 1234 | do { |
1224 | return error; | 1235 | error = get_local_rgrp(ip, &last_unlinked); |
1236 | /* If there is no space, flushing the log may release some */ | ||
1237 | if (error) | ||
1238 | gfs2_log_flush(sdp, NULL); | ||
1239 | } while (error && tries++ < 3); | ||
1225 | 1240 | ||
1226 | /* Find an rgrp suitable for allocation. If it encounters any unlinked | ||
1227 | dinodes along the way, error will equal -EAGAIN and unlinked will | ||
1228 | contains it block address. We then need to look up that inode and | ||
1229 | try to free it, and try the allocation again. */ | ||
1230 | error = get_local_rgrp(ip, &unlinked, &last_unlinked); | ||
1231 | if (error) { | 1241 | if (error) { |
1232 | if (hold_rindex && ip != GFS2_I(sdp->sd_rindex)) | 1242 | if (hold_rindex && ip != GFS2_I(sdp->sd_rindex)) |
1233 | gfs2_glock_dq_uninit(&al->al_ri_gh); | 1243 | gfs2_glock_dq_uninit(&al->al_ri_gh); |
1234 | if (error != -EAGAIN) | 1244 | return error; |
1235 | return error; | ||
1236 | |||
1237 | gfs2_process_unlinked_inode(ip->i_inode.i_sb, unlinked); | ||
1238 | /* regardless of whether or not gfs2_process_unlinked_inode | ||
1239 | was successful, we don't want to repeat it again. */ | ||
1240 | last_unlinked = unlinked; | ||
1241 | gfs2_log_flush(sdp, NULL); | ||
1242 | error = 0; | ||
1243 | |||
1244 | goto try_again; | ||
1245 | } | 1245 | } |
1246 | |||
1246 | /* no error, so we have the rgrp set in the inode's allocation. */ | 1247 | /* no error, so we have the rgrp set in the inode's allocation. */ |
1247 | al->al_file = file; | 1248 | al->al_file = file; |
1248 | al->al_line = line; | 1249 | al->al_line = line; |
diff --git a/fs/hpfs/buffer.c b/fs/hpfs/buffer.c index eac5f96323e3..793cb9d943d2 100644 --- a/fs/hpfs/buffer.c +++ b/fs/hpfs/buffer.c | |||
@@ -14,7 +14,7 @@ void hpfs_lock_creation(struct super_block *s) | |||
14 | #ifdef DEBUG_LOCKS | 14 | #ifdef DEBUG_LOCKS |
15 | printk("lock creation\n"); | 15 | printk("lock creation\n"); |
16 | #endif | 16 | #endif |
17 | down(&hpfs_sb(s)->hpfs_creation_de); | 17 | mutex_lock(&hpfs_sb(s)->hpfs_creation_de); |
18 | } | 18 | } |
19 | 19 | ||
20 | void hpfs_unlock_creation(struct super_block *s) | 20 | void hpfs_unlock_creation(struct super_block *s) |
@@ -22,7 +22,7 @@ void hpfs_unlock_creation(struct super_block *s) | |||
22 | #ifdef DEBUG_LOCKS | 22 | #ifdef DEBUG_LOCKS |
23 | printk("unlock creation\n"); | 23 | printk("unlock creation\n"); |
24 | #endif | 24 | #endif |
25 | up(&hpfs_sb(s)->hpfs_creation_de); | 25 | mutex_unlock(&hpfs_sb(s)->hpfs_creation_de); |
26 | } | 26 | } |
27 | 27 | ||
28 | /* Map a sector into a buffer and return pointers to it and to the buffer. */ | 28 | /* Map a sector into a buffer and return pointers to it and to the buffer. */ |
diff --git a/fs/hpfs/hpfs_fn.h b/fs/hpfs/hpfs_fn.h index b59eac0232a0..2fee17d0d9ab 100644 --- a/fs/hpfs/hpfs_fn.h +++ b/fs/hpfs/hpfs_fn.h | |||
@@ -87,7 +87,7 @@ struct hpfs_sb_info { | |||
87 | unsigned *sb_bmp_dir; /* main bitmap directory */ | 87 | unsigned *sb_bmp_dir; /* main bitmap directory */ |
88 | unsigned sb_c_bitmap; /* current bitmap */ | 88 | unsigned sb_c_bitmap; /* current bitmap */ |
89 | unsigned sb_max_fwd_alloc; /* max forward allocation */ | 89 | unsigned sb_max_fwd_alloc; /* max forward allocation */ |
90 | struct semaphore hpfs_creation_de; /* when creating dirents, nobody else | 90 | struct mutex hpfs_creation_de; /* when creating dirents, nobody else |
91 | can alloc blocks */ | 91 | can alloc blocks */ |
92 | /*unsigned sb_mounting : 1;*/ | 92 | /*unsigned sb_mounting : 1;*/ |
93 | int sb_timeshift; | 93 | int sb_timeshift; |
diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c index bb69389972eb..6c5f01597c3a 100644 --- a/fs/hpfs/super.c +++ b/fs/hpfs/super.c | |||
@@ -491,7 +491,7 @@ static int hpfs_fill_super(struct super_block *s, void *options, int silent) | |||
491 | sbi->sb_bmp_dir = NULL; | 491 | sbi->sb_bmp_dir = NULL; |
492 | sbi->sb_cp_table = NULL; | 492 | sbi->sb_cp_table = NULL; |
493 | 493 | ||
494 | init_MUTEX(&sbi->hpfs_creation_de); | 494 | mutex_init(&sbi->hpfs_creation_de); |
495 | 495 | ||
496 | uid = current_uid(); | 496 | uid = current_uid(); |
497 | gid = current_gid(); | 497 | gid = current_gid(); |
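The hpfs_creation_de semaphore was only ever used as a sleeping lock, so the change above converts it to a struct mutex: init_MUTEX/down/up become mutex_init/mutex_lock/mutex_unlock. A minimal sketch of the same pattern on a made-up structure (not the real hpfs_sb_info):

/* Sketch only: the same semaphore-to-mutex conversion on an illustrative structure. */
#include <linux/mutex.h>

struct example_sbi {
	struct mutex creation_lock;       /* was: struct semaphore */
};

static void example_init(struct example_sbi *sbi)
{
	mutex_init(&sbi->creation_lock);          /* replaces init_MUTEX() */
}

static void example_lock_creation(struct example_sbi *sbi)
{
	mutex_lock(&sbi->creation_lock);          /* replaces down() */
}

static void example_unlock_creation(struct example_sbi *sbi)
{
	mutex_unlock(&sbi->creation_lock);        /* replaces up() */
}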
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index d6cfac1f0a40..a5fe68189eed 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c | |||
@@ -932,8 +932,7 @@ struct file *hugetlb_file_setup(const char *name, size_t size, int acctflag, | |||
932 | if (creat_flags == HUGETLB_SHMFS_INODE && !can_do_hugetlb_shm()) { | 932 | if (creat_flags == HUGETLB_SHMFS_INODE && !can_do_hugetlb_shm()) { |
933 | *user = current_user(); | 933 | *user = current_user(); |
934 | if (user_shm_lock(size, *user)) { | 934 | if (user_shm_lock(size, *user)) { |
935 | WARN_ONCE(1, | 935 | printk_once(KERN_WARNING "Using mlock ulimits for SHM_HUGETLB is deprecated\n"); |
936 | "Using mlock ulimits for SHM_HUGETLB deprecated\n"); | ||
937 | } else { | 936 | } else { |
938 | *user = NULL; | 937 | *user = NULL; |
939 | return ERR_PTR(-EPERM); | 938 | return ERR_PTR(-EPERM); |
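WARN_ONCE() dumps a full stack trace, which is more than a deprecation notice needs; printk_once() logs the message a single time without one. A minimal sketch with a placeholder message:

/* Sketch: one-time deprecation message without a backtrace. */
#include <linux/printk.h>

static void example_legacy_path(void)
{
	printk_once(KERN_WARNING "example: this option is deprecated\n");
}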
diff --git a/fs/ioctl.c b/fs/ioctl.c index e92fdbb3bc3a..d6cc16476620 100644 --- a/fs/ioctl.c +++ b/fs/ioctl.c | |||
@@ -6,7 +6,6 @@ | |||
6 | 6 | ||
7 | #include <linux/syscalls.h> | 7 | #include <linux/syscalls.h> |
8 | #include <linux/mm.h> | 8 | #include <linux/mm.h> |
9 | #include <linux/smp_lock.h> | ||
10 | #include <linux/capability.h> | 9 | #include <linux/capability.h> |
11 | #include <linux/file.h> | 10 | #include <linux/file.h> |
12 | #include <linux/fs.h> | 11 | #include <linux/fs.h> |
@@ -530,41 +529,6 @@ static int ioctl_fsthaw(struct file *filp) | |||
530 | return thaw_super(sb); | 529 | return thaw_super(sb); |
531 | } | 530 | } |
532 | 531 | ||
533 | static int ioctl_fstrim(struct file *filp, void __user *argp) | ||
534 | { | ||
535 | struct super_block *sb = filp->f_path.dentry->d_inode->i_sb; | ||
536 | struct fstrim_range range; | ||
537 | int ret = 0; | ||
538 | |||
539 | if (!capable(CAP_SYS_ADMIN)) | ||
540 | return -EPERM; | ||
541 | |||
542 | /* If filesystem doesn't support trim feature, return. */ | ||
543 | if (sb->s_op->trim_fs == NULL) | ||
544 | return -EOPNOTSUPP; | ||
545 | |||
546 | /* If a blockdevice-backed filesystem isn't specified, return EINVAL. */ | ||
547 | if (sb->s_bdev == NULL) | ||
548 | return -EINVAL; | ||
549 | |||
550 | if (argp == NULL) { | ||
551 | range.start = 0; | ||
552 | range.len = ULLONG_MAX; | ||
553 | range.minlen = 0; | ||
554 | } else if (copy_from_user(&range, argp, sizeof(range))) | ||
555 | return -EFAULT; | ||
556 | |||
557 | ret = sb->s_op->trim_fs(sb, &range); | ||
558 | if (ret < 0) | ||
559 | return ret; | ||
560 | |||
561 | if ((argp != NULL) && | ||
562 | (copy_to_user(argp, &range, sizeof(range)))) | ||
563 | return -EFAULT; | ||
564 | |||
565 | return 0; | ||
566 | } | ||
567 | |||
568 | /* | 532 | /* |
569 | * When you add any new common ioctls to the switches above and below | 533 | * When you add any new common ioctls to the switches above and below |
570 | * please update compat_sys_ioctl() too. | 534 | * please update compat_sys_ioctl() too. |
@@ -615,10 +579,6 @@ int do_vfs_ioctl(struct file *filp, unsigned int fd, unsigned int cmd, | |||
615 | error = ioctl_fsthaw(filp); | 579 | error = ioctl_fsthaw(filp); |
616 | break; | 580 | break; |
617 | 581 | ||
618 | case FITRIM: | ||
619 | error = ioctl_fstrim(filp, argp); | ||
620 | break; | ||
621 | |||
622 | case FS_IOC_FIEMAP: | 582 | case FS_IOC_FIEMAP: |
623 | return ioctl_fiemap(filp, arg); | 583 | return ioctl_fiemap(filp, arg); |
624 | 584 | ||
diff --git a/fs/ioprio.c b/fs/ioprio.c index 748cfb92dcc6..7da2a06508e5 100644 --- a/fs/ioprio.c +++ b/fs/ioprio.c | |||
@@ -103,12 +103,7 @@ SYSCALL_DEFINE3(ioprio_set, int, which, int, who, int, ioprio) | |||
103 | } | 103 | } |
104 | 104 | ||
105 | ret = -ESRCH; | 105 | ret = -ESRCH; |
106 | /* | 106 | rcu_read_lock(); |
107 | * We want IOPRIO_WHO_PGRP/IOPRIO_WHO_USER to be "atomic", | ||
108 | * so we can't use rcu_read_lock(). See re-copy of ->ioprio | ||
109 | * in copy_process(). | ||
110 | */ | ||
111 | read_lock(&tasklist_lock); | ||
112 | switch (which) { | 107 | switch (which) { |
113 | case IOPRIO_WHO_PROCESS: | 108 | case IOPRIO_WHO_PROCESS: |
114 | if (!who) | 109 | if (!who) |
@@ -153,7 +148,7 @@ free_uid: | |||
153 | ret = -EINVAL; | 148 | ret = -EINVAL; |
154 | } | 149 | } |
155 | 150 | ||
156 | read_unlock(&tasklist_lock); | 151 | rcu_read_unlock(); |
157 | return ret; | 152 | return ret; |
158 | } | 153 | } |
159 | 154 | ||
@@ -197,7 +192,7 @@ SYSCALL_DEFINE2(ioprio_get, int, which, int, who) | |||
197 | int ret = -ESRCH; | 192 | int ret = -ESRCH; |
198 | int tmpio; | 193 | int tmpio; |
199 | 194 | ||
200 | read_lock(&tasklist_lock); | 195 | rcu_read_lock(); |
201 | switch (which) { | 196 | switch (which) { |
202 | case IOPRIO_WHO_PROCESS: | 197 | case IOPRIO_WHO_PROCESS: |
203 | if (!who) | 198 | if (!who) |
@@ -250,6 +245,6 @@ SYSCALL_DEFINE2(ioprio_get, int, which, int, who) | |||
250 | ret = -EINVAL; | 245 | ret = -EINVAL; |
251 | } | 246 | } |
252 | 247 | ||
253 | read_unlock(&tasklist_lock); | 248 | rcu_read_unlock(); |
254 | return ret; | 249 | return ret; |
255 | } | 250 | } |
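Both ioprio syscalls only look tasks up and read their state, and find_task_by_vpid() requires an RCU read-side critical section, so rcu_read_lock()/rcu_read_unlock() replace the tasklist_lock readers above. A minimal sketch of that lookup pattern with a simplified body; task_nice() stands in for the real ioprio handling:

/* Sketch of a pid -> task lookup under RCU, the locking pattern the ioprio
 * syscalls switch to; error handling is simplified. */
#include <linux/rcupdate.h>
#include <linux/sched.h>
#include <linux/errno.h>

static int example_get_task_nice(pid_t nr)
{
	struct task_struct *p;
	int ret = -ESRCH;

	rcu_read_lock();                  /* replaces read_lock(&tasklist_lock) */
	p = find_task_by_vpid(nr);        /* only valid under rcu_read_lock() */
	if (p)
		ret = task_nice(p);
	rcu_read_unlock();                /* replaces read_unlock(&tasklist_lock) */
	return ret;
}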
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 538417c1fdbb..f837ba953529 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c | |||
@@ -899,6 +899,14 @@ journal_t * jbd2_journal_init_dev(struct block_device *bdev, | |||
899 | 899 | ||
900 | /* journal descriptor can store up to n blocks -bzzz */ | 900 | /* journal descriptor can store up to n blocks -bzzz */ |
901 | journal->j_blocksize = blocksize; | 901 | journal->j_blocksize = blocksize; |
902 | journal->j_dev = bdev; | ||
903 | journal->j_fs_dev = fs_dev; | ||
904 | journal->j_blk_offset = start; | ||
905 | journal->j_maxlen = len; | ||
906 | bdevname(journal->j_dev, journal->j_devname); | ||
907 | p = journal->j_devname; | ||
908 | while ((p = strchr(p, '/'))) | ||
909 | *p = '!'; | ||
902 | jbd2_stats_proc_init(journal); | 910 | jbd2_stats_proc_init(journal); |
903 | n = journal->j_blocksize / sizeof(journal_block_tag_t); | 911 | n = journal->j_blocksize / sizeof(journal_block_tag_t); |
904 | journal->j_wbufsize = n; | 912 | journal->j_wbufsize = n; |
@@ -908,14 +916,6 @@ journal_t * jbd2_journal_init_dev(struct block_device *bdev, | |||
908 | __func__); | 916 | __func__); |
909 | goto out_err; | 917 | goto out_err; |
910 | } | 918 | } |
911 | journal->j_dev = bdev; | ||
912 | journal->j_fs_dev = fs_dev; | ||
913 | journal->j_blk_offset = start; | ||
914 | journal->j_maxlen = len; | ||
915 | bdevname(journal->j_dev, journal->j_devname); | ||
916 | p = journal->j_devname; | ||
917 | while ((p = strchr(p, '/'))) | ||
918 | *p = '!'; | ||
919 | 919 | ||
920 | bh = __getblk(journal->j_dev, start, journal->j_blocksize); | 920 | bh = __getblk(journal->j_dev, start, journal->j_blocksize); |
921 | if (!bh) { | 921 | if (!bh) { |
@@ -1838,7 +1838,6 @@ size_t journal_tag_bytes(journal_t *journal) | |||
1838 | */ | 1838 | */ |
1839 | #define JBD2_MAX_SLABS 8 | 1839 | #define JBD2_MAX_SLABS 8 |
1840 | static struct kmem_cache *jbd2_slab[JBD2_MAX_SLABS]; | 1840 | static struct kmem_cache *jbd2_slab[JBD2_MAX_SLABS]; |
1841 | static DECLARE_MUTEX(jbd2_slab_create_sem); | ||
1842 | 1841 | ||
1843 | static const char *jbd2_slab_names[JBD2_MAX_SLABS] = { | 1842 | static const char *jbd2_slab_names[JBD2_MAX_SLABS] = { |
1844 | "jbd2_1k", "jbd2_2k", "jbd2_4k", "jbd2_8k", | 1843 | "jbd2_1k", "jbd2_2k", "jbd2_4k", "jbd2_8k", |
@@ -1859,6 +1858,7 @@ static void jbd2_journal_destroy_slabs(void) | |||
1859 | 1858 | ||
1860 | static int jbd2_journal_create_slab(size_t size) | 1859 | static int jbd2_journal_create_slab(size_t size) |
1861 | { | 1860 | { |
1861 | static DEFINE_MUTEX(jbd2_slab_create_mutex); | ||
1862 | int i = order_base_2(size) - 10; | 1862 | int i = order_base_2(size) - 10; |
1863 | size_t slab_size; | 1863 | size_t slab_size; |
1864 | 1864 | ||
@@ -1870,16 +1870,16 @@ static int jbd2_journal_create_slab(size_t size) | |||
1870 | 1870 | ||
1871 | if (unlikely(i < 0)) | 1871 | if (unlikely(i < 0)) |
1872 | i = 0; | 1872 | i = 0; |
1873 | down(&jbd2_slab_create_sem); | 1873 | mutex_lock(&jbd2_slab_create_mutex); |
1874 | if (jbd2_slab[i]) { | 1874 | if (jbd2_slab[i]) { |
1875 | up(&jbd2_slab_create_sem); | 1875 | mutex_unlock(&jbd2_slab_create_mutex); |
1876 | return 0; /* Already created */ | 1876 | return 0; /* Already created */ |
1877 | } | 1877 | } |
1878 | 1878 | ||
1879 | slab_size = 1 << (i+10); | 1879 | slab_size = 1 << (i+10); |
1880 | jbd2_slab[i] = kmem_cache_create(jbd2_slab_names[i], slab_size, | 1880 | jbd2_slab[i] = kmem_cache_create(jbd2_slab_names[i], slab_size, |
1881 | slab_size, 0, NULL); | 1881 | slab_size, 0, NULL); |
1882 | up(&jbd2_slab_create_sem); | 1882 | mutex_unlock(&jbd2_slab_create_mutex); |
1883 | if (!jbd2_slab[i]) { | 1883 | if (!jbd2_slab[i]) { |
1884 | printk(KERN_EMERG "JBD2: no memory for jbd2_slab cache\n"); | 1884 | printk(KERN_EMERG "JBD2: no memory for jbd2_slab cache\n"); |
1885 | return -ENOMEM; | 1885 | return -ENOMEM; |
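The file-scope DECLARE_MUTEX() (a semaphore despite its name) becomes a function-local static DEFINE_MUTEX(), switching to a real mutex and narrowing its scope to the only function that takes it. A minimal sketch of the lazy-create-under-mutex pattern it protects; the cache name and size are placeholders, not taken from jbd2:

/* Sketch of lazily creating a shared resource under a function-local static
 * mutex; all names and sizes here are illustrative. */
#include <linux/mutex.h>
#include <linux/slab.h>
#include <linux/errno.h>

static struct kmem_cache *example_cache;

static int example_create_cache(void)
{
	static DEFINE_MUTEX(create_mutex);        /* visible only inside this function */

	mutex_lock(&create_mutex);
	if (example_cache) {                      /* another caller won the race */
		mutex_unlock(&create_mutex);
		return 0;
	}
	example_cache = kmem_cache_create("example_cache", 256, 0, 0, NULL);
	mutex_unlock(&create_mutex);

	return example_cache ? 0 : -ENOMEM;
}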
diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c index d5bb86866e6c..25509eb28fd7 100644 --- a/fs/lockd/clntlock.c +++ b/fs/lockd/clntlock.c | |||
@@ -14,7 +14,6 @@ | |||
14 | #include <linux/sunrpc/clnt.h> | 14 | #include <linux/sunrpc/clnt.h> |
15 | #include <linux/sunrpc/svc.h> | 15 | #include <linux/sunrpc/svc.h> |
16 | #include <linux/lockd/lockd.h> | 16 | #include <linux/lockd/lockd.h> |
17 | #include <linux/smp_lock.h> | ||
18 | #include <linux/kthread.h> | 17 | #include <linux/kthread.h> |
19 | 18 | ||
20 | #define NLMDBG_FACILITY NLMDBG_CLIENT | 19 | #define NLMDBG_FACILITY NLMDBG_CLIENT |
diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c index 47ea1e1925b8..332c54cf75e0 100644 --- a/fs/lockd/clntproc.c +++ b/fs/lockd/clntproc.c | |||
@@ -7,7 +7,6 @@ | |||
7 | */ | 7 | */ |
8 | 8 | ||
9 | #include <linux/module.h> | 9 | #include <linux/module.h> |
10 | #include <linux/smp_lock.h> | ||
11 | #include <linux/slab.h> | 10 | #include <linux/slab.h> |
12 | #include <linux/types.h> | 11 | #include <linux/types.h> |
13 | #include <linux/errno.h> | 12 | #include <linux/errno.h> |
diff --git a/fs/lockd/host.c b/fs/lockd/host.c index 25e21e4023b2..ed0c59fe23ce 100644 --- a/fs/lockd/host.c +++ b/fs/lockd/host.c | |||
@@ -124,7 +124,7 @@ static struct nlm_host *nlm_lookup_host(struct nlm_lookup_host_info *ni) | |||
124 | continue; | 124 | continue; |
125 | if (host->h_server != ni->server) | 125 | if (host->h_server != ni->server) |
126 | continue; | 126 | continue; |
127 | if (ni->server && | 127 | if (ni->server && ni->src_len != 0 && |
128 | !rpc_cmp_addr(nlm_srcaddr(host), ni->src_sap)) | 128 | !rpc_cmp_addr(nlm_srcaddr(host), ni->src_sap)) |
129 | continue; | 129 | continue; |
130 | 130 | ||
@@ -167,6 +167,7 @@ static struct nlm_host *nlm_lookup_host(struct nlm_lookup_host_info *ni) | |||
167 | host->h_addrlen = ni->salen; | 167 | host->h_addrlen = ni->salen; |
168 | rpc_set_port(nlm_addr(host), 0); | 168 | rpc_set_port(nlm_addr(host), 0); |
169 | memcpy(nlm_srcaddr(host), ni->src_sap, ni->src_len); | 169 | memcpy(nlm_srcaddr(host), ni->src_sap, ni->src_len); |
170 | host->h_srcaddrlen = ni->src_len; | ||
170 | host->h_version = ni->version; | 171 | host->h_version = ni->version; |
171 | host->h_proto = ni->protocol; | 172 | host->h_proto = ni->protocol; |
172 | host->h_rpcclnt = NULL; | 173 | host->h_rpcclnt = NULL; |
@@ -238,9 +239,6 @@ struct nlm_host *nlmclnt_lookup_host(const struct sockaddr *sap, | |||
238 | const char *hostname, | 239 | const char *hostname, |
239 | int noresvport) | 240 | int noresvport) |
240 | { | 241 | { |
241 | const struct sockaddr source = { | ||
242 | .sa_family = AF_UNSPEC, | ||
243 | }; | ||
244 | struct nlm_lookup_host_info ni = { | 242 | struct nlm_lookup_host_info ni = { |
245 | .server = 0, | 243 | .server = 0, |
246 | .sap = sap, | 244 | .sap = sap, |
@@ -249,8 +247,6 @@ struct nlm_host *nlmclnt_lookup_host(const struct sockaddr *sap, | |||
249 | .version = version, | 247 | .version = version, |
250 | .hostname = hostname, | 248 | .hostname = hostname, |
251 | .hostname_len = strlen(hostname), | 249 | .hostname_len = strlen(hostname), |
252 | .src_sap = &source, | ||
253 | .src_len = sizeof(source), | ||
254 | .noresvport = noresvport, | 250 | .noresvport = noresvport, |
255 | }; | 251 | }; |
256 | 252 | ||
@@ -357,7 +353,6 @@ nlm_bind_host(struct nlm_host *host) | |||
357 | .protocol = host->h_proto, | 353 | .protocol = host->h_proto, |
358 | .address = nlm_addr(host), | 354 | .address = nlm_addr(host), |
359 | .addrsize = host->h_addrlen, | 355 | .addrsize = host->h_addrlen, |
360 | .saddress = nlm_srcaddr(host), | ||
361 | .timeout = &timeparms, | 356 | .timeout = &timeparms, |
362 | .servername = host->h_name, | 357 | .servername = host->h_name, |
363 | .program = &nlm_program, | 358 | .program = &nlm_program, |
@@ -376,6 +371,8 @@ nlm_bind_host(struct nlm_host *host) | |||
376 | args.flags |= RPC_CLNT_CREATE_HARDRTRY; | 371 | args.flags |= RPC_CLNT_CREATE_HARDRTRY; |
377 | if (host->h_noresvport) | 372 | if (host->h_noresvport) |
378 | args.flags |= RPC_CLNT_CREATE_NONPRIVPORT; | 373 | args.flags |= RPC_CLNT_CREATE_NONPRIVPORT; |
374 | if (host->h_srcaddrlen) | ||
375 | args.saddress = nlm_srcaddr(host); | ||
379 | 376 | ||
380 | clnt = rpc_create(&args); | 377 | clnt = rpc_create(&args); |
381 | if (!IS_ERR(clnt)) | 378 | if (!IS_ERR(clnt)) |
diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c index a336e832475d..38d261192453 100644 --- a/fs/lockd/svc4proc.c +++ b/fs/lockd/svc4proc.c | |||
@@ -9,7 +9,6 @@ | |||
9 | 9 | ||
10 | #include <linux/types.h> | 10 | #include <linux/types.h> |
11 | #include <linux/time.h> | 11 | #include <linux/time.h> |
12 | #include <linux/smp_lock.h> | ||
13 | #include <linux/lockd/lockd.h> | 12 | #include <linux/lockd/lockd.h> |
14 | #include <linux/lockd/share.h> | 13 | #include <linux/lockd/share.h> |
15 | 14 | ||
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c index c462d346acbd..ef5659b211e9 100644 --- a/fs/lockd/svclock.c +++ b/fs/lockd/svclock.c | |||
@@ -25,7 +25,6 @@ | |||
25 | #include <linux/errno.h> | 25 | #include <linux/errno.h> |
26 | #include <linux/kernel.h> | 26 | #include <linux/kernel.h> |
27 | #include <linux/sched.h> | 27 | #include <linux/sched.h> |
28 | #include <linux/smp_lock.h> | ||
29 | #include <linux/sunrpc/clnt.h> | 28 | #include <linux/sunrpc/clnt.h> |
30 | #include <linux/sunrpc/svc.h> | 29 | #include <linux/sunrpc/svc.h> |
31 | #include <linux/lockd/nlm.h> | 30 | #include <linux/lockd/nlm.h> |
diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c index c3069f38d602..0caea5310ac3 100644 --- a/fs/lockd/svcproc.c +++ b/fs/lockd/svcproc.c | |||
@@ -9,7 +9,6 @@ | |||
9 | 9 | ||
10 | #include <linux/types.h> | 10 | #include <linux/types.h> |
11 | #include <linux/time.h> | 11 | #include <linux/time.h> |
12 | #include <linux/smp_lock.h> | ||
13 | #include <linux/lockd/lockd.h> | 12 | #include <linux/lockd/lockd.h> |
14 | #include <linux/lockd/share.h> | 13 | #include <linux/lockd/share.h> |
15 | 14 | ||
diff --git a/fs/locks.c b/fs/locks.c index 50ec15927aab..8729347bcd1a 100644 --- a/fs/locks.c +++ b/fs/locks.c | |||
@@ -122,7 +122,6 @@ | |||
122 | #include <linux/module.h> | 122 | #include <linux/module.h> |
123 | #include <linux/security.h> | 123 | #include <linux/security.h> |
124 | #include <linux/slab.h> | 124 | #include <linux/slab.h> |
125 | #include <linux/smp_lock.h> | ||
126 | #include <linux/syscalls.h> | 125 | #include <linux/syscalls.h> |
127 | #include <linux/time.h> | 126 | #include <linux/time.h> |
128 | #include <linux/rcupdate.h> | 127 | #include <linux/rcupdate.h> |
@@ -186,7 +185,7 @@ void locks_release_private(struct file_lock *fl) | |||
186 | EXPORT_SYMBOL_GPL(locks_release_private); | 185 | EXPORT_SYMBOL_GPL(locks_release_private); |
187 | 186 | ||
188 | /* Free a lock which is not in use. */ | 187 | /* Free a lock which is not in use. */ |
189 | static void locks_free_lock(struct file_lock *fl) | 188 | void locks_free_lock(struct file_lock *fl) |
190 | { | 189 | { |
191 | BUG_ON(waitqueue_active(&fl->fl_wait)); | 190 | BUG_ON(waitqueue_active(&fl->fl_wait)); |
192 | BUG_ON(!list_empty(&fl->fl_block)); | 191 | BUG_ON(!list_empty(&fl->fl_block)); |
@@ -195,6 +194,7 @@ static void locks_free_lock(struct file_lock *fl) | |||
195 | locks_release_private(fl); | 194 | locks_release_private(fl); |
196 | kmem_cache_free(filelock_cache, fl); | 195 | kmem_cache_free(filelock_cache, fl); |
197 | } | 196 | } |
197 | EXPORT_SYMBOL(locks_free_lock); | ||
198 | 198 | ||
199 | void locks_init_lock(struct file_lock *fl) | 199 | void locks_init_lock(struct file_lock *fl) |
200 | { | 200 | { |
@@ -234,11 +234,8 @@ static void locks_copy_private(struct file_lock *new, struct file_lock *fl) | |||
234 | fl->fl_ops->fl_copy_lock(new, fl); | 234 | fl->fl_ops->fl_copy_lock(new, fl); |
235 | new->fl_ops = fl->fl_ops; | 235 | new->fl_ops = fl->fl_ops; |
236 | } | 236 | } |
237 | if (fl->fl_lmops) { | 237 | if (fl->fl_lmops) |
238 | if (fl->fl_lmops->fl_copy_lock) | ||
239 | fl->fl_lmops->fl_copy_lock(new, fl); | ||
240 | new->fl_lmops = fl->fl_lmops; | 238 | new->fl_lmops = fl->fl_lmops; |
241 | } | ||
242 | } | 239 | } |
243 | 240 | ||
244 | /* | 241 | /* |
@@ -1371,20 +1368,22 @@ int generic_setlease(struct file *filp, long arg, struct file_lock **flp) | |||
1371 | struct inode *inode = dentry->d_inode; | 1368 | struct inode *inode = dentry->d_inode; |
1372 | int error, rdlease_count = 0, wrlease_count = 0; | 1369 | int error, rdlease_count = 0, wrlease_count = 0; |
1373 | 1370 | ||
1371 | lease = *flp; | ||
1372 | |||
1373 | error = -EACCES; | ||
1374 | if ((current_fsuid() != inode->i_uid) && !capable(CAP_LEASE)) | 1374 | if ((current_fsuid() != inode->i_uid) && !capable(CAP_LEASE)) |
1375 | return -EACCES; | 1375 | goto out; |
1376 | error = -EINVAL; | ||
1376 | if (!S_ISREG(inode->i_mode)) | 1377 | if (!S_ISREG(inode->i_mode)) |
1377 | return -EINVAL; | 1378 | goto out; |
1378 | error = security_file_lock(filp, arg); | 1379 | error = security_file_lock(filp, arg); |
1379 | if (error) | 1380 | if (error) |
1380 | return error; | 1381 | goto out; |
1381 | 1382 | ||
1382 | time_out_leases(inode); | 1383 | time_out_leases(inode); |
1383 | 1384 | ||
1384 | BUG_ON(!(*flp)->fl_lmops->fl_break); | 1385 | BUG_ON(!(*flp)->fl_lmops->fl_break); |
1385 | 1386 | ||
1386 | lease = *flp; | ||
1387 | |||
1388 | if (arg != F_UNLCK) { | 1387 | if (arg != F_UNLCK) { |
1389 | error = -EAGAIN; | 1388 | error = -EAGAIN; |
1390 | if ((arg == F_RDLCK) && (atomic_read(&inode->i_writecount) > 0)) | 1389 | if ((arg == F_RDLCK) && (atomic_read(&inode->i_writecount) > 0)) |
@@ -1425,8 +1424,9 @@ int generic_setlease(struct file *filp, long arg, struct file_lock **flp) | |||
1425 | goto out; | 1424 | goto out; |
1426 | 1425 | ||
1427 | if (my_before != NULL) { | 1426 | if (my_before != NULL) { |
1428 | *flp = *my_before; | ||
1429 | error = lease->fl_lmops->fl_change(my_before, arg); | 1427 | error = lease->fl_lmops->fl_change(my_before, arg); |
1428 | if (!error) | ||
1429 | *flp = *my_before; | ||
1430 | goto out; | 1430 | goto out; |
1431 | } | 1431 | } |
1432 | 1432 | ||
@@ -1441,7 +1441,6 @@ int generic_setlease(struct file *filp, long arg, struct file_lock **flp) | |||
1441 | return 0; | 1441 | return 0; |
1442 | 1442 | ||
1443 | out: | 1443 | out: |
1444 | locks_free_lock(lease); | ||
1445 | return error; | 1444 | return error; |
1446 | } | 1445 | } |
1447 | EXPORT_SYMBOL(generic_setlease); | 1446 | EXPORT_SYMBOL(generic_setlease); |
@@ -1493,21 +1492,19 @@ int vfs_setlease(struct file *filp, long arg, struct file_lock **lease) | |||
1493 | } | 1492 | } |
1494 | EXPORT_SYMBOL_GPL(vfs_setlease); | 1493 | EXPORT_SYMBOL_GPL(vfs_setlease); |
1495 | 1494 | ||
1496 | /** | 1495 | static int do_fcntl_delete_lease(struct file *filp) |
1497 | * fcntl_setlease - sets a lease on an open file | ||
1498 | * @fd: open file descriptor | ||
1499 | * @filp: file pointer | ||
1500 | * @arg: type of lease to obtain | ||
1501 | * | ||
1502 | * Call this fcntl to establish a lease on the file. | ||
1503 | * Note that you also need to call %F_SETSIG to | ||
1504 | * receive a signal when the lease is broken. | ||
1505 | */ | ||
1506 | int fcntl_setlease(unsigned int fd, struct file *filp, long arg) | ||
1507 | { | 1496 | { |
1508 | struct file_lock *fl; | 1497 | struct file_lock fl, *flp = &fl; |
1498 | |||
1499 | lease_init(filp, F_UNLCK, flp); | ||
1500 | |||
1501 | return vfs_setlease(filp, F_UNLCK, &flp); | ||
1502 | } | ||
1503 | |||
1504 | static int do_fcntl_add_lease(unsigned int fd, struct file *filp, long arg) | ||
1505 | { | ||
1506 | struct file_lock *fl, *ret; | ||
1509 | struct fasync_struct *new; | 1507 | struct fasync_struct *new; |
1510 | struct inode *inode = filp->f_path.dentry->d_inode; | ||
1511 | int error; | 1508 | int error; |
1512 | 1509 | ||
1513 | fl = lease_alloc(filp, arg); | 1510 | fl = lease_alloc(filp, arg); |
@@ -1519,10 +1516,16 @@ int fcntl_setlease(unsigned int fd, struct file *filp, long arg) | |||
1519 | locks_free_lock(fl); | 1516 | locks_free_lock(fl); |
1520 | return -ENOMEM; | 1517 | return -ENOMEM; |
1521 | } | 1518 | } |
1519 | ret = fl; | ||
1522 | lock_flocks(); | 1520 | lock_flocks(); |
1523 | error = __vfs_setlease(filp, arg, &fl); | 1521 | error = __vfs_setlease(filp, arg, &ret); |
1524 | if (error || arg == F_UNLCK) | 1522 | if (error) { |
1525 | goto out_unlock; | 1523 | unlock_flocks(); |
1524 | locks_free_lock(fl); | ||
1525 | goto out_free_fasync; | ||
1526 | } | ||
1527 | if (ret != fl) | ||
1528 | locks_free_lock(fl); | ||
1526 | 1529 | ||
1527 | /* | 1530 | /* |
1528 | * fasync_insert_entry() returns the old entry if any. | 1531 | * fasync_insert_entry() returns the old entry if any. |
@@ -1530,26 +1533,36 @@ int fcntl_setlease(unsigned int fd, struct file *filp, long arg) | |||
1530 | * inserted it into the fasync list. Clear new so that | 1533 | * inserted it into the fasync list. Clear new so that |
1531 | * we don't release it here. | 1534 | * we don't release it here. |
1532 | */ | 1535 | */ |
1533 | if (!fasync_insert_entry(fd, filp, &fl->fl_fasync, new)) | 1536 | if (!fasync_insert_entry(fd, filp, &ret->fl_fasync, new)) |
1534 | new = NULL; | 1537 | new = NULL; |
1535 | 1538 | ||
1536 | if (error < 0) { | ||
1537 | /* remove lease just inserted by setlease */ | ||
1538 | fl->fl_type = F_UNLCK | F_INPROGRESS; | ||
1539 | fl->fl_break_time = jiffies - 10; | ||
1540 | time_out_leases(inode); | ||
1541 | goto out_unlock; | ||
1542 | } | ||
1543 | |||
1544 | error = __f_setown(filp, task_pid(current), PIDTYPE_PID, 0); | 1539 | error = __f_setown(filp, task_pid(current), PIDTYPE_PID, 0); |
1545 | out_unlock: | ||
1546 | unlock_flocks(); | 1540 | unlock_flocks(); |
1541 | |||
1542 | out_free_fasync: | ||
1547 | if (new) | 1543 | if (new) |
1548 | fasync_free(new); | 1544 | fasync_free(new); |
1549 | return error; | 1545 | return error; |
1550 | } | 1546 | } |
1551 | 1547 | ||
1552 | /** | 1548 | /** |
1549 | * fcntl_setlease - sets a lease on an open file | ||
1550 | * @fd: open file descriptor | ||
1551 | * @filp: file pointer | ||
1552 | * @arg: type of lease to obtain | ||
1553 | * | ||
1554 | * Call this fcntl to establish a lease on the file. | ||
1555 | * Note that you also need to call %F_SETSIG to | ||
1556 | * receive a signal when the lease is broken. | ||
1557 | */ | ||
1558 | int fcntl_setlease(unsigned int fd, struct file *filp, long arg) | ||
1559 | { | ||
1560 | if (arg == F_UNLCK) | ||
1561 | return do_fcntl_delete_lease(filp); | ||
1562 | return do_fcntl_add_lease(fd, filp, arg); | ||
1563 | } | ||
1564 | |||
1565 | /** | ||
1553 | * flock_lock_file_wait - Apply a FLOCK-style lock to a file | 1566 | * flock_lock_file_wait - Apply a FLOCK-style lock to a file |
1554 | * @filp: The file to apply the lock to | 1567 | * @filp: The file to apply the lock to |
1555 | * @fl: The lock to be applied | 1568 | * @fl: The lock to be applied |
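fcntl_setlease() is split so that F_UNLCK goes through do_fcntl_delete_lease() with a stack-allocated lock, while do_fcntl_add_lease() allocates a lock that __vfs_setlease() may either install or replace with an existing one: the caller keeps a second pointer and frees its own allocation only when it was not the object installed. A small userspace sketch of that ownership pattern, with made-up names:

/* Userspace sketch of the "callee may consume or substitute the object"
 * pattern used by do_fcntl_add_lease(); all names are illustrative. */
#include <stdio.h>
#include <stdlib.h>

struct lease { int type; };

/* May keep the caller's object, or point *lp at an existing one instead. */
static int install_lease(struct lease **lp, struct lease *existing)
{
	if (existing) {
		*lp = existing;   /* the caller's allocation was not consumed */
		return 0;
	}
	return 0;                 /* the caller's allocation is now owned here */
}

int main(void)
{
	struct lease existing = { 1 };
	struct lease *fl = malloc(sizeof(*fl));
	struct lease *ret = fl;

	if (!fl)
		return 1;
	fl->type = 2;

	install_lease(&ret, &existing);
	if (ret != fl)            /* our object was not used: free it ourselves */
		free(fl);
	/* otherwise ownership moved to the callee and we must not free it */
	return 0;
}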
diff --git a/fs/logfs/logfs.h b/fs/logfs/logfs.h index cd51a36b37f0..57afd4a6fabb 100644 --- a/fs/logfs/logfs.h +++ b/fs/logfs/logfs.h | |||
@@ -486,7 +486,7 @@ static inline int logfs_get_sb_bdev(struct logfs_super *s, | |||
486 | 486 | ||
487 | /* dev_mtd.c */ | 487 | /* dev_mtd.c */ |
488 | #ifdef CONFIG_MTD | 488 | #ifdef CONFIG_MTD |
489 | int logfs_get_sb_mtd(struct logfs_super *s, int mtdnr) | 489 | int logfs_get_sb_mtd(struct logfs_super *s, int mtdnr); |
490 | #else | 490 | #else |
491 | static inline int logfs_get_sb_mtd(struct logfs_super *s, int mtdnr) | 491 | static inline int logfs_get_sb_mtd(struct logfs_super *s, int mtdnr) |
492 | { | 492 | { |
diff --git a/fs/namei.c b/fs/namei.c index 5362af9b7372..4ff7ca530533 100644 --- a/fs/namei.c +++ b/fs/namei.c | |||
@@ -1748,6 +1748,9 @@ struct file *do_filp_open(int dfd, const char *pathname, | |||
1748 | if (!(open_flag & O_CREAT)) | 1748 | if (!(open_flag & O_CREAT)) |
1749 | mode = 0; | 1749 | mode = 0; |
1750 | 1750 | ||
1751 | /* Must never be set by userspace */ | ||
1752 | open_flag &= ~FMODE_NONOTIFY; | ||
1753 | |||
1751 | /* | 1754 | /* |
1752 | * O_SYNC is implemented as __O_SYNC|O_DSYNC. As many places only | 1755 | * O_SYNC is implemented as __O_SYNC|O_DSYNC. As many places only |
1753 | * check for O_DSYNC if the need any syncing at all we enforce it's | 1756 | * check for O_DSYNC if the need any syncing at all we enforce it's |
diff --git a/fs/namespace.c b/fs/namespace.c index 8a415c9c5e55..3dbfc072ec70 100644 --- a/fs/namespace.c +++ b/fs/namespace.c | |||
@@ -13,7 +13,6 @@ | |||
13 | #include <linux/sched.h> | 13 | #include <linux/sched.h> |
14 | #include <linux/spinlock.h> | 14 | #include <linux/spinlock.h> |
15 | #include <linux/percpu.h> | 15 | #include <linux/percpu.h> |
16 | #include <linux/smp_lock.h> | ||
17 | #include <linux/init.h> | 16 | #include <linux/init.h> |
18 | #include <linux/kernel.h> | 17 | #include <linux/kernel.h> |
19 | #include <linux/acct.h> | 18 | #include <linux/acct.h> |
diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c index aac8832e919e..f22b12e7d337 100644 --- a/fs/ncpfs/dir.c +++ b/fs/ncpfs/dir.c | |||
@@ -19,7 +19,6 @@ | |||
19 | #include <linux/mm.h> | 19 | #include <linux/mm.h> |
20 | #include <asm/uaccess.h> | 20 | #include <asm/uaccess.h> |
21 | #include <asm/byteorder.h> | 21 | #include <asm/byteorder.h> |
22 | #include <linux/smp_lock.h> | ||
23 | 22 | ||
24 | #include <linux/ncp_fs.h> | 23 | #include <linux/ncp_fs.h> |
25 | 24 | ||
diff --git a/fs/ncpfs/file.c b/fs/ncpfs/file.c index 6c754f70c529..cb50aaf981df 100644 --- a/fs/ncpfs/file.c +++ b/fs/ncpfs/file.c | |||
@@ -17,7 +17,6 @@ | |||
17 | #include <linux/mm.h> | 17 | #include <linux/mm.h> |
18 | #include <linux/vmalloc.h> | 18 | #include <linux/vmalloc.h> |
19 | #include <linux/sched.h> | 19 | #include <linux/sched.h> |
20 | #include <linux/smp_lock.h> | ||
21 | 20 | ||
22 | #include <linux/ncp_fs.h> | 21 | #include <linux/ncp_fs.h> |
23 | #include "ncplib_kernel.h" | 22 | #include "ncplib_kernel.h" |
diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c index d290545aa0c4..8fb93b604e73 100644 --- a/fs/ncpfs/inode.c +++ b/fs/ncpfs/inode.c | |||
@@ -26,7 +26,6 @@ | |||
26 | #include <linux/slab.h> | 26 | #include <linux/slab.h> |
27 | #include <linux/vmalloc.h> | 27 | #include <linux/vmalloc.h> |
28 | #include <linux/init.h> | 28 | #include <linux/init.h> |
29 | #include <linux/smp_lock.h> | ||
30 | #include <linux/vfs.h> | 29 | #include <linux/vfs.h> |
31 | #include <linux/mount.h> | 30 | #include <linux/mount.h> |
32 | #include <linux/seq_file.h> | 31 | #include <linux/seq_file.h> |
diff --git a/fs/ncpfs/ioctl.c b/fs/ncpfs/ioctl.c index c2a1f9a155c3..d40a547e3377 100644 --- a/fs/ncpfs/ioctl.c +++ b/fs/ncpfs/ioctl.c | |||
@@ -17,7 +17,6 @@ | |||
17 | #include <linux/mount.h> | 17 | #include <linux/mount.h> |
18 | #include <linux/slab.h> | 18 | #include <linux/slab.h> |
19 | #include <linux/highuid.h> | 19 | #include <linux/highuid.h> |
20 | #include <linux/smp_lock.h> | ||
21 | #include <linux/vmalloc.h> | 20 | #include <linux/vmalloc.h> |
22 | #include <linux/sched.h> | 21 | #include <linux/sched.h> |
23 | 22 | ||
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index aeec017fe814..93a8b3bd69e3 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c | |||
@@ -9,7 +9,6 @@ | |||
9 | #include <linux/completion.h> | 9 | #include <linux/completion.h> |
10 | #include <linux/ip.h> | 10 | #include <linux/ip.h> |
11 | #include <linux/module.h> | 11 | #include <linux/module.h> |
12 | #include <linux/smp_lock.h> | ||
13 | #include <linux/sunrpc/svc.h> | 12 | #include <linux/sunrpc/svc.h> |
14 | #include <linux/sunrpc/svcsock.h> | 13 | #include <linux/sunrpc/svcsock.h> |
15 | #include <linux/nfs_fs.h> | 14 | #include <linux/nfs_fs.h> |
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index 232a7eead33a..1fd62fc49be3 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c | |||
@@ -11,7 +11,6 @@ | |||
11 | #include <linux/module.h> | 11 | #include <linux/module.h> |
12 | #include <linux/sched.h> | 12 | #include <linux/sched.h> |
13 | #include <linux/slab.h> | 13 | #include <linux/slab.h> |
14 | #include <linux/smp_lock.h> | ||
15 | #include <linux/spinlock.h> | 14 | #include <linux/spinlock.h> |
16 | 15 | ||
17 | #include <linux/nfs4.h> | 16 | #include <linux/nfs4.h> |
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 07ac3847e562..996dd8989a91 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c | |||
@@ -34,6 +34,7 @@ | |||
34 | #include <linux/mount.h> | 34 | #include <linux/mount.h> |
35 | #include <linux/sched.h> | 35 | #include <linux/sched.h> |
36 | #include <linux/vmalloc.h> | 36 | #include <linux/vmalloc.h> |
37 | #include <linux/kmemleak.h> | ||
37 | 38 | ||
38 | #include "delegation.h" | 39 | #include "delegation.h" |
39 | #include "iostat.h" | 40 | #include "iostat.h" |
@@ -56,7 +57,7 @@ static int nfs_rename(struct inode *, struct dentry *, | |||
56 | struct inode *, struct dentry *); | 57 | struct inode *, struct dentry *); |
57 | static int nfs_fsync_dir(struct file *, int); | 58 | static int nfs_fsync_dir(struct file *, int); |
58 | static loff_t nfs_llseek_dir(struct file *, loff_t, int); | 59 | static loff_t nfs_llseek_dir(struct file *, loff_t, int); |
59 | static int nfs_readdir_clear_array(struct page*, gfp_t); | 60 | static void nfs_readdir_clear_array(struct page*); |
60 | 61 | ||
61 | const struct file_operations nfs_dir_operations = { | 62 | const struct file_operations nfs_dir_operations = { |
62 | .llseek = nfs_llseek_dir, | 63 | .llseek = nfs_llseek_dir, |
@@ -82,8 +83,8 @@ const struct inode_operations nfs_dir_inode_operations = { | |||
82 | .setattr = nfs_setattr, | 83 | .setattr = nfs_setattr, |
83 | }; | 84 | }; |
84 | 85 | ||
85 | const struct address_space_operations nfs_dir_addr_space_ops = { | 86 | const struct address_space_operations nfs_dir_aops = { |
86 | .releasepage = nfs_readdir_clear_array, | 87 | .freepage = nfs_readdir_clear_array, |
87 | }; | 88 | }; |
88 | 89 | ||
89 | #ifdef CONFIG_NFS_V3 | 90 | #ifdef CONFIG_NFS_V3 |
@@ -161,6 +162,7 @@ struct nfs_cache_array_entry { | |||
161 | u64 cookie; | 162 | u64 cookie; |
162 | u64 ino; | 163 | u64 ino; |
163 | struct qstr string; | 164 | struct qstr string; |
165 | unsigned char d_type; | ||
164 | }; | 166 | }; |
165 | 167 | ||
166 | struct nfs_cache_array { | 168 | struct nfs_cache_array { |
@@ -170,14 +172,13 @@ struct nfs_cache_array { | |||
170 | struct nfs_cache_array_entry array[0]; | 172 | struct nfs_cache_array_entry array[0]; |
171 | }; | 173 | }; |
172 | 174 | ||
173 | #define MAX_READDIR_ARRAY ((PAGE_SIZE - sizeof(struct nfs_cache_array)) / sizeof(struct nfs_cache_array_entry)) | ||
174 | |||
175 | typedef __be32 * (*decode_dirent_t)(struct xdr_stream *, struct nfs_entry *, struct nfs_server *, int); | 175 | typedef __be32 * (*decode_dirent_t)(struct xdr_stream *, struct nfs_entry *, struct nfs_server *, int); |
176 | typedef struct { | 176 | typedef struct { |
177 | struct file *file; | 177 | struct file *file; |
178 | struct page *page; | 178 | struct page *page; |
179 | unsigned long page_index; | 179 | unsigned long page_index; |
180 | u64 *dir_cookie; | 180 | u64 *dir_cookie; |
181 | u64 last_cookie; | ||
181 | loff_t current_index; | 182 | loff_t current_index; |
182 | decode_dirent_t decode; | 183 | decode_dirent_t decode; |
183 | 184 | ||
@@ -194,9 +195,13 @@ typedef struct { | |||
194 | static | 195 | static |
195 | struct nfs_cache_array *nfs_readdir_get_array(struct page *page) | 196 | struct nfs_cache_array *nfs_readdir_get_array(struct page *page) |
196 | { | 197 | { |
198 | void *ptr; | ||
197 | if (page == NULL) | 199 | if (page == NULL) |
198 | return ERR_PTR(-EIO); | 200 | return ERR_PTR(-EIO); |
199 | return (struct nfs_cache_array *)kmap(page); | 201 | ptr = kmap(page); |
202 | if (ptr == NULL) | ||
203 | return ERR_PTR(-ENOMEM); | ||
204 | return ptr; | ||
200 | } | 205 | } |
201 | 206 | ||
202 | static | 207 | static |
@@ -209,14 +214,15 @@ void nfs_readdir_release_array(struct page *page) | |||
209 | * we are freeing strings created by nfs_add_to_readdir_array() | 214 | * we are freeing strings created by nfs_add_to_readdir_array() |
210 | */ | 215 | */ |
211 | static | 216 | static |
212 | int nfs_readdir_clear_array(struct page *page, gfp_t mask) | 217 | void nfs_readdir_clear_array(struct page *page) |
213 | { | 218 | { |
214 | struct nfs_cache_array *array = nfs_readdir_get_array(page); | 219 | struct nfs_cache_array *array; |
215 | int i; | 220 | int i; |
221 | |||
222 | array = kmap_atomic(page, KM_USER0); | ||
216 | for (i = 0; i < array->size; i++) | 223 | for (i = 0; i < array->size; i++) |
217 | kfree(array->array[i].string.name); | 224 | kfree(array->array[i].string.name); |
218 | nfs_readdir_release_array(page); | 225 | kunmap_atomic(array, KM_USER0); |
219 | return 0; | ||
220 | } | 226 | } |
221 | 227 | ||
222 | /* | 228 | /* |
@@ -231,6 +237,11 @@ int nfs_readdir_make_qstr(struct qstr *string, const char *name, unsigned int le | |||
231 | string->name = kmemdup(name, len, GFP_KERNEL); | 237 | string->name = kmemdup(name, len, GFP_KERNEL); |
232 | if (string->name == NULL) | 238 | if (string->name == NULL) |
233 | return -ENOMEM; | 239 | return -ENOMEM; |
240 | /* | ||
241 | * Avoid a kmemleak false positive. The pointer to the name is stored | ||
242 | * in a page cache page which kmemleak does not scan. | ||
243 | */ | ||
244 | kmemleak_not_leak(string->name); | ||
234 | string->hash = full_name_hash(name, len); | 245 | string->hash = full_name_hash(name, len); |
235 | return 0; | 246 | return 0; |
236 | } | 247 | } |
@@ -244,20 +255,24 @@ int nfs_readdir_add_to_array(struct nfs_entry *entry, struct page *page) | |||
244 | 255 | ||
245 | if (IS_ERR(array)) | 256 | if (IS_ERR(array)) |
246 | return PTR_ERR(array); | 257 | return PTR_ERR(array); |
247 | ret = -EIO; | ||
248 | if (array->size >= MAX_READDIR_ARRAY) | ||
249 | goto out; | ||
250 | 258 | ||
251 | cache_entry = &array->array[array->size]; | 259 | cache_entry = &array->array[array->size]; |
260 | |||
261 | /* Check that this entry lies within the page bounds */ | ||
262 | ret = -ENOSPC; | ||
263 | if ((char *)&cache_entry[1] - (char *)page_address(page) > PAGE_SIZE) | ||
264 | goto out; | ||
265 | |||
252 | cache_entry->cookie = entry->prev_cookie; | 266 | cache_entry->cookie = entry->prev_cookie; |
253 | cache_entry->ino = entry->ino; | 267 | cache_entry->ino = entry->ino; |
268 | cache_entry->d_type = entry->d_type; | ||
254 | ret = nfs_readdir_make_qstr(&cache_entry->string, entry->name, entry->len); | 269 | ret = nfs_readdir_make_qstr(&cache_entry->string, entry->name, entry->len); |
255 | if (ret) | 270 | if (ret) |
256 | goto out; | 271 | goto out; |
257 | array->last_cookie = entry->cookie; | 272 | array->last_cookie = entry->cookie; |
258 | if (entry->eof == 1) | ||
259 | array->eof_index = array->size; | ||
260 | array->size++; | 273 | array->size++; |
274 | if (entry->eof != 0) | ||
275 | array->eof_index = array->size; | ||
261 | out: | 276 | out: |
262 | nfs_readdir_release_array(page); | 277 | nfs_readdir_release_array(page); |
263 | return ret; | 278 | return ret; |
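Rather than relying on the old MAX_READDIR_ARRAY constant, nfs_readdir_add_to_array() now checks directly that the next cache entry still ends inside the page before filling it in, and returns -ENOSPC otherwise. A runnable userspace sketch of the same bounds test, using stand-in types and an assumed 4096-byte page:

/* Sketch of the "does the next flexible-array entry still fit in the page?"
 * test; PAGE_SIZE and the struct layout are stand-ins for the kernel ones. */
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>

#define PAGE_SIZE 4096

struct entry { uint64_t cookie; uint64_t ino; char d_type; };
struct cache_array {
	unsigned int size;
	int eof_index;
	uint64_t last_cookie;
	struct entry array[];             /* grows toward the end of the page */
};

static int add_entry(char *page, uint64_t cookie)
{
	struct cache_array *a = (struct cache_array *)page;
	struct entry *e = &a->array[a->size];

	/* Same idea as the patch: reject the entry if its end would cross
	 * the page boundary. */
	if ((char *)&e[1] - page > PAGE_SIZE)
		return -1;                /* -ENOSPC in the kernel code */

	e->cookie = cookie;
	a->last_cookie = cookie;
	a->size++;
	return 0;
}

int main(void)
{
	char *page = calloc(1, PAGE_SIZE);
	int n = 0;

	if (!page)
		return 1;
	while (add_entry(page, n) == 0)
		n++;
	printf("page holds %d entries\n", n);
	free(page);
	return 0;
}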
@@ -272,7 +287,7 @@ int nfs_readdir_search_for_pos(struct nfs_cache_array *array, nfs_readdir_descri | |||
272 | if (diff < 0) | 287 | if (diff < 0) |
273 | goto out_eof; | 288 | goto out_eof; |
274 | if (diff >= array->size) { | 289 | if (diff >= array->size) { |
275 | if (array->eof_index > 0) | 290 | if (array->eof_index >= 0) |
276 | goto out_eof; | 291 | goto out_eof; |
277 | desc->current_index += array->size; | 292 | desc->current_index += array->size; |
278 | return -EAGAIN; | 293 | return -EAGAIN; |
@@ -281,8 +296,6 @@ int nfs_readdir_search_for_pos(struct nfs_cache_array *array, nfs_readdir_descri | |||
281 | index = (unsigned int)diff; | 296 | index = (unsigned int)diff; |
282 | *desc->dir_cookie = array->array[index].cookie; | 297 | *desc->dir_cookie = array->array[index].cookie; |
283 | desc->cache_entry_index = index; | 298 | desc->cache_entry_index = index; |
284 | if (index == array->eof_index) | ||
285 | desc->eof = 1; | ||
286 | return 0; | 299 | return 0; |
287 | out_eof: | 300 | out_eof: |
288 | desc->eof = 1; | 301 | desc->eof = 1; |
@@ -296,17 +309,16 @@ int nfs_readdir_search_for_cookie(struct nfs_cache_array *array, nfs_readdir_des | |||
296 | int status = -EAGAIN; | 309 | int status = -EAGAIN; |
297 | 310 | ||
298 | for (i = 0; i < array->size; i++) { | 311 | for (i = 0; i < array->size; i++) { |
299 | if (i == array->eof_index) { | ||
300 | desc->eof = 1; | ||
301 | status = -EBADCOOKIE; | ||
302 | } | ||
303 | if (array->array[i].cookie == *desc->dir_cookie) { | 312 | if (array->array[i].cookie == *desc->dir_cookie) { |
304 | desc->cache_entry_index = i; | 313 | desc->cache_entry_index = i; |
305 | status = 0; | 314 | return 0; |
306 | break; | ||
307 | } | 315 | } |
308 | } | 316 | } |
309 | 317 | if (array->eof_index >= 0) { | |
318 | status = -EBADCOOKIE; | ||
319 | if (*desc->dir_cookie == array->last_cookie) | ||
320 | desc->eof = 1; | ||
321 | } | ||
310 | return status; | 322 | return status; |
311 | } | 323 | } |
312 | 324 | ||
@@ -314,10 +326,7 @@ static | |||
314 | int nfs_readdir_search_array(nfs_readdir_descriptor_t *desc) | 326 | int nfs_readdir_search_array(nfs_readdir_descriptor_t *desc) |
315 | { | 327 | { |
316 | struct nfs_cache_array *array; | 328 | struct nfs_cache_array *array; |
317 | int status = -EBADCOOKIE; | 329 | int status; |
318 | |||
319 | if (desc->dir_cookie == NULL) | ||
320 | goto out; | ||
321 | 330 | ||
322 | array = nfs_readdir_get_array(desc->page); | 331 | array = nfs_readdir_get_array(desc->page); |
323 | if (IS_ERR(array)) { | 332 | if (IS_ERR(array)) { |
@@ -330,6 +339,10 @@ int nfs_readdir_search_array(nfs_readdir_descriptor_t *desc) | |||
330 | else | 339 | else |
331 | status = nfs_readdir_search_for_cookie(array, desc); | 340 | status = nfs_readdir_search_for_cookie(array, desc); |
332 | 341 | ||
342 | if (status == -EAGAIN) { | ||
343 | desc->last_cookie = array->last_cookie; | ||
344 | desc->page_index++; | ||
345 | } | ||
333 | nfs_readdir_release_array(desc->page); | 346 | nfs_readdir_release_array(desc->page); |
334 | out: | 347 | out: |
335 | return status; | 348 | return status; |
@@ -381,13 +394,9 @@ int xdr_decode(nfs_readdir_descriptor_t *desc, struct nfs_entry *entry, struct x | |||
381 | static | 394 | static |
382 | int nfs_same_file(struct dentry *dentry, struct nfs_entry *entry) | 395 | int nfs_same_file(struct dentry *dentry, struct nfs_entry *entry) |
383 | { | 396 | { |
384 | struct nfs_inode *node; | ||
385 | if (dentry->d_inode == NULL) | 397 | if (dentry->d_inode == NULL) |
386 | goto different; | 398 | goto different; |
387 | node = NFS_I(dentry->d_inode); | 399 | if (nfs_compare_fh(entry->fh, NFS_FH(dentry->d_inode)) != 0) |
388 | if (node->fh.size != entry->fh->size) | ||
389 | goto different; | ||
390 | if (strncmp(node->fh.data, entry->fh->data, node->fh.size) != 0) | ||
391 | goto different; | 400 | goto different; |
392 | return 1; | 401 | return 1; |
393 | different: | 402 | different: |
@@ -449,14 +458,15 @@ out: | |||
449 | 458 | ||
450 | /* Perform conversion from xdr to cache array */ | 459 | /* Perform conversion from xdr to cache array */ |
451 | static | 460 | static |
452 | void nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *entry, | 461 | int nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *entry, |
453 | void *xdr_page, struct page *page, unsigned int buflen) | 462 | void *xdr_page, struct page *page, unsigned int buflen) |
454 | { | 463 | { |
455 | struct xdr_stream stream; | 464 | struct xdr_stream stream; |
456 | struct xdr_buf buf; | 465 | struct xdr_buf buf; |
457 | __be32 *ptr = xdr_page; | 466 | __be32 *ptr = xdr_page; |
458 | int status; | ||
459 | struct nfs_cache_array *array; | 467 | struct nfs_cache_array *array; |
468 | unsigned int count = 0; | ||
469 | int status; | ||
460 | 470 | ||
461 | buf.head->iov_base = xdr_page; | 471 | buf.head->iov_base = xdr_page; |
462 | buf.head->iov_len = buflen; | 472 | buf.head->iov_len = buflen; |
@@ -471,21 +481,32 @@ void nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *e | |||
471 | 481 | ||
472 | do { | 482 | do { |
473 | status = xdr_decode(desc, entry, &stream); | 483 | status = xdr_decode(desc, entry, &stream); |
474 | if (status != 0) | 484 | if (status != 0) { |
485 | if (status == -EAGAIN) | ||
486 | status = 0; | ||
475 | break; | 487 | break; |
488 | } | ||
476 | 489 | ||
477 | if (nfs_readdir_add_to_array(entry, page) == -1) | 490 | count++; |
478 | break; | 491 | |
479 | if (desc->plus == 1) | 492 | if (desc->plus != 0) |
480 | nfs_prime_dcache(desc->file->f_path.dentry, entry); | 493 | nfs_prime_dcache(desc->file->f_path.dentry, entry); |
494 | |||
495 | status = nfs_readdir_add_to_array(entry, page); | ||
496 | if (status != 0) | ||
497 | break; | ||
481 | } while (!entry->eof); | 498 | } while (!entry->eof); |
482 | 499 | ||
483 | if (status == -EBADCOOKIE && entry->eof) { | 500 | if (count == 0 || (status == -EBADCOOKIE && entry->eof != 0)) { |
484 | array = nfs_readdir_get_array(page); | 501 | array = nfs_readdir_get_array(page); |
485 | array->eof_index = array->size - 1; | 502 | if (!IS_ERR(array)) { |
486 | status = 0; | 503 | array->eof_index = array->size; |
487 | nfs_readdir_release_array(page); | 504 | status = 0; |
505 | nfs_readdir_release_array(page); | ||
506 | } else | ||
507 | status = PTR_ERR(array); | ||
488 | } | 508 | } |
509 | return status; | ||
489 | } | 510 | } |
490 | 511 | ||
491 | static | 512 | static |
@@ -537,11 +558,11 @@ int nfs_readdir_xdr_to_array(nfs_readdir_descriptor_t *desc, struct page *page, | |||
537 | struct nfs_entry entry; | 558 | struct nfs_entry entry; |
538 | struct file *file = desc->file; | 559 | struct file *file = desc->file; |
539 | struct nfs_cache_array *array; | 560 | struct nfs_cache_array *array; |
540 | int status = 0; | 561 | int status = -ENOMEM; |
541 | unsigned int array_size = ARRAY_SIZE(pages); | 562 | unsigned int array_size = ARRAY_SIZE(pages); |
542 | 563 | ||
543 | entry.prev_cookie = 0; | 564 | entry.prev_cookie = 0; |
544 | entry.cookie = *desc->dir_cookie; | 565 | entry.cookie = desc->last_cookie; |
545 | entry.eof = 0; | 566 | entry.eof = 0; |
546 | entry.fh = nfs_alloc_fhandle(); | 567 | entry.fh = nfs_alloc_fhandle(); |
547 | entry.fattr = nfs_alloc_fattr(); | 568 | entry.fattr = nfs_alloc_fattr(); |
@@ -549,6 +570,10 @@ int nfs_readdir_xdr_to_array(nfs_readdir_descriptor_t *desc, struct page *page, | |||
549 | goto out; | 570 | goto out; |
550 | 571 | ||
551 | array = nfs_readdir_get_array(page); | 572 | array = nfs_readdir_get_array(page); |
573 | if (IS_ERR(array)) { | ||
574 | status = PTR_ERR(array); | ||
575 | goto out; | ||
576 | } | ||
552 | memset(array, 0, sizeof(struct nfs_cache_array)); | 577 | memset(array, 0, sizeof(struct nfs_cache_array)); |
553 | array->eof_index = -1; | 578 | array->eof_index = -1; |
554 | 579 | ||
@@ -556,12 +581,19 @@ int nfs_readdir_xdr_to_array(nfs_readdir_descriptor_t *desc, struct page *page, | |||
556 | if (!pages_ptr) | 581 | if (!pages_ptr) |
557 | goto out_release_array; | 582 | goto out_release_array; |
558 | do { | 583 | do { |
584 | unsigned int pglen; | ||
559 | status = nfs_readdir_xdr_filler(pages, desc, &entry, file, inode); | 585 | status = nfs_readdir_xdr_filler(pages, desc, &entry, file, inode); |
560 | 586 | ||
561 | if (status < 0) | 587 | if (status < 0) |
562 | break; | 588 | break; |
563 | nfs_readdir_page_filler(desc, &entry, pages_ptr, page, array_size * PAGE_SIZE); | 589 | pglen = status; |
564 | } while (array->eof_index < 0 && array->size < MAX_READDIR_ARRAY); | 590 | status = nfs_readdir_page_filler(desc, &entry, pages_ptr, page, pglen); |
591 | if (status < 0) { | ||
592 | if (status == -ENOSPC) | ||
593 | status = 0; | ||
594 | break; | ||
595 | } | ||
596 | } while (array->eof_index < 0); | ||
565 | 597 | ||
566 | nfs_readdir_free_large_page(pages_ptr, pages, array_size); | 598 | nfs_readdir_free_large_page(pages_ptr, pages, array_size); |
567 | out_release_array: | 599 | out_release_array: |
@@ -582,8 +614,10 @@ static | |||
582 | int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page* page) | 614 | int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page* page) |
583 | { | 615 | { |
584 | struct inode *inode = desc->file->f_path.dentry->d_inode; | 616 | struct inode *inode = desc->file->f_path.dentry->d_inode; |
617 | int ret; | ||
585 | 618 | ||
586 | if (nfs_readdir_xdr_to_array(desc, page, inode) < 0) | 619 | ret = nfs_readdir_xdr_to_array(desc, page, inode); |
620 | if (ret < 0) | ||
587 | goto error; | 621 | goto error; |
588 | SetPageUptodate(page); | 622 | SetPageUptodate(page); |
589 | 623 | ||
@@ -595,12 +629,14 @@ int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page* page) | |||
595 | return 0; | 629 | return 0; |
596 | error: | 630 | error: |
597 | unlock_page(page); | 631 | unlock_page(page); |
598 | return -EIO; | 632 | return ret; |
599 | } | 633 | } |
600 | 634 | ||
601 | static | 635 | static |
602 | void cache_page_release(nfs_readdir_descriptor_t *desc) | 636 | void cache_page_release(nfs_readdir_descriptor_t *desc) |
603 | { | 637 | { |
638 | if (!desc->page->mapping) | ||
639 | nfs_readdir_clear_array(desc->page); | ||
604 | page_cache_release(desc->page); | 640 | page_cache_release(desc->page); |
605 | desc->page = NULL; | 641 | desc->page = NULL; |
606 | } | 642 | } |
@@ -608,12 +644,8 @@ void cache_page_release(nfs_readdir_descriptor_t *desc) | |||
608 | static | 644 | static |
609 | struct page *get_cache_page(nfs_readdir_descriptor_t *desc) | 645 | struct page *get_cache_page(nfs_readdir_descriptor_t *desc) |
610 | { | 646 | { |
611 | struct page *page; | 647 | return read_cache_page(desc->file->f_path.dentry->d_inode->i_mapping, |
612 | page = read_cache_page(desc->file->f_path.dentry->d_inode->i_mapping, | ||
613 | desc->page_index, (filler_t *)nfs_readdir_filler, desc); | 648 | desc->page_index, (filler_t *)nfs_readdir_filler, desc); |
614 | if (IS_ERR(page)) | ||
615 | desc->eof = 1; | ||
616 | return page; | ||
617 | } | 649 | } |
618 | 650 | ||
619 | /* | 651 | /* |
@@ -629,9 +661,8 @@ int find_cache_page(nfs_readdir_descriptor_t *desc) | |||
629 | return PTR_ERR(desc->page); | 661 | return PTR_ERR(desc->page); |
630 | 662 | ||
631 | res = nfs_readdir_search_array(desc); | 663 | res = nfs_readdir_search_array(desc); |
632 | if (res == 0) | 664 | if (res != 0) |
633 | return 0; | 665 | cache_page_release(desc); |
634 | cache_page_release(desc); | ||
635 | return res; | 666 | return res; |
636 | } | 667 | } |
637 | 668 | ||
@@ -639,22 +670,18 @@ int find_cache_page(nfs_readdir_descriptor_t *desc) | |||
639 | static inline | 670 | static inline |
640 | int readdir_search_pagecache(nfs_readdir_descriptor_t *desc) | 671 | int readdir_search_pagecache(nfs_readdir_descriptor_t *desc) |
641 | { | 672 | { |
642 | int res = -EAGAIN; | 673 | int res; |
643 | 674 | ||
644 | while (1) { | 675 | if (desc->page_index == 0) { |
645 | res = find_cache_page(desc); | 676 | desc->current_index = 0; |
646 | if (res != -EAGAIN) | 677 | desc->last_cookie = 0; |
647 | break; | ||
648 | desc->page_index++; | ||
649 | } | 678 | } |
679 | do { | ||
680 | res = find_cache_page(desc); | ||
681 | } while (res == -EAGAIN); | ||
650 | return res; | 682 | return res; |
651 | } | 683 | } |
652 | 684 | ||
653 | static inline unsigned int dt_type(struct inode *inode) | ||
654 | { | ||
655 | return (inode->i_mode >> 12) & 15; | ||
656 | } | ||
657 | |||
658 | /* | 685 | /* |
659 | * Once we've found the start of the dirent within a page: fill 'er up... | 686 | * Once we've found the start of the dirent within a page: fill 'er up... |
660 | */ | 687 | */ |
@@ -666,35 +693,35 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc, void *dirent, | |||
666 | int i = 0; | 693 | int i = 0; |
667 | int res = 0; | 694 | int res = 0; |
668 | struct nfs_cache_array *array = NULL; | 695 | struct nfs_cache_array *array = NULL; |
669 | unsigned int d_type = DT_UNKNOWN; | ||
670 | struct dentry *dentry = NULL; | ||
671 | 696 | ||
672 | array = nfs_readdir_get_array(desc->page); | 697 | array = nfs_readdir_get_array(desc->page); |
698 | if (IS_ERR(array)) { | ||
699 | res = PTR_ERR(array); | ||
700 | goto out; | ||
701 | } | ||
673 | 702 | ||
674 | for (i = desc->cache_entry_index; i < array->size; i++) { | 703 | for (i = desc->cache_entry_index; i < array->size; i++) { |
675 | d_type = DT_UNKNOWN; | 704 | struct nfs_cache_array_entry *ent; |
676 | 705 | ||
677 | res = filldir(dirent, array->array[i].string.name, | 706 | ent = &array->array[i]; |
678 | array->array[i].string.len, file->f_pos, | 707 | if (filldir(dirent, ent->string.name, ent->string.len, |
679 | nfs_compat_user_ino64(array->array[i].ino), d_type); | 708 | file->f_pos, nfs_compat_user_ino64(ent->ino), |
680 | if (res < 0) | 709 | ent->d_type) < 0) { |
710 | desc->eof = 1; | ||
681 | break; | 711 | break; |
712 | } | ||
682 | file->f_pos++; | 713 | file->f_pos++; |
683 | desc->cache_entry_index = i; | ||
684 | if (i < (array->size-1)) | 714 | if (i < (array->size-1)) |
685 | *desc->dir_cookie = array->array[i+1].cookie; | 715 | *desc->dir_cookie = array->array[i+1].cookie; |
686 | else | 716 | else |
687 | *desc->dir_cookie = array->last_cookie; | 717 | *desc->dir_cookie = array->last_cookie; |
688 | if (i == array->eof_index) { | ||
689 | desc->eof = 1; | ||
690 | break; | ||
691 | } | ||
692 | } | 718 | } |
719 | if (array->eof_index >= 0) | ||
720 | desc->eof = 1; | ||
693 | 721 | ||
694 | nfs_readdir_release_array(desc->page); | 722 | nfs_readdir_release_array(desc->page); |
723 | out: | ||
695 | cache_page_release(desc); | 724 | cache_page_release(desc); |
696 | if (dentry != NULL) | ||
697 | dput(dentry); | ||
698 | dfprintk(DIRCACHE, "NFS: nfs_do_filldir() filling ended @ cookie %Lu; returning = %d\n", | 725 | dfprintk(DIRCACHE, "NFS: nfs_do_filldir() filling ended @ cookie %Lu; returning = %d\n", |
699 | (unsigned long long)*desc->dir_cookie, res); | 726 | (unsigned long long)*desc->dir_cookie, res); |
700 | return res; | 727 | return res; |
@@ -729,13 +756,14 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent, | |||
729 | goto out; | 756 | goto out; |
730 | } | 757 | } |
731 | 758 | ||
732 | if (nfs_readdir_xdr_to_array(desc, page, inode) == -1) { | ||
733 | status = -EIO; | ||
734 | goto out_release; | ||
735 | } | ||
736 | |||
737 | desc->page_index = 0; | 759 | desc->page_index = 0; |
760 | desc->last_cookie = *desc->dir_cookie; | ||
738 | desc->page = page; | 761 | desc->page = page; |
762 | |||
763 | status = nfs_readdir_xdr_to_array(desc, page, inode); | ||
764 | if (status < 0) | ||
765 | goto out_release; | ||
766 | |||
739 | status = nfs_do_filldir(desc, dirent, filldir); | 767 | status = nfs_do_filldir(desc, dirent, filldir); |
740 | 768 | ||
741 | out: | 769 | out: |
@@ -757,7 +785,7 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
757 | struct inode *inode = dentry->d_inode; | 785 | struct inode *inode = dentry->d_inode; |
758 | nfs_readdir_descriptor_t my_desc, | 786 | nfs_readdir_descriptor_t my_desc, |
759 | *desc = &my_desc; | 787 | *desc = &my_desc; |
760 | int res = -ENOMEM; | 788 | int res; |
761 | 789 | ||
762 | dfprintk(FILE, "NFS: readdir(%s/%s) starting at cookie %llu\n", | 790 | dfprintk(FILE, "NFS: readdir(%s/%s) starting at cookie %llu\n", |
763 | dentry->d_parent->d_name.name, dentry->d_name.name, | 791 | dentry->d_parent->d_name.name, dentry->d_name.name, |
@@ -782,18 +810,18 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
782 | if (res < 0) | 810 | if (res < 0) |
783 | goto out; | 811 | goto out; |
784 | 812 | ||
785 | while (desc->eof != 1) { | 813 | do { |
786 | res = readdir_search_pagecache(desc); | 814 | res = readdir_search_pagecache(desc); |
787 | 815 | ||
788 | if (res == -EBADCOOKIE) { | 816 | if (res == -EBADCOOKIE) { |
817 | res = 0; | ||
789 | /* This means either end of directory */ | 818 | /* This means either end of directory */ |
790 | if (*desc->dir_cookie && desc->eof == 0) { | 819 | if (*desc->dir_cookie && desc->eof == 0) { |
791 | /* Or that the server has 'lost' a cookie */ | 820 | /* Or that the server has 'lost' a cookie */ |
792 | res = uncached_readdir(desc, dirent, filldir); | 821 | res = uncached_readdir(desc, dirent, filldir); |
793 | if (res >= 0) | 822 | if (res == 0) |
794 | continue; | 823 | continue; |
795 | } | 824 | } |
796 | res = 0; | ||
797 | break; | 825 | break; |
798 | } | 826 | } |
799 | if (res == -ETOOSMALL && desc->plus) { | 827 | if (res == -ETOOSMALL && desc->plus) { |
@@ -808,11 +836,9 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
808 | break; | 836 | break; |
809 | 837 | ||
810 | res = nfs_do_filldir(desc, dirent, filldir); | 838 | res = nfs_do_filldir(desc, dirent, filldir); |
811 | if (res < 0) { | 839 | if (res < 0) |
812 | res = 0; | ||
813 | break; | 840 | break; |
814 | } | 841 | } while (!desc->eof); |
815 | } | ||
816 | out: | 842 | out: |
817 | nfs_unblock_sillyrename(dentry); | 843 | nfs_unblock_sillyrename(dentry); |
818 | if (res > 0) | 844 | if (res > 0) |
@@ -1345,12 +1371,12 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry | |||
1345 | res = NULL; | 1371 | res = NULL; |
1346 | goto out; | 1372 | goto out; |
1347 | /* This turned out not to be a regular file */ | 1373 | /* This turned out not to be a regular file */ |
1348 | case -EISDIR: | ||
1349 | case -ENOTDIR: | 1374 | case -ENOTDIR: |
1350 | goto no_open; | 1375 | goto no_open; |
1351 | case -ELOOP: | 1376 | case -ELOOP: |
1352 | if (!(nd->intent.open.flags & O_NOFOLLOW)) | 1377 | if (!(nd->intent.open.flags & O_NOFOLLOW)) |
1353 | goto no_open; | 1378 | goto no_open; |
1379 | /* case -EISDIR: */ | ||
1354 | /* case -EINVAL: */ | 1380 | /* case -EINVAL: */ |
1355 | default: | 1381 | default: |
1356 | res = ERR_CAST(inode); | 1382 | res = ERR_CAST(inode); |
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 84d3c8b90206..e6ace0d93c71 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c | |||
@@ -867,7 +867,7 @@ static ssize_t nfs_direct_write(struct kiocb *iocb, const struct iovec *iov, | |||
867 | goto out; | 867 | goto out; |
868 | nfs_alloc_commit_data(dreq); | 868 | nfs_alloc_commit_data(dreq); |
869 | 869 | ||
870 | if (dreq->commit_data == NULL || count < wsize) | 870 | if (dreq->commit_data == NULL || count <= wsize) |
871 | sync = NFS_FILE_SYNC; | 871 | sync = NFS_FILE_SYNC; |
872 | 872 | ||
873 | dreq->inode = inode; | 873 | dreq->inode = inode; |
diff --git a/fs/nfs/file.c b/fs/nfs/file.c index e756075637b0..7bf029ef4084 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c | |||
@@ -693,6 +693,7 @@ do_getlk(struct file *filp, int cmd, struct file_lock *fl, int is_local) | |||
693 | { | 693 | { |
694 | struct inode *inode = filp->f_mapping->host; | 694 | struct inode *inode = filp->f_mapping->host; |
695 | int status = 0; | 695 | int status = 0; |
696 | unsigned int saved_type = fl->fl_type; | ||
696 | 697 | ||
697 | /* Try local locking first */ | 698 | /* Try local locking first */ |
698 | posix_test_lock(filp, fl); | 699 | posix_test_lock(filp, fl); |
@@ -700,6 +701,7 @@ do_getlk(struct file *filp, int cmd, struct file_lock *fl, int is_local) | |||
700 | /* found a conflict */ | 701 | /* found a conflict */ |
701 | goto out; | 702 | goto out; |
702 | } | 703 | } |
704 | fl->fl_type = saved_type; | ||
703 | 705 | ||
704 | if (nfs_have_delegation(inode, FMODE_READ)) | 706 | if (nfs_have_delegation(inode, FMODE_READ)) |
705 | goto out_noconflict; | 707 | goto out_noconflict; |
@@ -884,6 +886,5 @@ static int nfs_setlease(struct file *file, long arg, struct file_lock **fl) | |||
884 | dprintk("NFS: setlease(%s/%s, arg=%ld)\n", | 886 | dprintk("NFS: setlease(%s/%s, arg=%ld)\n", |
885 | file->f_path.dentry->d_parent->d_name.name, | 887 | file->f_path.dentry->d_parent->d_name.name, |
886 | file->f_path.dentry->d_name.name, arg); | 888 | file->f_path.dentry->d_name.name, arg); |
887 | |||
888 | return -EINVAL; | 889 | return -EINVAL; |
889 | } | 890 | } |
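The do_getlk() change saves and restores fl->fl_type around the local conflict check. posix_test_lock() rewrites fl->fl_type to F_UNLCK when it finds no conflicting local lock, so without the restore the subsequent lock request sent to the server would test an F_UNLCK lock instead of the type the caller asked about. Illustrative fragment:

	unsigned int saved_type = fl->fl_type;

	posix_test_lock(filp, fl);
	if (fl->fl_type != F_UNLCK)
		goto out;		/* a local lock conflicts; report it */
	fl->fl_type = saved_type;	/* restore before asking the server */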
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 314f57164602..e67e31c73416 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c | |||
@@ -289,6 +289,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr) | |||
289 | } else if (S_ISDIR(inode->i_mode)) { | 289 | } else if (S_ISDIR(inode->i_mode)) { |
290 | inode->i_op = NFS_SB(sb)->nfs_client->rpc_ops->dir_inode_ops; | 290 | inode->i_op = NFS_SB(sb)->nfs_client->rpc_ops->dir_inode_ops; |
291 | inode->i_fop = &nfs_dir_operations; | 291 | inode->i_fop = &nfs_dir_operations; |
292 | inode->i_data.a_ops = &nfs_dir_aops; | ||
292 | if (nfs_server_capable(inode, NFS_CAP_READDIRPLUS)) | 293 | if (nfs_server_capable(inode, NFS_CAP_READDIRPLUS)) |
293 | set_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(inode)->flags); | 294 | set_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(inode)->flags); |
294 | /* Deal with crossing mountpoints */ | 295 | /* Deal with crossing mountpoints */ |
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index db08ff3ff454..e6356b750b77 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h | |||
@@ -362,6 +362,15 @@ unsigned int nfs_page_length(struct page *page) | |||
362 | } | 362 | } |
363 | 363 | ||
364 | /* | 364 | /* |
365 | * Convert a umode to a dirent->d_type | ||
366 | */ | ||
367 | static inline | ||
368 | unsigned char nfs_umode_to_dtype(umode_t mode) | ||
369 | { | ||
370 | return (mode >> 12) & 15; | ||
371 | } | ||
372 | |||
373 | /* | ||
365 | * Determine the number of pages in an array of length 'len' and | 374 | * Determine the number of pages in an array of length 'len' and |
366 | * with a base offset of 'base' | 375 | * with a base offset of 'base' |
367 | */ | 376 | */ |
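nfs_umode_to_dtype() works because the DT_* constants are defined as the S_IF* file-type bits shifted down by 12, so a shift and mask is enough: S_IFREG (0100000) gives 8 = DT_REG, S_IFDIR (0040000) gives 4 = DT_DIR, and S_IFLNK (0120000) gives 10 = DT_LNK. Typical use in the dirent decoders later in this diff (illustrative fragment):

	/* Fill the readdir d_type from the decoded attributes when available. */
	entry->d_type = DT_UNKNOWN;
	if (entry->fattr->valid & NFS_ATTR_FATTR_TYPE)
		entry->d_type = nfs_umode_to_dtype(entry->fattr->mode);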
diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c index eceafe74f473..4f981f1f6689 100644 --- a/fs/nfs/mount_clnt.c +++ b/fs/nfs/mount_clnt.c | |||
@@ -505,13 +505,13 @@ static struct rpc_procinfo mnt3_procedures[] = { | |||
505 | 505 | ||
506 | static struct rpc_version mnt_version1 = { | 506 | static struct rpc_version mnt_version1 = { |
507 | .number = 1, | 507 | .number = 1, |
508 | .nrprocs = 2, | 508 | .nrprocs = ARRAY_SIZE(mnt_procedures), |
509 | .procs = mnt_procedures, | 509 | .procs = mnt_procedures, |
510 | }; | 510 | }; |
511 | 511 | ||
512 | static struct rpc_version mnt_version3 = { | 512 | static struct rpc_version mnt_version3 = { |
513 | .number = 3, | 513 | .number = 3, |
514 | .nrprocs = 2, | 514 | .nrprocs = ARRAY_SIZE(mnt3_procedures), |
515 | .procs = mnt3_procedures, | 515 | .procs = mnt3_procedures, |
516 | }; | 516 | }; |
517 | 517 | ||
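Replacing the hand-written .nrprocs = 2 with ARRAY_SIZE() matters because these procedure tables are indexed by RPC procedure number with designated initializers, so the array length can exceed the number of populated entries (MOUNTPROC_UMNT is procedure 3, for instance); a hard-coded 2 would reject UMNT as out of range. ARRAY_SIZE() is, roughly:

/* Simplified form of the <linux/kernel.h> macro (the real one also
 * type-checks that its argument really is an array): */
#define ARRAY_SIZE(arr)	(sizeof(arr) / sizeof((arr)[0]))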
diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c index e6bf45710cc7..5914a1911c95 100644 --- a/fs/nfs/nfs2xdr.c +++ b/fs/nfs/nfs2xdr.c | |||
@@ -423,7 +423,7 @@ nfs_xdr_readdirres(struct rpc_rqst *req, __be32 *p, void *dummy) | |||
423 | struct page **page; | 423 | struct page **page; |
424 | size_t hdrlen; | 424 | size_t hdrlen; |
425 | unsigned int pglen, recvd; | 425 | unsigned int pglen, recvd; |
426 | int status, nr = 0; | 426 | int status; |
427 | 427 | ||
428 | if ((status = ntohl(*p++))) | 428 | if ((status = ntohl(*p++))) |
429 | return nfs_stat_to_errno(status); | 429 | return nfs_stat_to_errno(status); |
@@ -443,7 +443,7 @@ nfs_xdr_readdirres(struct rpc_rqst *req, __be32 *p, void *dummy) | |||
443 | if (pglen > recvd) | 443 | if (pglen > recvd) |
444 | pglen = recvd; | 444 | pglen = recvd; |
445 | page = rcvbuf->pages; | 445 | page = rcvbuf->pages; |
446 | return nr; | 446 | return pglen; |
447 | } | 447 | } |
448 | 448 | ||
449 | static void print_overflow_msg(const char *func, const struct xdr_stream *xdr) | 449 | static void print_overflow_msg(const char *func, const struct xdr_stream *xdr) |
@@ -485,6 +485,8 @@ nfs_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, struct nfs_se | |||
485 | entry->prev_cookie = entry->cookie; | 485 | entry->prev_cookie = entry->cookie; |
486 | entry->cookie = ntohl(*p++); | 486 | entry->cookie = ntohl(*p++); |
487 | 487 | ||
488 | entry->d_type = DT_UNKNOWN; | ||
489 | |||
488 | p = xdr_inline_peek(xdr, 8); | 490 | p = xdr_inline_peek(xdr, 8); |
489 | if (p != NULL) | 491 | if (p != NULL) |
490 | entry->eof = !p[0] && p[1]; | 492 | entry->eof = !p[0] && p[1]; |
@@ -495,7 +497,7 @@ nfs_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, struct nfs_se | |||
495 | 497 | ||
496 | out_overflow: | 498 | out_overflow: |
497 | print_overflow_msg(__func__, xdr); | 499 | print_overflow_msg(__func__, xdr); |
498 | return ERR_PTR(-EIO); | 500 | return ERR_PTR(-EAGAIN); |
499 | } | 501 | } |
500 | 502 | ||
501 | /* | 503 | /* |
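Two conventions change in the NFSv2 readdir XDR (and in the v3/v4 versions below): the reply decoder returns pglen, the number of dirent bytes actually received, instead of a constant 0, and the per-entry decoder reports XDR overflow as -EAGAIN rather than -EIO. The dir.c filler shown earlier treats -EAGAIN as "end of the data in this reply" and stops cleanly, roughly:

	status = xdr_decode(desc, entry, &stream);
	if (status != 0) {
		if (status == -EAGAIN)	/* decoder ran out of received data */
			status = 0;	/* not an error, just end this fill */
		break;
	}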
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c index d9a5e832c257..f6cc60f06dac 100644 --- a/fs/nfs/nfs3xdr.c +++ b/fs/nfs/nfs3xdr.c | |||
@@ -555,7 +555,7 @@ nfs3_xdr_readdirres(struct rpc_rqst *req, __be32 *p, struct nfs3_readdirres *res | |||
555 | struct page **page; | 555 | struct page **page; |
556 | size_t hdrlen; | 556 | size_t hdrlen; |
557 | u32 recvd, pglen; | 557 | u32 recvd, pglen; |
558 | int status, nr = 0; | 558 | int status; |
559 | 559 | ||
560 | status = ntohl(*p++); | 560 | status = ntohl(*p++); |
561 | /* Decode post_op_attrs */ | 561 | /* Decode post_op_attrs */ |
@@ -586,7 +586,7 @@ nfs3_xdr_readdirres(struct rpc_rqst *req, __be32 *p, struct nfs3_readdirres *res | |||
586 | pglen = recvd; | 586 | pglen = recvd; |
587 | page = rcvbuf->pages; | 587 | page = rcvbuf->pages; |
588 | 588 | ||
589 | return nr; | 589 | return pglen; |
590 | } | 590 | } |
591 | 591 | ||
592 | __be32 * | 592 | __be32 * |
@@ -622,11 +622,13 @@ nfs3_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, struct nfs_s | |||
622 | entry->prev_cookie = entry->cookie; | 622 | entry->prev_cookie = entry->cookie; |
623 | p = xdr_decode_hyper(p, &entry->cookie); | 623 | p = xdr_decode_hyper(p, &entry->cookie); |
624 | 624 | ||
625 | entry->d_type = DT_UNKNOWN; | ||
625 | if (plus) { | 626 | if (plus) { |
626 | entry->fattr->valid = 0; | 627 | entry->fattr->valid = 0; |
627 | p = xdr_decode_post_op_attr_stream(xdr, entry->fattr); | 628 | p = xdr_decode_post_op_attr_stream(xdr, entry->fattr); |
628 | if (IS_ERR(p)) | 629 | if (IS_ERR(p)) |
629 | goto out_overflow_exit; | 630 | goto out_overflow_exit; |
631 | entry->d_type = nfs_umode_to_dtype(entry->fattr->mode); | ||
630 | /* In fact, a post_op_fh3: */ | 632 | /* In fact, a post_op_fh3: */ |
631 | p = xdr_inline_decode(xdr, 4); | 633 | p = xdr_inline_decode(xdr, 4); |
632 | if (unlikely(!p)) | 634 | if (unlikely(!p)) |
@@ -656,7 +658,7 @@ nfs3_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, struct nfs_s | |||
656 | out_overflow: | 658 | out_overflow: |
657 | print_overflow_msg(__func__, xdr); | 659 | print_overflow_msg(__func__, xdr); |
658 | out_overflow_exit: | 660 | out_overflow_exit: |
659 | return ERR_PTR(-EIO); | 661 | return ERR_PTR(-EAGAIN); |
660 | } | 662 | } |
661 | 663 | ||
662 | /* | 664 | /* |
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 0f24cdf2cb13..4435e5e1f904 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c | |||
@@ -2852,8 +2852,10 @@ static int _nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred, | |||
2852 | nfs4_setup_readdir(cookie, NFS_COOKIEVERF(dir), dentry, &args); | 2852 | nfs4_setup_readdir(cookie, NFS_COOKIEVERF(dir), dentry, &args); |
2853 | res.pgbase = args.pgbase; | 2853 | res.pgbase = args.pgbase; |
2854 | status = nfs4_call_sync(NFS_SERVER(dir), &msg, &args, &res, 0); | 2854 | status = nfs4_call_sync(NFS_SERVER(dir), &msg, &args, &res, 0); |
2855 | if (status == 0) | 2855 | if (status >= 0) { |
2856 | memcpy(NFS_COOKIEVERF(dir), res.verifier.data, NFS4_VERIFIER_SIZE); | 2856 | memcpy(NFS_COOKIEVERF(dir), res.verifier.data, NFS4_VERIFIER_SIZE); |
2857 | status += args.pgbase; | ||
2858 | } | ||
2857 | 2859 | ||
2858 | nfs_invalidate_atime(dir); | 2860 | nfs_invalidate_atime(dir); |
2859 | 2861 | ||
@@ -3359,6 +3361,8 @@ static ssize_t nfs4_proc_get_acl(struct inode *inode, void *buf, size_t buflen) | |||
3359 | ret = nfs_revalidate_inode(server, inode); | 3361 | ret = nfs_revalidate_inode(server, inode); |
3360 | if (ret < 0) | 3362 | if (ret < 0) |
3361 | return ret; | 3363 | return ret; |
3364 | if (NFS_I(inode)->cache_validity & NFS_INO_INVALID_ACL) | ||
3365 | nfs_zap_acl_cache(inode); | ||
3362 | ret = nfs4_read_cached_acl(inode, buf, buflen); | 3366 | ret = nfs4_read_cached_acl(inode, buf, buflen); |
3363 | if (ret != -ENOENT) | 3367 | if (ret != -ENOENT) |
3364 | return ret; | 3368 | return ret; |
@@ -3387,6 +3391,13 @@ static int __nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t bufl | |||
3387 | nfs_inode_return_delegation(inode); | 3391 | nfs_inode_return_delegation(inode); |
3388 | buf_to_pages(buf, buflen, arg.acl_pages, &arg.acl_pgbase); | 3392 | buf_to_pages(buf, buflen, arg.acl_pages, &arg.acl_pgbase); |
3389 | ret = nfs4_call_sync(server, &msg, &arg, &res, 1); | 3393 | ret = nfs4_call_sync(server, &msg, &arg, &res, 1); |
3394 | /* | ||
3395 | * Acl update can result in inode attribute update. | ||
3396 | * so mark the attribute cache invalid. | ||
3397 | */ | ||
3398 | spin_lock(&inode->i_lock); | ||
3399 | NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ATTR; | ||
3400 | spin_unlock(&inode->i_lock); | ||
3390 | nfs_access_zap_cache(inode); | 3401 | nfs_access_zap_cache(inode); |
3391 | nfs_zap_acl_cache(inode); | 3402 | nfs_zap_acl_cache(inode); |
3392 | return ret; | 3403 | return ret; |
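Three things change in nfs4proc.c: _nfs4_proc_readdir() now returns the number of dirent bytes decoded (status plus args.pgbase) rather than 0, GETACL drops a cached ACL that has been flagged NFS_INO_INVALID_ACL before reading it, and SETACL marks the attribute cache invalid because an ACL update can change the mode bits the server reports. The invalidation pattern used above, as a fragment:

	/* After a successful SETACL, force the next revalidation to refetch
	 * attributes so a server-side mode change is not masked by the cache. */
	spin_lock(&inode->i_lock);
	NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ATTR;
	spin_unlock(&inode->i_lock);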
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index f313c4cce7e4..9f1826b012e6 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c | |||
@@ -4518,7 +4518,7 @@ static int decode_readdir(struct xdr_stream *xdr, struct rpc_rqst *req, struct n | |||
4518 | xdr_read_pages(xdr, pglen); | 4518 | xdr_read_pages(xdr, pglen); |
4519 | 4519 | ||
4520 | 4520 | ||
4521 | return 0; | 4521 | return pglen; |
4522 | } | 4522 | } |
4523 | 4523 | ||
4524 | static int decode_readlink(struct xdr_stream *xdr, struct rpc_rqst *req) | 4524 | static int decode_readlink(struct xdr_stream *xdr, struct rpc_rqst *req) |
@@ -6208,6 +6208,10 @@ __be32 *nfs4_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, | |||
6208 | if (entry->fattr->valid & NFS_ATTR_FATTR_FILEID) | 6208 | if (entry->fattr->valid & NFS_ATTR_FATTR_FILEID) |
6209 | entry->ino = entry->fattr->fileid; | 6209 | entry->ino = entry->fattr->fileid; |
6210 | 6210 | ||
6211 | entry->d_type = DT_UNKNOWN; | ||
6212 | if (entry->fattr->valid & NFS_ATTR_FATTR_TYPE) | ||
6213 | entry->d_type = nfs_umode_to_dtype(entry->fattr->mode); | ||
6214 | |||
6211 | if (verify_attr_len(xdr, p, len) < 0) | 6215 | if (verify_attr_len(xdr, p, len) < 0) |
6212 | goto out_overflow; | 6216 | goto out_overflow; |
6213 | 6217 | ||
@@ -6221,7 +6225,7 @@ __be32 *nfs4_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, | |||
6221 | 6225 | ||
6222 | out_overflow: | 6226 | out_overflow: |
6223 | print_overflow_msg(__func__, xdr); | 6227 | print_overflow_msg(__func__, xdr); |
6224 | return ERR_PTR(-EIO); | 6228 | return ERR_PTR(-EAGAIN); |
6225 | } | 6229 | } |
6226 | 6230 | ||
6227 | /* | 6231 | /* |
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 137b549e63db..b68536cc9046 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c | |||
@@ -115,7 +115,7 @@ int nfs_set_page_tag_locked(struct nfs_page *req) | |||
115 | { | 115 | { |
116 | if (!nfs_lock_request_dontget(req)) | 116 | if (!nfs_lock_request_dontget(req)) |
117 | return 0; | 117 | return 0; |
118 | if (req->wb_page != NULL) | 118 | if (test_bit(PG_MAPPED, &req->wb_flags)) |
119 | radix_tree_tag_set(&NFS_I(req->wb_context->path.dentry->d_inode)->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED); | 119 | radix_tree_tag_set(&NFS_I(req->wb_context->path.dentry->d_inode)->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED); |
120 | return 1; | 120 | return 1; |
121 | } | 121 | } |
@@ -125,7 +125,7 @@ int nfs_set_page_tag_locked(struct nfs_page *req) | |||
125 | */ | 125 | */ |
126 | void nfs_clear_page_tag_locked(struct nfs_page *req) | 126 | void nfs_clear_page_tag_locked(struct nfs_page *req) |
127 | { | 127 | { |
128 | if (req->wb_page != NULL) { | 128 | if (test_bit(PG_MAPPED, &req->wb_flags)) { |
129 | struct inode *inode = req->wb_context->path.dentry->d_inode; | 129 | struct inode *inode = req->wb_context->path.dentry->d_inode; |
130 | struct nfs_inode *nfsi = NFS_I(inode); | 130 | struct nfs_inode *nfsi = NFS_I(inode); |
131 | 131 | ||
diff --git a/fs/nfs/read.c b/fs/nfs/read.c index e4b62c6f5a6e..aedcaa7f291f 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c | |||
@@ -152,7 +152,6 @@ static void nfs_readpage_release(struct nfs_page *req) | |||
152 | (long long)NFS_FILEID(req->wb_context->path.dentry->d_inode), | 152 | (long long)NFS_FILEID(req->wb_context->path.dentry->d_inode), |
153 | req->wb_bytes, | 153 | req->wb_bytes, |
154 | (long long)req_offset(req)); | 154 | (long long)req_offset(req)); |
155 | nfs_clear_request(req); | ||
156 | nfs_release_request(req); | 155 | nfs_release_request(req); |
157 | } | 156 | } |
158 | 157 | ||
diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 0a42e8f4adcb..4100630c9a5b 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c | |||
@@ -39,7 +39,6 @@ | |||
39 | #include <linux/nfs_mount.h> | 39 | #include <linux/nfs_mount.h> |
40 | #include <linux/nfs4_mount.h> | 40 | #include <linux/nfs4_mount.h> |
41 | #include <linux/lockd/bind.h> | 41 | #include <linux/lockd/bind.h> |
42 | #include <linux/smp_lock.h> | ||
43 | #include <linux/seq_file.h> | 42 | #include <linux/seq_file.h> |
44 | #include <linux/mount.h> | 43 | #include <linux/mount.h> |
45 | #include <linux/mnt_namespace.h> | 44 | #include <linux/mnt_namespace.h> |
@@ -67,6 +66,12 @@ | |||
67 | 66 | ||
68 | #define NFSDBG_FACILITY NFSDBG_VFS | 67 | #define NFSDBG_FACILITY NFSDBG_VFS |
69 | 68 | ||
69 | #ifdef CONFIG_NFS_V3 | ||
70 | #define NFS_DEFAULT_VERSION 3 | ||
71 | #else | ||
72 | #define NFS_DEFAULT_VERSION 2 | ||
73 | #endif | ||
74 | |||
70 | enum { | 75 | enum { |
71 | /* Mount options that take no arguments */ | 76 | /* Mount options that take no arguments */ |
72 | Opt_soft, Opt_hard, | 77 | Opt_soft, Opt_hard, |
@@ -1064,12 +1069,10 @@ static int nfs_parse_mount_options(char *raw, | |||
1064 | mnt->flags |= NFS_MOUNT_VER3; | 1069 | mnt->flags |= NFS_MOUNT_VER3; |
1065 | mnt->version = 3; | 1070 | mnt->version = 3; |
1066 | break; | 1071 | break; |
1067 | #ifdef CONFIG_NFS_V4 | ||
1068 | case Opt_v4: | 1072 | case Opt_v4: |
1069 | mnt->flags &= ~NFS_MOUNT_VER3; | 1073 | mnt->flags &= ~NFS_MOUNT_VER3; |
1070 | mnt->version = 4; | 1074 | mnt->version = 4; |
1071 | break; | 1075 | break; |
1072 | #endif | ||
1073 | case Opt_udp: | 1076 | case Opt_udp: |
1074 | mnt->flags &= ~NFS_MOUNT_TCP; | 1077 | mnt->flags &= ~NFS_MOUNT_TCP; |
1075 | mnt->nfs_server.protocol = XPRT_TRANSPORT_UDP; | 1078 | mnt->nfs_server.protocol = XPRT_TRANSPORT_UDP; |
@@ -1281,12 +1284,10 @@ static int nfs_parse_mount_options(char *raw, | |||
1281 | mnt->flags |= NFS_MOUNT_VER3; | 1284 | mnt->flags |= NFS_MOUNT_VER3; |
1282 | mnt->version = 3; | 1285 | mnt->version = 3; |
1283 | break; | 1286 | break; |
1284 | #ifdef CONFIG_NFS_V4 | ||
1285 | case NFS4_VERSION: | 1287 | case NFS4_VERSION: |
1286 | mnt->flags &= ~NFS_MOUNT_VER3; | 1288 | mnt->flags &= ~NFS_MOUNT_VER3; |
1287 | mnt->version = 4; | 1289 | mnt->version = 4; |
1288 | break; | 1290 | break; |
1289 | #endif | ||
1290 | default: | 1291 | default: |
1291 | goto out_invalid_value; | 1292 | goto out_invalid_value; |
1292 | } | 1293 | } |
@@ -2277,7 +2278,7 @@ static int nfs_get_sb(struct file_system_type *fs_type, | |||
2277 | }; | 2278 | }; |
2278 | int error = -ENOMEM; | 2279 | int error = -ENOMEM; |
2279 | 2280 | ||
2280 | data = nfs_alloc_parsed_mount_data(3); | 2281 | data = nfs_alloc_parsed_mount_data(NFS_DEFAULT_VERSION); |
2281 | mntfh = nfs_alloc_fhandle(); | 2282 | mntfh = nfs_alloc_fhandle(); |
2282 | if (data == NULL || mntfh == NULL) | 2283 | if (data == NULL || mntfh == NULL) |
2283 | goto out_free_fh; | 2284 | goto out_free_fh; |
diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 4c14c17a5276..10d648ea128b 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c | |||
@@ -390,6 +390,7 @@ static int nfs_inode_add_request(struct inode *inode, struct nfs_page *req) | |||
390 | if (nfs_have_delegation(inode, FMODE_WRITE)) | 390 | if (nfs_have_delegation(inode, FMODE_WRITE)) |
391 | nfsi->change_attr++; | 391 | nfsi->change_attr++; |
392 | } | 392 | } |
393 | set_bit(PG_MAPPED, &req->wb_flags); | ||
393 | SetPagePrivate(req->wb_page); | 394 | SetPagePrivate(req->wb_page); |
394 | set_page_private(req->wb_page, (unsigned long)req); | 395 | set_page_private(req->wb_page, (unsigned long)req); |
395 | nfsi->npages++; | 396 | nfsi->npages++; |
@@ -415,6 +416,7 @@ static void nfs_inode_remove_request(struct nfs_page *req) | |||
415 | spin_lock(&inode->i_lock); | 416 | spin_lock(&inode->i_lock); |
416 | set_page_private(req->wb_page, 0); | 417 | set_page_private(req->wb_page, 0); |
417 | ClearPagePrivate(req->wb_page); | 418 | ClearPagePrivate(req->wb_page); |
419 | clear_bit(PG_MAPPED, &req->wb_flags); | ||
418 | radix_tree_delete(&nfsi->nfs_page_tree, req->wb_index); | 420 | radix_tree_delete(&nfsi->nfs_page_tree, req->wb_index); |
419 | nfsi->npages--; | 421 | nfsi->npages--; |
420 | if (!nfsi->npages) { | 422 | if (!nfsi->npages) { |
@@ -422,7 +424,6 @@ static void nfs_inode_remove_request(struct nfs_page *req) | |||
422 | iput(inode); | 424 | iput(inode); |
423 | } else | 425 | } else |
424 | spin_unlock(&inode->i_lock); | 426 | spin_unlock(&inode->i_lock); |
425 | nfs_clear_request(req); | ||
426 | nfs_release_request(req); | 427 | nfs_release_request(req); |
427 | } | 428 | } |
428 | 429 | ||
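The new PG_MAPPED bit in req->wb_flags tracks whether a request is currently linked into the inode's nfs_page_tree: it is set in nfs_inode_add_request() and cleared in nfs_inode_remove_request(), and the pagelist.c helpers earlier in this diff test it instead of wb_page, which can be non-NULL for requests that were never inserted into the tree. Sketch of the invariant:

	/* nfs_inode_add_request(): request enters the radix tree */
	set_bit(PG_MAPPED, &req->wb_flags);
	SetPagePrivate(req->wb_page);

	/* nfs_inode_remove_request(): request leaves the radix tree */
	ClearPagePrivate(req->wb_page);
	clear_bit(PG_MAPPED, &req->wb_flags);

	/* nfs_set_page_tag_locked()/nfs_clear_page_tag_locked() only touch
	 * the NFS_PAGE_TAG_LOCKED radix-tree tag while PG_MAPPED is set. */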
diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c index 2a533a0af2a9..7e84a852cdae 100644 --- a/fs/nfsd/nfs3xdr.c +++ b/fs/nfsd/nfs3xdr.c | |||
@@ -260,9 +260,11 @@ void fill_post_wcc(struct svc_fh *fhp) | |||
260 | err = vfs_getattr(fhp->fh_export->ex_path.mnt, fhp->fh_dentry, | 260 | err = vfs_getattr(fhp->fh_export->ex_path.mnt, fhp->fh_dentry, |
261 | &fhp->fh_post_attr); | 261 | &fhp->fh_post_attr); |
262 | fhp->fh_post_change = fhp->fh_dentry->d_inode->i_version; | 262 | fhp->fh_post_change = fhp->fh_dentry->d_inode->i_version; |
263 | if (err) | 263 | if (err) { |
264 | fhp->fh_post_saved = 0; | 264 | fhp->fh_post_saved = 0; |
265 | else | 265 | /* Grab the ctime anyway - set_change_info might use it */ |
266 | fhp->fh_post_attr.ctime = fhp->fh_dentry->d_inode->i_ctime; | ||
267 | } else | ||
266 | fhp->fh_post_saved = 1; | 268 | fhp->fh_post_saved = 1; |
267 | } | 269 | } |
268 | 270 | ||
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 56347e0ac88d..116cab970e0f 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c | |||
@@ -673,16 +673,17 @@ static void nfsd4_hash_conn(struct nfsd4_conn *conn, struct nfsd4_session *ses) | |||
673 | spin_unlock(&clp->cl_lock); | 673 | spin_unlock(&clp->cl_lock); |
674 | } | 674 | } |
675 | 675 | ||
676 | static void nfsd4_register_conn(struct nfsd4_conn *conn) | 676 | static int nfsd4_register_conn(struct nfsd4_conn *conn) |
677 | { | 677 | { |
678 | conn->cn_xpt_user.callback = nfsd4_conn_lost; | 678 | conn->cn_xpt_user.callback = nfsd4_conn_lost; |
679 | register_xpt_user(conn->cn_xprt, &conn->cn_xpt_user); | 679 | return register_xpt_user(conn->cn_xprt, &conn->cn_xpt_user); |
680 | } | 680 | } |
681 | 681 | ||
682 | static __be32 nfsd4_new_conn(struct svc_rqst *rqstp, struct nfsd4_session *ses) | 682 | static __be32 nfsd4_new_conn(struct svc_rqst *rqstp, struct nfsd4_session *ses) |
683 | { | 683 | { |
684 | struct nfsd4_conn *conn; | 684 | struct nfsd4_conn *conn; |
685 | u32 flags = NFS4_CDFC4_FORE; | 685 | u32 flags = NFS4_CDFC4_FORE; |
686 | int ret; | ||
686 | 687 | ||
687 | if (ses->se_flags & SESSION4_BACK_CHAN) | 688 | if (ses->se_flags & SESSION4_BACK_CHAN) |
688 | flags |= NFS4_CDFC4_BACK; | 689 | flags |= NFS4_CDFC4_BACK; |
@@ -690,7 +691,10 @@ static __be32 nfsd4_new_conn(struct svc_rqst *rqstp, struct nfsd4_session *ses) | |||
690 | if (!conn) | 691 | if (!conn) |
691 | return nfserr_jukebox; | 692 | return nfserr_jukebox; |
692 | nfsd4_hash_conn(conn, ses); | 693 | nfsd4_hash_conn(conn, ses); |
693 | nfsd4_register_conn(conn); | 694 | ret = nfsd4_register_conn(conn); |
695 | if (ret) | ||
696 | /* oops; xprt is already down: */ | ||
697 | nfsd4_conn_lost(&conn->cn_xpt_user); | ||
694 | return nfs_ok; | 698 | return nfs_ok; |
695 | } | 699 | } |
696 | 700 | ||
@@ -1644,6 +1648,7 @@ static void nfsd4_sequence_check_conn(struct nfsd4_conn *new, struct nfsd4_sessi | |||
1644 | { | 1648 | { |
1645 | struct nfs4_client *clp = ses->se_client; | 1649 | struct nfs4_client *clp = ses->se_client; |
1646 | struct nfsd4_conn *c; | 1650 | struct nfsd4_conn *c; |
1651 | int ret; | ||
1647 | 1652 | ||
1648 | spin_lock(&clp->cl_lock); | 1653 | spin_lock(&clp->cl_lock); |
1649 | c = __nfsd4_find_conn(new->cn_xprt, ses); | 1654 | c = __nfsd4_find_conn(new->cn_xprt, ses); |
@@ -1654,7 +1659,10 @@ static void nfsd4_sequence_check_conn(struct nfsd4_conn *new, struct nfsd4_sessi | |||
1654 | } | 1659 | } |
1655 | __nfsd4_hash_conn(new, ses); | 1660 | __nfsd4_hash_conn(new, ses); |
1656 | spin_unlock(&clp->cl_lock); | 1661 | spin_unlock(&clp->cl_lock); |
1657 | nfsd4_register_conn(new); | 1662 | ret = nfsd4_register_conn(new); |
1663 | if (ret) | ||
1664 | /* oops; xprt is already down: */ | ||
1665 | nfsd4_conn_lost(&new->cn_xpt_user); | ||
1658 | return; | 1666 | return; |
1659 | } | 1667 | } |
1660 | 1668 | ||
@@ -2254,7 +2262,7 @@ nfs4_file_downgrade(struct nfs4_file *fp, unsigned int share_access) | |||
2254 | * Spawn a thread to perform a recall on the delegation represented | 2262 | * Spawn a thread to perform a recall on the delegation represented |
2255 | * by the lease (file_lock) | 2263 | * by the lease (file_lock) |
2256 | * | 2264 | * |
2257 | * Called from break_lease() with lock_kernel() held. | 2265 | * Called from break_lease() with lock_flocks() held. |
2258 | * Note: we assume break_lease will only call this *once* for any given | 2266 | * Note: we assume break_lease will only call this *once* for any given |
2259 | * lease. | 2267 | * lease. |
2260 | */ | 2268 | */ |
@@ -2278,7 +2286,7 @@ void nfsd_break_deleg_cb(struct file_lock *fl) | |||
2278 | list_add_tail(&dp->dl_recall_lru, &del_recall_lru); | 2286 | list_add_tail(&dp->dl_recall_lru, &del_recall_lru); |
2279 | spin_unlock(&recall_lock); | 2287 | spin_unlock(&recall_lock); |
2280 | 2288 | ||
2281 | /* only place dl_time is set. protected by lock_kernel*/ | 2289 | /* only place dl_time is set. protected by lock_flocks*/ |
2282 | dp->dl_time = get_seconds(); | 2290 | dp->dl_time = get_seconds(); |
2283 | 2291 | ||
2284 | /* | 2292 | /* |
@@ -2295,7 +2303,7 @@ void nfsd_break_deleg_cb(struct file_lock *fl) | |||
2295 | /* | 2303 | /* |
2296 | * The file_lock is being reaped. | 2304 | * The file_lock is being reaped. |
2297 | * | 2305 | * |
2298 | * Called by locks_free_lock() with lock_kernel() held. | 2306 | * Called by locks_free_lock() with lock_flocks() held. |
2299 | */ | 2307 | */ |
2300 | static | 2308 | static |
2301 | void nfsd_release_deleg_cb(struct file_lock *fl) | 2309 | void nfsd_release_deleg_cb(struct file_lock *fl) |
@@ -2310,23 +2318,7 @@ void nfsd_release_deleg_cb(struct file_lock *fl) | |||
2310 | } | 2318 | } |
2311 | 2319 | ||
2312 | /* | 2320 | /* |
2313 | * Set the delegation file_lock back pointer. | 2321 | * Called from setlease() with lock_flocks() held |
2314 | * | ||
2315 | * Called from setlease() with lock_kernel() held. | ||
2316 | */ | ||
2317 | static | ||
2318 | void nfsd_copy_lock_deleg_cb(struct file_lock *new, struct file_lock *fl) | ||
2319 | { | ||
2320 | struct nfs4_delegation *dp = (struct nfs4_delegation *)new->fl_owner; | ||
2321 | |||
2322 | dprintk("NFSD: nfsd_copy_lock_deleg_cb: new fl %p dp %p\n", new, dp); | ||
2323 | if (!dp) | ||
2324 | return; | ||
2325 | dp->dl_flock = new; | ||
2326 | } | ||
2327 | |||
2328 | /* | ||
2329 | * Called from setlease() with lock_kernel() held | ||
2330 | */ | 2322 | */ |
2331 | static | 2323 | static |
2332 | int nfsd_same_client_deleg_cb(struct file_lock *onlist, struct file_lock *try) | 2324 | int nfsd_same_client_deleg_cb(struct file_lock *onlist, struct file_lock *try) |
@@ -2355,7 +2347,6 @@ int nfsd_change_deleg_cb(struct file_lock **onlist, int arg) | |||
2355 | static const struct lock_manager_operations nfsd_lease_mng_ops = { | 2347 | static const struct lock_manager_operations nfsd_lease_mng_ops = { |
2356 | .fl_break = nfsd_break_deleg_cb, | 2348 | .fl_break = nfsd_break_deleg_cb, |
2357 | .fl_release_private = nfsd_release_deleg_cb, | 2349 | .fl_release_private = nfsd_release_deleg_cb, |
2358 | .fl_copy_lock = nfsd_copy_lock_deleg_cb, | ||
2359 | .fl_mylease = nfsd_same_client_deleg_cb, | 2350 | .fl_mylease = nfsd_same_client_deleg_cb, |
2360 | .fl_change = nfsd_change_deleg_cb, | 2351 | .fl_change = nfsd_change_deleg_cb, |
2361 | }; | 2352 | }; |
@@ -2661,12 +2652,15 @@ nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, struct nfs4_sta | |||
2661 | fl->fl_file = find_readable_file(stp->st_file); | 2652 | fl->fl_file = find_readable_file(stp->st_file); |
2662 | BUG_ON(!fl->fl_file); | 2653 | BUG_ON(!fl->fl_file); |
2663 | fl->fl_pid = current->tgid; | 2654 | fl->fl_pid = current->tgid; |
2655 | dp->dl_flock = fl; | ||
2664 | 2656 | ||
2665 | /* vfs_setlease checks to see if delegation should be handed out. | 2657 | /* vfs_setlease checks to see if delegation should be handed out. |
2666 | * the lock_manager callbacks fl_mylease and fl_change are used | 2658 | * the lock_manager callbacks fl_mylease and fl_change are used |
2667 | */ | 2659 | */ |
2668 | if ((status = vfs_setlease(fl->fl_file, fl->fl_type, &fl))) { | 2660 | if ((status = vfs_setlease(fl->fl_file, fl->fl_type, &fl))) { |
2669 | dprintk("NFSD: setlease failed [%d], no delegation\n", status); | 2661 | dprintk("NFSD: setlease failed [%d], no delegation\n", status); |
2662 | dp->dl_flock = NULL; | ||
2663 | locks_free_lock(fl); | ||
2670 | unhash_delegation(dp); | 2664 | unhash_delegation(dp); |
2671 | flag = NFS4_OPEN_DELEGATE_NONE; | 2665 | flag = NFS4_OPEN_DELEGATE_NONE; |
2672 | goto out; | 2666 | goto out; |
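Two hardening changes run through the nfs4state.c hunks: nfsd4_register_conn() now propagates the return value of register_xpt_user(), so a transport that has already been shut down causes the connection to be dropped immediately via nfsd4_conn_lost() instead of being leaked, and the delegation code stops relying on the removed fl_copy_lock callback, setting dp->dl_flock before vfs_setlease() and freeing the lock itself if setlease fails. Error-path fragment:

	ret = nfsd4_register_conn(conn);
	if (ret)
		/* the xprt is already down; treat it as a lost connection */
		nfsd4_conn_lost(&conn->cn_xpt_user);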
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h index 4d476ff08ae6..60fce3dc5cb5 100644 --- a/fs/nfsd/xdr4.h +++ b/fs/nfsd/xdr4.h | |||
@@ -484,18 +484,17 @@ static inline bool nfsd4_not_cached(struct nfsd4_compoundres *resp) | |||
484 | static inline void | 484 | static inline void |
485 | set_change_info(struct nfsd4_change_info *cinfo, struct svc_fh *fhp) | 485 | set_change_info(struct nfsd4_change_info *cinfo, struct svc_fh *fhp) |
486 | { | 486 | { |
487 | BUG_ON(!fhp->fh_pre_saved || !fhp->fh_post_saved); | 487 | BUG_ON(!fhp->fh_pre_saved); |
488 | cinfo->atomic = 1; | 488 | cinfo->atomic = fhp->fh_post_saved; |
489 | cinfo->change_supported = IS_I_VERSION(fhp->fh_dentry->d_inode); | 489 | cinfo->change_supported = IS_I_VERSION(fhp->fh_dentry->d_inode); |
490 | if (cinfo->change_supported) { | 490 | |
491 | cinfo->before_change = fhp->fh_pre_change; | 491 | cinfo->before_change = fhp->fh_pre_change; |
492 | cinfo->after_change = fhp->fh_post_change; | 492 | cinfo->after_change = fhp->fh_post_change; |
493 | } else { | 493 | cinfo->before_ctime_sec = fhp->fh_pre_ctime.tv_sec; |
494 | cinfo->before_ctime_sec = fhp->fh_pre_ctime.tv_sec; | 494 | cinfo->before_ctime_nsec = fhp->fh_pre_ctime.tv_nsec; |
495 | cinfo->before_ctime_nsec = fhp->fh_pre_ctime.tv_nsec; | 495 | cinfo->after_ctime_sec = fhp->fh_post_attr.ctime.tv_sec; |
496 | cinfo->after_ctime_sec = fhp->fh_post_attr.ctime.tv_sec; | 496 | cinfo->after_ctime_nsec = fhp->fh_post_attr.ctime.tv_nsec; |
497 | cinfo->after_ctime_nsec = fhp->fh_post_attr.ctime.tv_nsec; | 497 | |
498 | } | ||
499 | } | 498 | } |
500 | 499 | ||
501 | int nfs4svc_encode_voidres(struct svc_rqst *, __be32 *, void *); | 500 | int nfs4svc_encode_voidres(struct svc_rqst *, __be32 *, void *); |
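set_change_info() no longer BUGs when post-operation attributes could not be saved: cinfo->atomic now simply reports fh_post_saved, and both the change-attribute and ctime pairs are filled in unconditionally. This pairs with the fill_post_wcc() hunk earlier, which copies the inode ctime into fh_post_attr even when vfs_getattr() fails, so there is always a usable after_ctime. Fragment of the resulting behaviour:

	BUG_ON(!fhp->fh_pre_saved);		/* pre-op attrs still required */
	cinfo->atomic = fhp->fh_post_saved;	/* honest, instead of always 1 */
	cinfo->after_ctime_sec  = fhp->fh_post_attr.ctime.tv_sec;
	cinfo->after_ctime_nsec = fhp->fh_post_attr.ctime.tv_nsec;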
diff --git a/fs/nilfs2/dat.c b/fs/nilfs2/dat.c index 49c844dab33a..59e5fe742f7b 100644 --- a/fs/nilfs2/dat.c +++ b/fs/nilfs2/dat.c | |||
@@ -335,7 +335,7 @@ int nilfs_dat_move(struct inode *dat, __u64 vblocknr, sector_t blocknr) | |||
335 | * the device at this point. | 335 | * the device at this point. |
336 | * | 336 | * |
337 | * To prevent nilfs_dat_translate() from returning the | 337 | * To prevent nilfs_dat_translate() from returning the |
338 | * uncommited block number, this makes a copy of the entry | 338 | * uncommitted block number, this makes a copy of the entry |
339 | * buffer and redirects nilfs_dat_translate() to the copy. | 339 | * buffer and redirects nilfs_dat_translate() to the copy. |
340 | */ | 340 | */ |
341 | if (!buffer_nilfs_redirected(entry_bh)) { | 341 | if (!buffer_nilfs_redirected(entry_bh)) { |
diff --git a/fs/nilfs2/gcinode.c b/fs/nilfs2/gcinode.c index 33ad25ddd5c4..caf9a6a3fb54 100644 --- a/fs/nilfs2/gcinode.c +++ b/fs/nilfs2/gcinode.c | |||
@@ -176,7 +176,6 @@ int nilfs_gccache_wait_and_mark_dirty(struct buffer_head *bh) | |||
176 | int nilfs_init_gcinode(struct inode *inode) | 176 | int nilfs_init_gcinode(struct inode *inode) |
177 | { | 177 | { |
178 | struct nilfs_inode_info *ii = NILFS_I(inode); | 178 | struct nilfs_inode_info *ii = NILFS_I(inode); |
179 | struct the_nilfs *nilfs = NILFS_SB(inode->i_sb)->s_nilfs; | ||
180 | 179 | ||
181 | inode->i_mode = S_IFREG; | 180 | inode->i_mode = S_IFREG; |
182 | mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS); | 181 | mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS); |
@@ -186,14 +185,6 @@ int nilfs_init_gcinode(struct inode *inode) | |||
186 | ii->i_flags = 0; | 185 | ii->i_flags = 0; |
187 | nilfs_bmap_init_gc(ii->i_bmap); | 186 | nilfs_bmap_init_gc(ii->i_bmap); |
188 | 187 | ||
189 | /* | ||
190 | * Add the inode to GC inode list. Garbage Collection | ||
191 | * is serialized and no two processes manipulate the | ||
192 | * list simultaneously. | ||
193 | */ | ||
194 | igrab(inode); | ||
195 | list_add(&NILFS_I(inode)->i_dirty, &nilfs->ns_gc_inodes); | ||
196 | |||
197 | return 0; | 188 | return 0; |
198 | } | 189 | } |
199 | 190 | ||
diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c index 3e90f86d5bfe..b185e937a335 100644 --- a/fs/nilfs2/ioctl.c +++ b/fs/nilfs2/ioctl.c | |||
@@ -337,6 +337,7 @@ static int nilfs_ioctl_move_blocks(struct super_block *sb, | |||
337 | struct nilfs_argv *argv, void *buf) | 337 | struct nilfs_argv *argv, void *buf) |
338 | { | 338 | { |
339 | size_t nmembs = argv->v_nmembs; | 339 | size_t nmembs = argv->v_nmembs; |
340 | struct the_nilfs *nilfs = NILFS_SB(sb)->s_nilfs; | ||
340 | struct inode *inode; | 341 | struct inode *inode; |
341 | struct nilfs_vdesc *vdesc; | 342 | struct nilfs_vdesc *vdesc; |
342 | struct buffer_head *bh, *n; | 343 | struct buffer_head *bh, *n; |
@@ -349,10 +350,21 @@ static int nilfs_ioctl_move_blocks(struct super_block *sb, | |||
349 | ino = vdesc->vd_ino; | 350 | ino = vdesc->vd_ino; |
350 | cno = vdesc->vd_cno; | 351 | cno = vdesc->vd_cno; |
351 | inode = nilfs_iget_for_gc(sb, ino, cno); | 352 | inode = nilfs_iget_for_gc(sb, ino, cno); |
352 | if (unlikely(inode == NULL)) { | 353 | if (IS_ERR(inode)) { |
353 | ret = -ENOMEM; | 354 | ret = PTR_ERR(inode); |
354 | goto failed; | 355 | goto failed; |
355 | } | 356 | } |
357 | if (list_empty(&NILFS_I(inode)->i_dirty)) { | ||
358 | /* | ||
359 | * Add the inode to GC inode list. Garbage Collection | ||
360 | * is serialized and no two processes manipulate the | ||
361 | * list simultaneously. | ||
362 | */ | ||
363 | igrab(inode); | ||
364 | list_add(&NILFS_I(inode)->i_dirty, | ||
365 | &nilfs->ns_gc_inodes); | ||
366 | } | ||
367 | |||
356 | do { | 368 | do { |
357 | ret = nilfs_ioctl_move_inode_block(inode, vdesc, | 369 | ret = nilfs_ioctl_move_inode_block(inode, vdesc, |
358 | &buffers); | 370 | &buffers); |
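In nilfs2, registration of GC inodes on the ns_gc_inodes list moves out of nilfs_init_gcinode() and into the ioctl loop, and errors from nilfs_iget_for_gc() are propagated via PTR_ERR() instead of being collapsed into -ENOMEM. The list_empty() test keeps an inode that is visited for several checkpoints within one ioctl from being linked onto the list twice. Fragment:

	if (list_empty(&NILFS_I(inode)->i_dirty)) {
		/* first time this inode is seen during this GC pass */
		igrab(inode);
		list_add(&NILFS_I(inode)->i_dirty, &nilfs->ns_gc_inodes);
	}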
diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c index b04f88eed09e..f35794b97e8e 100644 --- a/fs/notify/fanotify/fanotify.c +++ b/fs/notify/fanotify/fanotify.c | |||
@@ -92,7 +92,11 @@ static int fanotify_get_response_from_access(struct fsnotify_group *group, | |||
92 | 92 | ||
93 | pr_debug("%s: group=%p event=%p\n", __func__, group, event); | 93 | pr_debug("%s: group=%p event=%p\n", __func__, group, event); |
94 | 94 | ||
95 | wait_event(group->fanotify_data.access_waitq, event->response); | 95 | wait_event(group->fanotify_data.access_waitq, event->response || |
96 | atomic_read(&group->fanotify_data.bypass_perm)); | ||
97 | |||
98 | if (!event->response) /* bypass_perm set */ | ||
99 | return 0; | ||
96 | 100 | ||
97 | /* userspace responded, convert to something usable */ | 101 | /* userspace responded, convert to something usable */ |
98 | spin_lock(&event->lock); | 102 | spin_lock(&event->lock); |
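The fanotify permission-event wait now also completes when the group is being torn down: bypass_perm (made an atomic_t in fanotify_user.c below) is raised on release, and a failed copy to userspace now forces a FAN_DENY response plus a wake_up, so tasks can no longer sit blocked forever on permission events nobody will answer; a missing response after wake-up is treated as an allow. Fragment:

	wait_event(group->fanotify_data.access_waitq,
		   event->response ||
		   atomic_read(&group->fanotify_data.bypass_perm));

	if (!event->response)		/* woken by bypass_perm */
		return 0;		/* treat as allowed */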
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index 063224812b7e..8b61220cffc5 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c | |||
@@ -106,20 +106,29 @@ static int create_fd(struct fsnotify_group *group, struct fsnotify_event *event) | |||
106 | return client_fd; | 106 | return client_fd; |
107 | } | 107 | } |
108 | 108 | ||
109 | static ssize_t fill_event_metadata(struct fsnotify_group *group, | 109 | static int fill_event_metadata(struct fsnotify_group *group, |
110 | struct fanotify_event_metadata *metadata, | 110 | struct fanotify_event_metadata *metadata, |
111 | struct fsnotify_event *event) | 111 | struct fsnotify_event *event) |
112 | { | 112 | { |
113 | int ret = 0; | ||
114 | |||
113 | pr_debug("%s: group=%p metadata=%p event=%p\n", __func__, | 115 | pr_debug("%s: group=%p metadata=%p event=%p\n", __func__, |
114 | group, metadata, event); | 116 | group, metadata, event); |
115 | 117 | ||
116 | metadata->event_len = FAN_EVENT_METADATA_LEN; | 118 | metadata->event_len = FAN_EVENT_METADATA_LEN; |
119 | metadata->metadata_len = FAN_EVENT_METADATA_LEN; | ||
117 | metadata->vers = FANOTIFY_METADATA_VERSION; | 120 | metadata->vers = FANOTIFY_METADATA_VERSION; |
118 | metadata->mask = event->mask & FAN_ALL_OUTGOING_EVENTS; | 121 | metadata->mask = event->mask & FAN_ALL_OUTGOING_EVENTS; |
119 | metadata->pid = pid_vnr(event->tgid); | 122 | metadata->pid = pid_vnr(event->tgid); |
120 | metadata->fd = create_fd(group, event); | 123 | if (unlikely(event->mask & FAN_Q_OVERFLOW)) |
124 | metadata->fd = FAN_NOFD; | ||
125 | else { | ||
126 | metadata->fd = create_fd(group, event); | ||
127 | if (metadata->fd < 0) | ||
128 | ret = metadata->fd; | ||
129 | } | ||
121 | 130 | ||
122 | return metadata->fd; | 131 | return ret; |
123 | } | 132 | } |
124 | 133 | ||
125 | #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS | 134 | #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS |
@@ -200,7 +209,7 @@ static int prepare_for_access_response(struct fsnotify_group *group, | |||
200 | 209 | ||
201 | mutex_lock(&group->fanotify_data.access_mutex); | 210 | mutex_lock(&group->fanotify_data.access_mutex); |
202 | 211 | ||
203 | if (group->fanotify_data.bypass_perm) { | 212 | if (atomic_read(&group->fanotify_data.bypass_perm)) { |
204 | mutex_unlock(&group->fanotify_data.access_mutex); | 213 | mutex_unlock(&group->fanotify_data.access_mutex); |
205 | kmem_cache_free(fanotify_response_event_cache, re); | 214 | kmem_cache_free(fanotify_response_event_cache, re); |
206 | event->response = FAN_ALLOW; | 215 | event->response = FAN_ALLOW; |
@@ -257,24 +266,34 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group, | |||
257 | 266 | ||
258 | pr_debug("%s: group=%p event=%p\n", __func__, group, event); | 267 | pr_debug("%s: group=%p event=%p\n", __func__, group, event); |
259 | 268 | ||
260 | fd = fill_event_metadata(group, &fanotify_event_metadata, event); | 269 | ret = fill_event_metadata(group, &fanotify_event_metadata, event); |
261 | if (fd < 0) | 270 | if (ret < 0) |
262 | return fd; | 271 | goto out; |
263 | 272 | ||
273 | fd = fanotify_event_metadata.fd; | ||
264 | ret = prepare_for_access_response(group, event, fd); | 274 | ret = prepare_for_access_response(group, event, fd); |
265 | if (ret) | 275 | if (ret) |
266 | goto out_close_fd; | 276 | goto out_close_fd; |
267 | 277 | ||
268 | ret = -EFAULT; | 278 | ret = -EFAULT; |
269 | if (copy_to_user(buf, &fanotify_event_metadata, FAN_EVENT_METADATA_LEN)) | 279 | if (copy_to_user(buf, &fanotify_event_metadata, |
280 | fanotify_event_metadata.event_len)) | ||
270 | goto out_kill_access_response; | 281 | goto out_kill_access_response; |
271 | 282 | ||
272 | return FAN_EVENT_METADATA_LEN; | 283 | return fanotify_event_metadata.event_len; |
273 | 284 | ||
274 | out_kill_access_response: | 285 | out_kill_access_response: |
275 | remove_access_response(group, event, fd); | 286 | remove_access_response(group, event, fd); |
276 | out_close_fd: | 287 | out_close_fd: |
277 | sys_close(fd); | 288 | if (fd != FAN_NOFD) |
289 | sys_close(fd); | ||
290 | out: | ||
291 | #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS | ||
292 | if (event->mask & FAN_ALL_PERM_EVENTS) { | ||
293 | event->response = FAN_DENY; | ||
294 | wake_up(&group->fanotify_data.access_waitq); | ||
295 | } | ||
296 | #endif | ||
278 | return ret; | 297 | return ret; |
279 | } | 298 | } |
280 | 299 | ||
@@ -382,7 +401,7 @@ static int fanotify_release(struct inode *ignored, struct file *file) | |||
382 | 401 | ||
383 | mutex_lock(&group->fanotify_data.access_mutex); | 402 | mutex_lock(&group->fanotify_data.access_mutex); |
384 | 403 | ||
385 | group->fanotify_data.bypass_perm = true; | 404 | atomic_inc(&group->fanotify_data.bypass_perm); |
386 | 405 | ||
387 | list_for_each_entry_safe(re, lre, &group->fanotify_data.access_list, list) { | 406 | list_for_each_entry_safe(re, lre, &group->fanotify_data.access_list, list) { |
388 | pr_debug("%s: found group=%p re=%p event=%p\n", __func__, group, | 407 | pr_debug("%s: found group=%p re=%p event=%p\n", __func__, group, |
@@ -586,11 +605,10 @@ static int fanotify_add_vfsmount_mark(struct fsnotify_group *group, | |||
586 | { | 605 | { |
587 | struct fsnotify_mark *fsn_mark; | 606 | struct fsnotify_mark *fsn_mark; |
588 | __u32 added; | 607 | __u32 added; |
608 | int ret = 0; | ||
589 | 609 | ||
590 | fsn_mark = fsnotify_find_vfsmount_mark(group, mnt); | 610 | fsn_mark = fsnotify_find_vfsmount_mark(group, mnt); |
591 | if (!fsn_mark) { | 611 | if (!fsn_mark) { |
592 | int ret; | ||
593 | |||
594 | if (atomic_read(&group->num_marks) > group->fanotify_data.max_marks) | 612 | if (atomic_read(&group->num_marks) > group->fanotify_data.max_marks) |
595 | return -ENOSPC; | 613 | return -ENOSPC; |
596 | 614 | ||
@@ -600,17 +618,16 @@ static int fanotify_add_vfsmount_mark(struct fsnotify_group *group, | |||
600 | 618 | ||
601 | fsnotify_init_mark(fsn_mark, fanotify_free_mark); | 619 | fsnotify_init_mark(fsn_mark, fanotify_free_mark); |
602 | ret = fsnotify_add_mark(fsn_mark, group, NULL, mnt, 0); | 620 | ret = fsnotify_add_mark(fsn_mark, group, NULL, mnt, 0); |
603 | if (ret) { | 621 | if (ret) |
604 | fanotify_free_mark(fsn_mark); | 622 | goto err; |
605 | return ret; | ||
606 | } | ||
607 | } | 623 | } |
608 | added = fanotify_mark_add_to_mask(fsn_mark, mask, flags); | 624 | added = fanotify_mark_add_to_mask(fsn_mark, mask, flags); |
609 | fsnotify_put_mark(fsn_mark); | 625 | |
610 | if (added & ~mnt->mnt_fsnotify_mask) | 626 | if (added & ~mnt->mnt_fsnotify_mask) |
611 | fsnotify_recalc_vfsmount_mask(mnt); | 627 | fsnotify_recalc_vfsmount_mask(mnt); |
612 | 628 | err: | |
613 | return 0; | 629 | fsnotify_put_mark(fsn_mark); |
630 | return ret; | ||
614 | } | 631 | } |
615 | 632 | ||
616 | static int fanotify_add_inode_mark(struct fsnotify_group *group, | 633 | static int fanotify_add_inode_mark(struct fsnotify_group *group, |
@@ -619,6 +636,7 @@ static int fanotify_add_inode_mark(struct fsnotify_group *group, | |||
619 | { | 636 | { |
620 | struct fsnotify_mark *fsn_mark; | 637 | struct fsnotify_mark *fsn_mark; |
621 | __u32 added; | 638 | __u32 added; |
639 | int ret = 0; | ||
622 | 640 | ||
623 | pr_debug("%s: group=%p inode=%p\n", __func__, group, inode); | 641 | pr_debug("%s: group=%p inode=%p\n", __func__, group, inode); |
624 | 642 | ||
@@ -634,8 +652,6 @@ static int fanotify_add_inode_mark(struct fsnotify_group *group, | |||
634 | 652 | ||
635 | fsn_mark = fsnotify_find_inode_mark(group, inode); | 653 | fsn_mark = fsnotify_find_inode_mark(group, inode); |
636 | if (!fsn_mark) { | 654 | if (!fsn_mark) { |
637 | int ret; | ||
638 | |||
639 | if (atomic_read(&group->num_marks) > group->fanotify_data.max_marks) | 655 | if (atomic_read(&group->num_marks) > group->fanotify_data.max_marks) |
640 | return -ENOSPC; | 656 | return -ENOSPC; |
641 | 657 | ||
@@ -645,16 +661,16 @@ static int fanotify_add_inode_mark(struct fsnotify_group *group, | |||
645 | 661 | ||
646 | fsnotify_init_mark(fsn_mark, fanotify_free_mark); | 662 | fsnotify_init_mark(fsn_mark, fanotify_free_mark); |
647 | ret = fsnotify_add_mark(fsn_mark, group, inode, NULL, 0); | 663 | ret = fsnotify_add_mark(fsn_mark, group, inode, NULL, 0); |
648 | if (ret) { | 664 | if (ret) |
649 | fanotify_free_mark(fsn_mark); | 665 | goto err; |
650 | return ret; | ||
651 | } | ||
652 | } | 666 | } |
653 | added = fanotify_mark_add_to_mask(fsn_mark, mask, flags); | 667 | added = fanotify_mark_add_to_mask(fsn_mark, mask, flags); |
654 | fsnotify_put_mark(fsn_mark); | 668 | |
655 | if (added & ~inode->i_fsnotify_mask) | 669 | if (added & ~inode->i_fsnotify_mask) |
656 | fsnotify_recalc_inode_mask(inode); | 670 | fsnotify_recalc_inode_mask(inode); |
657 | return 0; | 671 | err: |
672 | fsnotify_put_mark(fsn_mark); | ||
673 | return ret; | ||
658 | } | 674 | } |
659 | 675 | ||
660 | /* fanotify syscalls */ | 676 | /* fanotify syscalls */ |
@@ -687,8 +703,10 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) | |||
687 | 703 | ||
688 | /* fsnotify_alloc_group takes a ref. Dropped in fanotify_release */ | 704 | /* fsnotify_alloc_group takes a ref. Dropped in fanotify_release */ |
689 | group = fsnotify_alloc_group(&fanotify_fsnotify_ops); | 705 | group = fsnotify_alloc_group(&fanotify_fsnotify_ops); |
690 | if (IS_ERR(group)) | 706 | if (IS_ERR(group)) { |
707 | free_uid(user); | ||
691 | return PTR_ERR(group); | 708 | return PTR_ERR(group); |
709 | } | ||
692 | 710 | ||
693 | group->fanotify_data.user = user; | 711 | group->fanotify_data.user = user; |
694 | atomic_inc(&user->fanotify_listeners); | 712 | atomic_inc(&user->fanotify_listeners); |
@@ -698,6 +716,7 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) | |||
698 | mutex_init(&group->fanotify_data.access_mutex); | 716 | mutex_init(&group->fanotify_data.access_mutex); |
699 | init_waitqueue_head(&group->fanotify_data.access_waitq); | 717 | init_waitqueue_head(&group->fanotify_data.access_waitq); |
700 | INIT_LIST_HEAD(&group->fanotify_data.access_list); | 718 | INIT_LIST_HEAD(&group->fanotify_data.access_list); |
719 | atomic_set(&group->fanotify_data.bypass_perm, 0); | ||
701 | #endif | 720 | #endif |
702 | switch (flags & FAN_ALL_CLASS_BITS) { | 721 | switch (flags & FAN_ALL_CLASS_BITS) { |
703 | case FAN_CLASS_NOTIF: | 722 | case FAN_CLASS_NOTIF: |
@@ -764,8 +783,10 @@ SYSCALL_DEFINE(fanotify_mark)(int fanotify_fd, unsigned int flags, | |||
764 | if (flags & ~FAN_ALL_MARK_FLAGS) | 783 | if (flags & ~FAN_ALL_MARK_FLAGS) |
765 | return -EINVAL; | 784 | return -EINVAL; |
766 | switch (flags & (FAN_MARK_ADD | FAN_MARK_REMOVE | FAN_MARK_FLUSH)) { | 785 | switch (flags & (FAN_MARK_ADD | FAN_MARK_REMOVE | FAN_MARK_FLUSH)) { |
767 | case FAN_MARK_ADD: | 786 | case FAN_MARK_ADD: /* fallthrough */ |
768 | case FAN_MARK_REMOVE: | 787 | case FAN_MARK_REMOVE: |
788 | if (!mask) | ||
789 | return -EINVAL; | ||
769 | case FAN_MARK_FLUSH: | 790 | case FAN_MARK_FLUSH: |
770 | break; | 791 | break; |
771 | default: | 792 | default: |
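The fanotify hunks above change what userspace observes: overflow events are now delivered with metadata->fd set to FAN_NOFD instead of a real descriptor, reads return fanotify_event_metadata.event_len rather than a fixed length, and FAN_MARK_ADD/FAN_MARK_REMOVE with an empty mask is rejected with EINVAL. What follows is a hedged userspace sketch of a listener coping with FAN_NOFD; it is not part of the patch, assumes a libc that ships <sys/fanotify.h> with the FAN_NOFD constant, needs CAP_SYS_ADMIN to run, and the watched path and event mask are only examples.

#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/fanotify.h>

int main(void)
{
	char buf[4096];
	int fd = fanotify_init(FAN_CLASS_NOTIF, O_RDONLY);

	if (fd < 0) {
		perror("fanotify_init");
		return 1;
	}
	/* An empty event mask for ADD/REMOVE now fails with EINVAL. */
	if (fanotify_mark(fd, FAN_MARK_ADD | FAN_MARK_MOUNT,
			  FAN_OPEN | FAN_CLOSE, AT_FDCWD, "/tmp") < 0) {
		perror("fanotify_mark");
		return 1;
	}

	for (;;) {
		ssize_t len = read(fd, buf, sizeof(buf));
		struct fanotify_event_metadata *md = (void *)buf;

		if (len <= 0)
			break;
		while (FAN_EVENT_OK(md, len)) {
			if (md->mask & FAN_Q_OVERFLOW) {
				/* Overflow events carry FAN_NOFD, nothing to close. */
				fprintf(stderr, "event queue overflow\n");
			} else {
				printf("mask 0x%llx pid %d fd %d\n",
				       (unsigned long long)md->mask,
				       (int)md->pid, md->fd);
				if (md->fd != FAN_NOFD)
					close(md->fd);
			}
			md = FAN_EVENT_NEXT(md, len);
		}
	}
	close(fd);
	return 0;
}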
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c index 444c305a468c..4cd5d5d78f9f 100644 --- a/fs/notify/inotify/inotify_user.c +++ b/fs/notify/inotify/inotify_user.c | |||
@@ -752,6 +752,7 @@ SYSCALL_DEFINE1(inotify_init1, int, flags) | |||
752 | if (ret >= 0) | 752 | if (ret >= 0) |
753 | return ret; | 753 | return ret; |
754 | 754 | ||
755 | fsnotify_put_group(group); | ||
755 | atomic_dec(&user->inotify_devs); | 756 | atomic_dec(&user->inotify_devs); |
756 | out_free_uid: | 757 | out_free_uid: |
757 | free_uid(user); | 758 | free_uid(user); |
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c index 52c7557f3e25..9f26ac9be2a4 100644 --- a/fs/ocfs2/cluster/heartbeat.c +++ b/fs/ocfs2/cluster/heartbeat.c | |||
@@ -1964,8 +1964,10 @@ static struct config_item *o2hb_heartbeat_group_make_item(struct config_group *g | |||
1964 | if (reg == NULL) | 1964 | if (reg == NULL) |
1965 | return ERR_PTR(-ENOMEM); | 1965 | return ERR_PTR(-ENOMEM); |
1966 | 1966 | ||
1967 | if (strlen(name) > O2HB_MAX_REGION_NAME_LEN) | 1967 | if (strlen(name) > O2HB_MAX_REGION_NAME_LEN) { |
1968 | return ERR_PTR(-ENAMETOOLONG); | 1968 | ret = -ENAMETOOLONG; |
1969 | goto free; | ||
1970 | } | ||
1969 | 1971 | ||
1970 | spin_lock(&o2hb_live_lock); | 1972 | spin_lock(&o2hb_live_lock); |
1971 | reg->hr_region_num = 0; | 1973 | reg->hr_region_num = 0; |
@@ -1974,7 +1976,8 @@ static struct config_item *o2hb_heartbeat_group_make_item(struct config_group *g | |||
1974 | O2NM_MAX_REGIONS); | 1976 | O2NM_MAX_REGIONS); |
1975 | if (reg->hr_region_num >= O2NM_MAX_REGIONS) { | 1977 | if (reg->hr_region_num >= O2NM_MAX_REGIONS) { |
1976 | spin_unlock(&o2hb_live_lock); | 1978 | spin_unlock(&o2hb_live_lock); |
1977 | return ERR_PTR(-EFBIG); | 1979 | ret = -EFBIG; |
1980 | goto free; | ||
1978 | } | 1981 | } |
1979 | set_bit(reg->hr_region_num, o2hb_region_bitmap); | 1982 | set_bit(reg->hr_region_num, o2hb_region_bitmap); |
1980 | } | 1983 | } |
@@ -1986,10 +1989,13 @@ static struct config_item *o2hb_heartbeat_group_make_item(struct config_group *g | |||
1986 | ret = o2hb_debug_region_init(reg, o2hb_debug_dir); | 1989 | ret = o2hb_debug_region_init(reg, o2hb_debug_dir); |
1987 | if (ret) { | 1990 | if (ret) { |
1988 | config_item_put(®->hr_item); | 1991 | config_item_put(®->hr_item); |
1989 | return ERR_PTR(ret); | 1992 | goto free; |
1990 | } | 1993 | } |
1991 | 1994 | ||
1992 | return ®->hr_item; | 1995 | return ®->hr_item; |
1996 | free: | ||
1997 | kfree(reg); | ||
1998 | return ERR_PTR(ret); | ||
1993 | } | 1999 | } |
1994 | 2000 | ||
1995 | static void o2hb_heartbeat_group_drop_item(struct config_group *group, | 2001 | static void o2hb_heartbeat_group_drop_item(struct config_group *group, |
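The o2hb_heartbeat_group_make_item() fix above replaces early returns with jumps to a single free label so the freshly allocated region is released on every error path. The same idiom, reduced to a minimal generic C sketch (region_create and MAX_NAME_LEN are made-up names for illustration, not ocfs2 code):

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define MAX_NAME_LEN 32

struct region {
	char name[MAX_NAME_LEN + 1];
};

/* Returns a region or NULL with errno set; every error path frees "reg" once. */
static struct region *region_create(const char *name)
{
	struct region *reg = calloc(1, sizeof(*reg));
	int err = 0;

	if (!reg)
		return NULL;

	if (strlen(name) > MAX_NAME_LEN) {
		err = ENAMETOOLONG;
		goto free;
	}
	strcpy(reg->name, name);
	return reg;

free:
	free(reg);
	errno = err;
	return NULL;
}

int main(void)
{
	struct region *r = region_create("this-name-is-far-too-long-to-fit-in-the-region");

	if (!r)
		printf("create failed: %s\n", strerror(errno));
	else
		free(r);
	return 0;
}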
diff --git a/fs/ocfs2/dcache.c b/fs/ocfs2/dcache.c index edaded48e7e9..895532ac4d98 100644 --- a/fs/ocfs2/dcache.c +++ b/fs/ocfs2/dcache.c | |||
@@ -476,7 +476,6 @@ static void ocfs2_dentry_iput(struct dentry *dentry, struct inode *inode) | |||
476 | 476 | ||
477 | out: | 477 | out: |
478 | iput(inode); | 478 | iput(inode); |
479 | ocfs2_dentry_attach_gen(dentry); | ||
480 | } | 479 | } |
481 | 480 | ||
482 | /* | 481 | /* |
diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c index 58a93b953735..cc2aaa96cfe5 100644 --- a/fs/ocfs2/dlm/dlmdomain.c +++ b/fs/ocfs2/dlm/dlmdomain.c | |||
@@ -959,7 +959,7 @@ static int dlm_match_regions(struct dlm_ctxt *dlm, | |||
959 | r += O2HB_MAX_REGION_NAME_LEN; | 959 | r += O2HB_MAX_REGION_NAME_LEN; |
960 | } | 960 | } |
961 | 961 | ||
962 | local = kmalloc(sizeof(qr->qr_regions), GFP_KERNEL); | 962 | local = kmalloc(sizeof(qr->qr_regions), GFP_ATOMIC); |
963 | if (!local) { | 963 | if (!local) { |
964 | status = -ENOMEM; | 964 | status = -ENOMEM; |
965 | goto bail; | 965 | goto bail; |
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h index d8408217e3bd..70dd3b1798f1 100644 --- a/fs/ocfs2/ocfs2.h +++ b/fs/ocfs2/ocfs2.h | |||
@@ -159,7 +159,9 @@ struct ocfs2_lock_res { | |||
159 | char l_name[OCFS2_LOCK_ID_MAX_LEN]; | 159 | char l_name[OCFS2_LOCK_ID_MAX_LEN]; |
160 | unsigned int l_ro_holders; | 160 | unsigned int l_ro_holders; |
161 | unsigned int l_ex_holders; | 161 | unsigned int l_ex_holders; |
162 | unsigned char l_level; | 162 | signed char l_level; |
163 | signed char l_requested; | ||
164 | signed char l_blocking; | ||
163 | 165 | ||
164 | /* Data packed - type enum ocfs2_lock_type */ | 166 | /* Data packed - type enum ocfs2_lock_type */ |
165 | unsigned char l_type; | 167 | unsigned char l_type; |
@@ -169,8 +171,6 @@ struct ocfs2_lock_res { | |||
169 | unsigned char l_action; | 171 | unsigned char l_action; |
170 | /* Data packed - enum type ocfs2_unlock_action */ | 172 | /* Data packed - enum type ocfs2_unlock_action */ |
171 | unsigned char l_unlock_action; | 173 | unsigned char l_unlock_action; |
172 | unsigned char l_requested; | ||
173 | unsigned char l_blocking; | ||
174 | unsigned int l_pending_gen; | 174 | unsigned int l_pending_gen; |
175 | 175 | ||
176 | spinlock_t l_lock; | 176 | spinlock_t l_lock; |
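The l_level/l_requested/l_blocking fields become signed chars because, as far as the DLM constants go, an invalid level is represented by -1 (DLM_LOCK_IV); stored in an unsigned char that value turns into 255 and ordering comparisons against real lock levels give the wrong answer. A generic hedged illustration of the pitfall (LOCK_IV and LOCK_EX are stand-in names, not the ocfs2 ones):

#include <stdio.h>

#define LOCK_IV	(-1)	/* "invalid" level, as the DLM represents it */
#define LOCK_EX	5

int main(void)
{
	unsigned char bad = LOCK_IV;	/* silently becomes 255 */
	signed char good = LOCK_IV;

	/* ordering tests against a real level now disagree */
	printf("unsigned: level <= LOCK_EX is %s\n",
	       bad <= LOCK_EX ? "true" : "false");	/* false: 255 > 5 */
	printf("signed:   level <= LOCK_EX is %s\n",
	       good <= LOCK_EX ? "true" : "false");	/* true: -1 <= 5 */
	return 0;
}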
diff --git a/fs/ocfs2/stack_user.c b/fs/ocfs2/stack_user.c index 252e7c82f929..a5ebe421195f 100644 --- a/fs/ocfs2/stack_user.c +++ b/fs/ocfs2/stack_user.c | |||
@@ -190,7 +190,7 @@ static struct ocfs2_live_connection *ocfs2_connection_find(const char *name) | |||
190 | return c; | 190 | return c; |
191 | } | 191 | } |
192 | 192 | ||
193 | return c; | 193 | return NULL; |
194 | } | 194 | } |
195 | 195 | ||
196 | /* | 196 | /* |
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index f02c0ef31578..cfeab7ce3697 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c | |||
@@ -41,7 +41,6 @@ | |||
41 | #include <linux/mount.h> | 41 | #include <linux/mount.h> |
42 | #include <linux/seq_file.h> | 42 | #include <linux/seq_file.h> |
43 | #include <linux/quotaops.h> | 43 | #include <linux/quotaops.h> |
44 | #include <linux/smp_lock.h> | ||
45 | 44 | ||
46 | #define MLOG_MASK_PREFIX ML_SUPER | 45 | #define MLOG_MASK_PREFIX ML_SUPER |
47 | #include <cluster/masklog.h> | 46 | #include <cluster/masklog.h> |
diff --git a/fs/openpromfs/inode.c b/fs/openpromfs/inode.c index ddb1f41376e5..911e61f348fc 100644 --- a/fs/openpromfs/inode.c +++ b/fs/openpromfs/inode.c | |||
@@ -418,7 +418,7 @@ out_no_root: | |||
418 | static struct dentry *openprom_mount(struct file_system_type *fs_type, | 418 | static struct dentry *openprom_mount(struct file_system_type *fs_type, |
419 | int flags, const char *dev_name, void *data) | 419 | int flags, const char *dev_name, void *data) |
420 | { | 420 | { |
421 | return mount_single(fs_type, flags, data, openprom_fill_super) | 421 | return mount_single(fs_type, flags, data, openprom_fill_super); |
422 | } | 422 | } |
423 | 423 | ||
424 | static struct file_system_type openprom_fs_type = { | 424 | static struct file_system_type openprom_fs_type = { |
diff --git a/fs/pipe.c b/fs/pipe.c --- a/fs/pipe.c +++ b/fs/pipe.c | |||
@@ -1199,12 +1199,24 @@ int pipe_proc_fn(struct ctl_table *table, int write, void __user *buf, | |||
1199 | return ret; | 1199 | return ret; |
1200 | } | 1200 | } |
1201 | 1201 | ||
1202 | /* | ||
1203 | * After the inode slimming patch, i_pipe/i_bdev/i_cdev share the same | ||
1204 | * location, so checking ->i_pipe is not enough to verify that this is a | ||
1205 | * pipe. | ||
1206 | */ | ||
1207 | struct pipe_inode_info *get_pipe_info(struct file *file) | ||
1208 | { | ||
1209 | struct inode *i = file->f_path.dentry->d_inode; | ||
1210 | |||
1211 | return S_ISFIFO(i->i_mode) ? i->i_pipe : NULL; | ||
1212 | } | ||
1213 | |||
1202 | long pipe_fcntl(struct file *file, unsigned int cmd, unsigned long arg) | 1214 | long pipe_fcntl(struct file *file, unsigned int cmd, unsigned long arg) |
1203 | { | 1215 | { |
1204 | struct pipe_inode_info *pipe; | 1216 | struct pipe_inode_info *pipe; |
1205 | long ret; | 1217 | long ret; |
1206 | 1218 | ||
1207 | pipe = file->f_path.dentry->d_inode->i_pipe; | 1219 | pipe = get_pipe_info(file); |
1208 | if (!pipe) | 1220 | if (!pipe) |
1209 | return -EBADF; | 1221 | return -EBADF; |
1210 | 1222 | ||
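get_pipe_info() now checks S_ISFIFO before trusting i_pipe, since i_pipe, i_bdev and i_cdev share storage after the inode slimming work, so pipe_fcntl() cleanly returns EBADF for non-pipe descriptors. Below is a hedged userspace sketch of the visible behaviour via the F_GETPIPE_SZ fcntl; it is not from the patch and the regular-file path used for the negative test is only an example.

#define _GNU_SOURCE		/* for F_GETPIPE_SZ */
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	int pfd[2];
	long sz;

	if (pipe(pfd) < 0)
		return 1;

	/* On a real pipe the fcntl reports the buffer size. */
	sz = fcntl(pfd[0], F_GETPIPE_SZ);
	printf("pipe buffer size: %ld bytes\n", sz);

	/* On a regular file the request is rejected rather than misinterpreted. */
	int ffd = open("/etc/hostname", O_RDONLY);
	if (ffd >= 0 && fcntl(ffd, F_GETPIPE_SZ) < 0 && errno == EBADF)
		printf("F_GETPIPE_SZ on a regular file: EBADF\n");

	if (ffd >= 0)
		close(ffd);
	close(pfd[0]);
	close(pfd[1]);
	return 0;
}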
diff --git a/fs/proc/base.c b/fs/proc/base.c index f3d02ca461ec..182845147fe4 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c | |||
@@ -1574,7 +1574,7 @@ static int do_proc_readlink(struct path *path, char __user *buffer, int buflen) | |||
1574 | if (!tmp) | 1574 | if (!tmp) |
1575 | return -ENOMEM; | 1575 | return -ENOMEM; |
1576 | 1576 | ||
1577 | pathname = d_path_with_unreachable(path, tmp, PAGE_SIZE); | 1577 | pathname = d_path(path, tmp, PAGE_SIZE); |
1578 | len = PTR_ERR(pathname); | 1578 | len = PTR_ERR(pathname); |
1579 | if (IS_ERR(pathname)) | 1579 | if (IS_ERR(pathname)) |
1580 | goto out; | 1580 | goto out; |
diff --git a/fs/proc/inode.c b/fs/proc/inode.c index 9c2b5f484879..3ddb6068177c 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c | |||
@@ -16,7 +16,6 @@ | |||
16 | #include <linux/limits.h> | 16 | #include <linux/limits.h> |
17 | #include <linux/init.h> | 17 | #include <linux/init.h> |
18 | #include <linux/module.h> | 18 | #include <linux/module.h> |
19 | #include <linux/smp_lock.h> | ||
20 | #include <linux/sysctl.h> | 19 | #include <linux/sysctl.h> |
21 | #include <linux/slab.h> | 20 | #include <linux/slab.h> |
22 | 21 | ||
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index da6b01d70f01..c126c83b9a45 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c | |||
@@ -706,6 +706,7 @@ static int pagemap_hugetlb_range(pte_t *pte, unsigned long hmask, | |||
706 | * skip over unmapped regions. | 706 | * skip over unmapped regions. |
707 | */ | 707 | */ |
708 | #define PAGEMAP_WALK_SIZE (PMD_SIZE) | 708 | #define PAGEMAP_WALK_SIZE (PMD_SIZE) |
709 | #define PAGEMAP_WALK_MASK (PMD_MASK) | ||
709 | static ssize_t pagemap_read(struct file *file, char __user *buf, | 710 | static ssize_t pagemap_read(struct file *file, char __user *buf, |
710 | size_t count, loff_t *ppos) | 711 | size_t count, loff_t *ppos) |
711 | { | 712 | { |
@@ -776,7 +777,7 @@ static ssize_t pagemap_read(struct file *file, char __user *buf, | |||
776 | unsigned long end; | 777 | unsigned long end; |
777 | 778 | ||
778 | pm.pos = 0; | 779 | pm.pos = 0; |
779 | end = start_vaddr + PAGEMAP_WALK_SIZE; | 780 | end = (start_vaddr + PAGEMAP_WALK_SIZE) & PAGEMAP_WALK_MASK; |
780 | /* overflow ? */ | 781 | /* overflow ? */ |
781 | if (end < start_vaddr || end > end_vaddr) | 782 | if (end < start_vaddr || end > end_vaddr) |
782 | end = end_vaddr; | 783 | end = end_vaddr; |
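PAGEMAP_WALK_MASK keeps every chunk of a pagemap_read() walk aligned to a PMD boundary, so hugetlb ranges are not mis-walked when a read starts mid-chunk. The userspace view is unchanged: /proc/pid/pagemap holds one 64-bit entry per page, indexed by virtual page number. A hedged sketch (not part of the patch) that looks up the entry for one address in the current process, using the documented bit layout (bit 63 present, bit 62 swapped, bits 0-54 PFN):

#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	long pagesize = sysconf(_SC_PAGESIZE);
	int fd = open("/proc/self/pagemap", O_RDONLY);
	uint64_t entry;
	/* look up the page backing this function's stack frame */
	uintptr_t vaddr = (uintptr_t)&pagesize;

	if (fd < 0)
		return 1;
	if (pread(fd, &entry, sizeof(entry),
		  (vaddr / pagesize) * sizeof(entry)) != sizeof(entry)) {
		close(fd);
		return 1;
	}
	printf("vaddr %#lx: present=%d swapped=%d pfn/swap=%#llx\n",
	       (unsigned long)vaddr,
	       (int)(entry >> 63 & 1), (int)(entry >> 62 & 1),
	       (unsigned long long)(entry & ((1ULL << 55) - 1)));
	close(fd);
	return 0;
}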
diff --git a/fs/read_write.c b/fs/read_write.c index 431a0ed610c8..5d431bacbea9 100644 --- a/fs/read_write.c +++ b/fs/read_write.c | |||
@@ -9,7 +9,6 @@ | |||
9 | #include <linux/fcntl.h> | 9 | #include <linux/fcntl.h> |
10 | #include <linux/file.h> | 10 | #include <linux/file.h> |
11 | #include <linux/uio.h> | 11 | #include <linux/uio.h> |
12 | #include <linux/smp_lock.h> | ||
13 | #include <linux/fsnotify.h> | 12 | #include <linux/fsnotify.h> |
14 | #include <linux/security.h> | 13 | #include <linux/security.h> |
15 | #include <linux/module.h> | 14 | #include <linux/module.h> |
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index 41656d40dc5c..0bae036831e2 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c | |||
@@ -8,7 +8,6 @@ | |||
8 | #include <linux/reiserfs_acl.h> | 8 | #include <linux/reiserfs_acl.h> |
9 | #include <linux/reiserfs_xattr.h> | 9 | #include <linux/reiserfs_xattr.h> |
10 | #include <linux/exportfs.h> | 10 | #include <linux/exportfs.h> |
11 | #include <linux/smp_lock.h> | ||
12 | #include <linux/pagemap.h> | 11 | #include <linux/pagemap.h> |
13 | #include <linux/highmem.h> | 12 | #include <linux/highmem.h> |
14 | #include <linux/slab.h> | 13 | #include <linux/slab.h> |
diff --git a/fs/reiserfs/ioctl.c b/fs/reiserfs/ioctl.c index adf22b485cea..79265fdc317a 100644 --- a/fs/reiserfs/ioctl.c +++ b/fs/reiserfs/ioctl.c | |||
@@ -9,7 +9,6 @@ | |||
9 | #include <linux/time.h> | 9 | #include <linux/time.h> |
10 | #include <asm/uaccess.h> | 10 | #include <asm/uaccess.h> |
11 | #include <linux/pagemap.h> | 11 | #include <linux/pagemap.h> |
12 | #include <linux/smp_lock.h> | ||
13 | #include <linux/compat.h> | 12 | #include <linux/compat.h> |
14 | 13 | ||
15 | /* | 14 | /* |
@@ -184,12 +183,11 @@ int reiserfs_unpack(struct inode *inode, struct file *filp) | |||
184 | return 0; | 183 | return 0; |
185 | } | 184 | } |
186 | 185 | ||
187 | /* we need to make sure nobody is changing the file size beneath | ||
188 | ** us | ||
189 | */ | ||
190 | reiserfs_mutex_lock_safe(&inode->i_mutex, inode->i_sb); | ||
191 | depth = reiserfs_write_lock_once(inode->i_sb); | 186 | depth = reiserfs_write_lock_once(inode->i_sb); |
192 | 187 | ||
188 | /* we need to make sure nobody is changing the file size beneath us */ | ||
189 | reiserfs_mutex_lock_safe(&inode->i_mutex, inode->i_sb); | ||
190 | |||
193 | write_from = inode->i_size & (blocksize - 1); | 191 | write_from = inode->i_size & (blocksize - 1); |
194 | /* if we are on a block boundary, we are already unpacked. */ | 192 | /* if we are on a block boundary, we are already unpacked. */ |
195 | if (write_from == 0) { | 193 | if (write_from == 0) { |
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c index 076c8b194682..d31bce1a9f90 100644 --- a/fs/reiserfs/journal.c +++ b/fs/reiserfs/journal.c | |||
@@ -43,7 +43,6 @@ | |||
43 | #include <linux/fcntl.h> | 43 | #include <linux/fcntl.h> |
44 | #include <linux/stat.h> | 44 | #include <linux/stat.h> |
45 | #include <linux/string.h> | 45 | #include <linux/string.h> |
46 | #include <linux/smp_lock.h> | ||
47 | #include <linux/buffer_head.h> | 46 | #include <linux/buffer_head.h> |
48 | #include <linux/workqueue.h> | 47 | #include <linux/workqueue.h> |
49 | #include <linux/writeback.h> | 48 | #include <linux/writeback.h> |
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index 3bf7a6457f4d..b243117b8752 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c | |||
@@ -28,7 +28,6 @@ | |||
28 | #include <linux/mount.h> | 28 | #include <linux/mount.h> |
29 | #include <linux/namei.h> | 29 | #include <linux/namei.h> |
30 | #include <linux/crc32.h> | 30 | #include <linux/crc32.h> |
31 | #include <linux/smp_lock.h> | ||
32 | 31 | ||
33 | struct file_system_type reiserfs_fs_type; | 32 | struct file_system_type reiserfs_fs_type; |
34 | 33 | ||
diff --git a/fs/reiserfs/xattr_acl.c b/fs/reiserfs/xattr_acl.c index 536d697a8a28..90d2fcb67a31 100644 --- a/fs/reiserfs/xattr_acl.c +++ b/fs/reiserfs/xattr_acl.c | |||
@@ -472,7 +472,9 @@ int reiserfs_acl_chmod(struct inode *inode) | |||
472 | struct reiserfs_transaction_handle th; | 472 | struct reiserfs_transaction_handle th; |
473 | size_t size = reiserfs_xattr_nblocks(inode, | 473 | size_t size = reiserfs_xattr_nblocks(inode, |
474 | reiserfs_acl_size(clone->a_count)); | 474 | reiserfs_acl_size(clone->a_count)); |
475 | reiserfs_write_lock(inode->i_sb); | 475 | int depth; |
476 | |||
477 | depth = reiserfs_write_lock_once(inode->i_sb); | ||
476 | error = journal_begin(&th, inode->i_sb, size * 2); | 478 | error = journal_begin(&th, inode->i_sb, size * 2); |
477 | if (!error) { | 479 | if (!error) { |
478 | int error2; | 480 | int error2; |
@@ -482,7 +484,7 @@ int reiserfs_acl_chmod(struct inode *inode) | |||
482 | if (error2) | 484 | if (error2) |
483 | error = error2; | 485 | error = error2; |
484 | } | 486 | } |
485 | reiserfs_write_unlock(inode->i_sb); | 487 | reiserfs_write_unlock_once(inode->i_sb, depth); |
486 | } | 488 | } |
487 | posix_acl_release(clone); | 489 | posix_acl_release(clone); |
488 | return error; | 490 | return error; |
diff --git a/fs/splice.c b/fs/splice.c index 8f1dfaecc8f0..ce2f02579e35 100644 --- a/fs/splice.c +++ b/fs/splice.c | |||
@@ -1311,18 +1311,6 @@ long do_splice_direct(struct file *in, loff_t *ppos, struct file *out, | |||
1311 | static int splice_pipe_to_pipe(struct pipe_inode_info *ipipe, | 1311 | static int splice_pipe_to_pipe(struct pipe_inode_info *ipipe, |
1312 | struct pipe_inode_info *opipe, | 1312 | struct pipe_inode_info *opipe, |
1313 | size_t len, unsigned int flags); | 1313 | size_t len, unsigned int flags); |
1314 | /* | ||
1315 | * After the inode slimming patch, i_pipe/i_bdev/i_cdev share the same | ||
1316 | * location, so checking ->i_pipe is not enough to verify that this is a | ||
1317 | * pipe. | ||
1318 | */ | ||
1319 | static inline struct pipe_inode_info *pipe_info(struct inode *inode) | ||
1320 | { | ||
1321 | if (S_ISFIFO(inode->i_mode)) | ||
1322 | return inode->i_pipe; | ||
1323 | |||
1324 | return NULL; | ||
1325 | } | ||
1326 | 1314 | ||
1327 | /* | 1315 | /* |
1328 | * Determine where to splice to/from. | 1316 | * Determine where to splice to/from. |
@@ -1336,8 +1324,8 @@ static long do_splice(struct file *in, loff_t __user *off_in, | |||
1336 | loff_t offset, *off; | 1324 | loff_t offset, *off; |
1337 | long ret; | 1325 | long ret; |
1338 | 1326 | ||
1339 | ipipe = pipe_info(in->f_path.dentry->d_inode); | 1327 | ipipe = get_pipe_info(in); |
1340 | opipe = pipe_info(out->f_path.dentry->d_inode); | 1328 | opipe = get_pipe_info(out); |
1341 | 1329 | ||
1342 | if (ipipe && opipe) { | 1330 | if (ipipe && opipe) { |
1343 | if (off_in || off_out) | 1331 | if (off_in || off_out) |
@@ -1555,7 +1543,7 @@ static long vmsplice_to_user(struct file *file, const struct iovec __user *iov, | |||
1555 | int error; | 1543 | int error; |
1556 | long ret; | 1544 | long ret; |
1557 | 1545 | ||
1558 | pipe = pipe_info(file->f_path.dentry->d_inode); | 1546 | pipe = get_pipe_info(file); |
1559 | if (!pipe) | 1547 | if (!pipe) |
1560 | return -EBADF; | 1548 | return -EBADF; |
1561 | 1549 | ||
@@ -1642,7 +1630,7 @@ static long vmsplice_to_pipe(struct file *file, const struct iovec __user *iov, | |||
1642 | }; | 1630 | }; |
1643 | long ret; | 1631 | long ret; |
1644 | 1632 | ||
1645 | pipe = pipe_info(file->f_path.dentry->d_inode); | 1633 | pipe = get_pipe_info(file); |
1646 | if (!pipe) | 1634 | if (!pipe) |
1647 | return -EBADF; | 1635 | return -EBADF; |
1648 | 1636 | ||
@@ -2022,8 +2010,8 @@ static int link_pipe(struct pipe_inode_info *ipipe, | |||
2022 | static long do_tee(struct file *in, struct file *out, size_t len, | 2010 | static long do_tee(struct file *in, struct file *out, size_t len, |
2023 | unsigned int flags) | 2011 | unsigned int flags) |
2024 | { | 2012 | { |
2025 | struct pipe_inode_info *ipipe = pipe_info(in->f_path.dentry->d_inode); | 2013 | struct pipe_inode_info *ipipe = get_pipe_info(in); |
2026 | struct pipe_inode_info *opipe = pipe_info(out->f_path.dentry->d_inode); | 2014 | struct pipe_inode_info *opipe = get_pipe_info(out); |
2027 | int ret = -EINVAL; | 2015 | int ret = -EINVAL; |
2028 | 2016 | ||
2029 | /* | 2017 | /* |
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c index c9af48fffcd7..691f61223ed6 100644 --- a/fs/xfs/linux-2.6/xfs_aops.c +++ b/fs/xfs/linux-2.6/xfs_aops.c | |||
@@ -934,7 +934,6 @@ xfs_aops_discard_page( | |||
934 | struct xfs_inode *ip = XFS_I(inode); | 934 | struct xfs_inode *ip = XFS_I(inode); |
935 | struct buffer_head *bh, *head; | 935 | struct buffer_head *bh, *head; |
936 | loff_t offset = page_offset(page); | 936 | loff_t offset = page_offset(page); |
937 | ssize_t len = 1 << inode->i_blkbits; | ||
938 | 937 | ||
939 | if (!xfs_is_delayed_page(page, IO_DELAY)) | 938 | if (!xfs_is_delayed_page(page, IO_DELAY)) |
940 | goto out_invalidate; | 939 | goto out_invalidate; |
@@ -949,58 +948,14 @@ xfs_aops_discard_page( | |||
949 | xfs_ilock(ip, XFS_ILOCK_EXCL); | 948 | xfs_ilock(ip, XFS_ILOCK_EXCL); |
950 | bh = head = page_buffers(page); | 949 | bh = head = page_buffers(page); |
951 | do { | 950 | do { |
952 | int done; | ||
953 | xfs_fileoff_t offset_fsb; | ||
954 | xfs_bmbt_irec_t imap; | ||
955 | int nimaps = 1; | ||
956 | int error; | 951 | int error; |
957 | xfs_fsblock_t firstblock; | 952 | xfs_fileoff_t start_fsb; |
958 | xfs_bmap_free_t flist; | ||
959 | 953 | ||
960 | if (!buffer_delay(bh)) | 954 | if (!buffer_delay(bh)) |
961 | goto next_buffer; | 955 | goto next_buffer; |
962 | 956 | ||
963 | offset_fsb = XFS_B_TO_FSBT(ip->i_mount, offset); | 957 | start_fsb = XFS_B_TO_FSBT(ip->i_mount, offset); |
964 | 958 | error = xfs_bmap_punch_delalloc_range(ip, start_fsb, 1); | |
965 | /* | ||
966 | * Map the range first and check that it is a delalloc extent | ||
967 | * before trying to unmap the range. Otherwise we will be | ||
968 | * trying to remove a real extent (which requires a | ||
969 | * transaction) or a hole, which is probably a bad idea... | ||
970 | */ | ||
971 | error = xfs_bmapi(NULL, ip, offset_fsb, 1, | ||
972 | XFS_BMAPI_ENTIRE, NULL, 0, &imap, | ||
973 | &nimaps, NULL); | ||
974 | |||
975 | if (error) { | ||
976 | /* something screwed, just bail */ | ||
977 | if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) { | ||
978 | xfs_fs_cmn_err(CE_ALERT, ip->i_mount, | ||
979 | "page discard failed delalloc mapping lookup."); | ||
980 | } | ||
981 | break; | ||
982 | } | ||
983 | if (!nimaps) { | ||
984 | /* nothing there */ | ||
985 | goto next_buffer; | ||
986 | } | ||
987 | if (imap.br_startblock != DELAYSTARTBLOCK) { | ||
988 | /* been converted, ignore */ | ||
989 | goto next_buffer; | ||
990 | } | ||
991 | WARN_ON(imap.br_blockcount == 0); | ||
992 | |||
993 | /* | ||
994 | * Note: while we initialise the firstblock/flist pair, they | ||
995 | * should never be used because blocks should never be | ||
996 | * allocated or freed for a delalloc extent and hence we need | ||
997 | * don't cancel or finish them after the xfs_bunmapi() call. | ||
998 | */ | ||
999 | xfs_bmap_init(&flist, &firstblock); | ||
1000 | error = xfs_bunmapi(NULL, ip, offset_fsb, 1, 0, 1, &firstblock, | ||
1001 | &flist, &done); | ||
1002 | |||
1003 | ASSERT(!flist.xbf_count && !flist.xbf_first); | ||
1004 | if (error) { | 959 | if (error) { |
1005 | /* something screwed, just bail */ | 960 | /* something screwed, just bail */ |
1006 | if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) { | 961 | if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) { |
@@ -1010,7 +965,7 @@ xfs_aops_discard_page( | |||
1010 | break; | 965 | break; |
1011 | } | 966 | } |
1012 | next_buffer: | 967 | next_buffer: |
1013 | offset += len; | 968 | offset += 1 << inode->i_blkbits; |
1014 | 969 | ||
1015 | } while ((bh = bh->b_this_page) != head); | 970 | } while ((bh = bh->b_this_page) != head); |
1016 | 971 | ||
@@ -1111,11 +1066,12 @@ xfs_vm_writepage( | |||
1111 | uptodate = 0; | 1066 | uptodate = 0; |
1112 | 1067 | ||
1113 | /* | 1068 | /* |
1114 | * A hole may still be marked uptodate because discard_buffer | 1069 | * set_page_dirty dirties all buffers in a page, independent |
1115 | * leaves the flag set. | 1070 | * of their state. The dirty state however is entirely |
1071 | * meaningless for holes (!mapped && uptodate), so skip | ||
1072 | * buffers covering holes here. | ||
1116 | */ | 1073 | */ |
1117 | if (!buffer_mapped(bh) && buffer_uptodate(bh)) { | 1074 | if (!buffer_mapped(bh) && buffer_uptodate(bh)) { |
1118 | ASSERT(!buffer_dirty(bh)); | ||
1119 | imap_valid = 0; | 1075 | imap_valid = 0; |
1120 | continue; | 1076 | continue; |
1121 | } | 1077 | } |
@@ -1504,11 +1460,42 @@ xfs_vm_write_failed( | |||
1504 | struct inode *inode = mapping->host; | 1460 | struct inode *inode = mapping->host; |
1505 | 1461 | ||
1506 | if (to > inode->i_size) { | 1462 | if (to > inode->i_size) { |
1507 | struct iattr ia = { | 1463 | /* |
1508 | .ia_valid = ATTR_SIZE | ATTR_FORCE, | 1464 | * punch out the delalloc blocks we have already allocated. We |
1509 | .ia_size = inode->i_size, | 1465 | * don't call xfs_setattr() to do this as we may be in the |
1510 | }; | 1466 | * middle of a multi-iovec write and so the vfs inode->i_size |
1511 | xfs_setattr(XFS_I(inode), &ia, XFS_ATTR_NOLOCK); | 1467 | * will not match the xfs ip->i_size and so it will zero too |
1468 | * much. Hence we just truncate the page cache to zero what is | ||
1469 | * necessary and punch the delalloc blocks directly. | ||
1470 | */ | ||
1471 | struct xfs_inode *ip = XFS_I(inode); | ||
1472 | xfs_fileoff_t start_fsb; | ||
1473 | xfs_fileoff_t end_fsb; | ||
1474 | int error; | ||
1475 | |||
1476 | truncate_pagecache(inode, to, inode->i_size); | ||
1477 | |||
1478 | /* | ||
1479 | * Check if there are any blocks that are outside of i_size | ||
1480 | * that need to be trimmed back. | ||
1481 | */ | ||
1482 | start_fsb = XFS_B_TO_FSB(ip->i_mount, inode->i_size) + 1; | ||
1483 | end_fsb = XFS_B_TO_FSB(ip->i_mount, to); | ||
1484 | if (end_fsb <= start_fsb) | ||
1485 | return; | ||
1486 | |||
1487 | xfs_ilock(ip, XFS_ILOCK_EXCL); | ||
1488 | error = xfs_bmap_punch_delalloc_range(ip, start_fsb, | ||
1489 | end_fsb - start_fsb); | ||
1490 | if (error) { | ||
1491 | /* something screwed, just bail */ | ||
1492 | if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) { | ||
1493 | xfs_fs_cmn_err(CE_ALERT, ip->i_mount, | ||
1494 | "xfs_vm_write_failed: unable to clean up ino %lld", | ||
1495 | ip->i_ino); | ||
1496 | } | ||
1497 | } | ||
1498 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | ||
1512 | } | 1499 | } |
1513 | } | 1500 | } |
1514 | 1501 | ||
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index 63fd2c07cb57..4c5deb6e9e31 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c | |||
@@ -488,29 +488,16 @@ found: | |||
488 | spin_unlock(&pag->pag_buf_lock); | 488 | spin_unlock(&pag->pag_buf_lock); |
489 | xfs_perag_put(pag); | 489 | xfs_perag_put(pag); |
490 | 490 | ||
491 | /* Attempt to get the semaphore without sleeping, | 491 | if (xfs_buf_cond_lock(bp)) { |
492 | * if this does not work then we need to drop the | 492 | /* failed, so wait for the lock if requested. */ |
493 | * spinlock and do a hard attempt on the semaphore. | ||
494 | */ | ||
495 | if (down_trylock(&bp->b_sema)) { | ||
496 | if (!(flags & XBF_TRYLOCK)) { | 493 | if (!(flags & XBF_TRYLOCK)) { |
497 | /* wait for buffer ownership */ | ||
498 | xfs_buf_lock(bp); | 494 | xfs_buf_lock(bp); |
499 | XFS_STATS_INC(xb_get_locked_waited); | 495 | XFS_STATS_INC(xb_get_locked_waited); |
500 | } else { | 496 | } else { |
501 | /* We asked for a trylock and failed, no need | ||
502 | * to look at file offset and length here, we | ||
503 | * know that this buffer at least overlaps our | ||
504 | * buffer and is locked, therefore our buffer | ||
505 | * either does not exist, or is this buffer. | ||
506 | */ | ||
507 | xfs_buf_rele(bp); | 497 | xfs_buf_rele(bp); |
508 | XFS_STATS_INC(xb_busy_locked); | 498 | XFS_STATS_INC(xb_busy_locked); |
509 | return NULL; | 499 | return NULL; |
510 | } | 500 | } |
511 | } else { | ||
512 | /* trylock worked */ | ||
513 | XB_SET_OWNER(bp); | ||
514 | } | 501 | } |
515 | 502 | ||
516 | if (bp->b_flags & XBF_STALE) { | 503 | if (bp->b_flags & XBF_STALE) { |
@@ -876,10 +863,18 @@ xfs_buf_rele( | |||
876 | */ | 863 | */ |
877 | 864 | ||
878 | /* | 865 | /* |
879 | * Locks a buffer object, if it is not already locked. | 866 | * Locks a buffer object, if it is not already locked. Note that this in |
880 | * Note that this in no way locks the underlying pages, so it is only | 867 | * no way locks the underlying pages, so it is only useful for |
881 | * useful for synchronizing concurrent use of buffer objects, not for | 868 | * synchronizing concurrent use of buffer objects, not for synchronizing |
882 | * synchronizing independent access to the underlying pages. | 869 | * independent access to the underlying pages. |
870 | * | ||
871 | * If we come across a stale, pinned, locked buffer, we know that we are | ||
872 | * being asked to lock a buffer that has been reallocated. Because it is | ||
873 | * pinned, we know that the log has not been pushed to disk and hence it | ||
874 | * will still be locked. Rather than continuing to have trylock attempts | ||
875 | * fail until someone else pushes the log, push it ourselves before | ||
876 | * returning. This means that the xfsaild will not get stuck trying | ||
877 | * to push on stale inode buffers. | ||
883 | */ | 878 | */ |
884 | int | 879 | int |
885 | xfs_buf_cond_lock( | 880 | xfs_buf_cond_lock( |
@@ -890,6 +885,8 @@ xfs_buf_cond_lock( | |||
890 | locked = down_trylock(&bp->b_sema) == 0; | 885 | locked = down_trylock(&bp->b_sema) == 0; |
891 | if (locked) | 886 | if (locked) |
892 | XB_SET_OWNER(bp); | 887 | XB_SET_OWNER(bp); |
888 | else if (atomic_read(&bp->b_pin_count) && (bp->b_flags & XBF_STALE)) | ||
889 | xfs_log_force(bp->b_target->bt_mount, 0); | ||
893 | 890 | ||
894 | trace_xfs_buf_cond_lock(bp, _RET_IP_); | 891 | trace_xfs_buf_cond_lock(bp, _RET_IP_); |
895 | return locked ? 0 : -EBUSY; | 892 | return locked ? 0 : -EBUSY; |
@@ -1781,7 +1778,6 @@ xfs_buf_delwri_split( | |||
1781 | INIT_LIST_HEAD(list); | 1778 | INIT_LIST_HEAD(list); |
1782 | spin_lock(dwlk); | 1779 | spin_lock(dwlk); |
1783 | list_for_each_entry_safe(bp, n, dwq, b_list) { | 1780 | list_for_each_entry_safe(bp, n, dwq, b_list) { |
1784 | trace_xfs_buf_delwri_split(bp, _RET_IP_); | ||
1785 | ASSERT(bp->b_flags & XBF_DELWRI); | 1781 | ASSERT(bp->b_flags & XBF_DELWRI); |
1786 | 1782 | ||
1787 | if (!XFS_BUF_ISPINNED(bp) && !xfs_buf_cond_lock(bp)) { | 1783 | if (!XFS_BUF_ISPINNED(bp) && !xfs_buf_cond_lock(bp)) { |
@@ -1795,6 +1791,7 @@ xfs_buf_delwri_split( | |||
1795 | _XBF_RUN_QUEUES); | 1791 | _XBF_RUN_QUEUES); |
1796 | bp->b_flags |= XBF_WRITE; | 1792 | bp->b_flags |= XBF_WRITE; |
1797 | list_move_tail(&bp->b_list, list); | 1793 | list_move_tail(&bp->b_list, list); |
1794 | trace_xfs_buf_delwri_split(bp, _RET_IP_); | ||
1798 | } else | 1795 | } else |
1799 | skipped++; | 1796 | skipped++; |
1800 | } | 1797 | } |
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c index 2ea238f6d38e..ad442d9e392e 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl.c +++ b/fs/xfs/linux-2.6/xfs_ioctl.c | |||
@@ -416,7 +416,7 @@ xfs_attrlist_by_handle( | |||
416 | if (IS_ERR(dentry)) | 416 | if (IS_ERR(dentry)) |
417 | return PTR_ERR(dentry); | 417 | return PTR_ERR(dentry); |
418 | 418 | ||
419 | kbuf = kmalloc(al_hreq.buflen, GFP_KERNEL); | 419 | kbuf = kzalloc(al_hreq.buflen, GFP_KERNEL); |
420 | if (!kbuf) | 420 | if (!kbuf) |
421 | goto out_dput; | 421 | goto out_dput; |
422 | 422 | ||
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c index 96107efc0c61..94d5fd6a2973 100644 --- a/fs/xfs/linux-2.6/xfs_iops.c +++ b/fs/xfs/linux-2.6/xfs_iops.c | |||
@@ -762,7 +762,8 @@ xfs_setup_inode( | |||
762 | inode->i_state = I_NEW; | 762 | inode->i_state = I_NEW; |
763 | 763 | ||
764 | inode_sb_list_add(inode); | 764 | inode_sb_list_add(inode); |
765 | insert_inode_hash(inode); | 765 | /* make the inode look hashed for the writeback code */ |
766 | hlist_add_fake(&inode->i_hash); | ||
766 | 767 | ||
767 | inode->i_mode = ip->i_d.di_mode; | 768 | inode->i_mode = ip->i_d.di_mode; |
768 | inode->i_nlink = ip->i_d.di_nlink; | 769 | inode->i_nlink = ip->i_d.di_nlink; |
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index 7465a7ffc4fd..c115dd5e95a8 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c | |||
@@ -353,9 +353,6 @@ xfs_parseargs( | |||
353 | mp->m_qflags &= ~XFS_OQUOTA_ENFD; | 353 | mp->m_qflags &= ~XFS_OQUOTA_ENFD; |
354 | } else if (!strcmp(this_char, MNTOPT_DELAYLOG)) { | 354 | } else if (!strcmp(this_char, MNTOPT_DELAYLOG)) { |
355 | mp->m_flags |= XFS_MOUNT_DELAYLOG; | 355 | mp->m_flags |= XFS_MOUNT_DELAYLOG; |
356 | cmn_err(CE_WARN, | ||
357 | "Enabling EXPERIMENTAL delayed logging feature " | ||
358 | "- use at your own risk.\n"); | ||
359 | } else if (!strcmp(this_char, MNTOPT_NODELAYLOG)) { | 356 | } else if (!strcmp(this_char, MNTOPT_NODELAYLOG)) { |
360 | mp->m_flags &= ~XFS_MOUNT_DELAYLOG; | 357 | mp->m_flags &= ~XFS_MOUNT_DELAYLOG; |
361 | } else if (!strcmp(this_char, "ihashsize")) { | 358 | } else if (!strcmp(this_char, "ihashsize")) { |
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c index 37d33254981d..afb0d7cfad1c 100644 --- a/fs/xfs/linux-2.6/xfs_sync.c +++ b/fs/xfs/linux-2.6/xfs_sync.c | |||
@@ -853,6 +853,7 @@ restart: | |||
853 | if (trylock) { | 853 | if (trylock) { |
854 | if (!mutex_trylock(&pag->pag_ici_reclaim_lock)) { | 854 | if (!mutex_trylock(&pag->pag_ici_reclaim_lock)) { |
855 | skipped++; | 855 | skipped++; |
856 | xfs_perag_put(pag); | ||
856 | continue; | 857 | continue; |
857 | } | 858 | } |
858 | first_index = pag->pag_ici_reclaim_cursor; | 859 | first_index = pag->pag_ici_reclaim_cursor; |
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c index 8abd12e32e13..4111cd3966c7 100644 --- a/fs/xfs/xfs_bmap.c +++ b/fs/xfs/xfs_bmap.c | |||
@@ -5471,8 +5471,13 @@ xfs_getbmap( | |||
5471 | if (error) | 5471 | if (error) |
5472 | goto out_unlock_iolock; | 5472 | goto out_unlock_iolock; |
5473 | } | 5473 | } |
5474 | 5474 | /* | |
5475 | ASSERT(ip->i_delayed_blks == 0); | 5475 | * even after flushing the inode, there can still be delalloc |
5476 | * blocks on the inode beyond EOF due to speculative | ||
5477 | * preallocation. These are not removed until the release | ||
5478 | * function is called or the inode is inactivated. Hence we | ||
5479 | * cannot assert here that ip->i_delayed_blks == 0. | ||
5480 | */ | ||
5476 | } | 5481 | } |
5477 | 5482 | ||
5478 | lock = xfs_ilock_map_shared(ip); | 5483 | lock = xfs_ilock_map_shared(ip); |
@@ -6070,3 +6075,79 @@ xfs_bmap_disk_count_leaves( | |||
6070 | *count += xfs_bmbt_disk_get_blockcount(frp); | 6075 | *count += xfs_bmbt_disk_get_blockcount(frp); |
6071 | } | 6076 | } |
6072 | } | 6077 | } |
6078 | |||
6079 | /* | ||
6080 | * dead simple method of punching delayed allocation blocks from a range in | ||
6081 | * the inode. Walks a block at a time so will be slow, but is only executed in | ||
6082 | * rare error cases so the overhead is not critical. This will always punch out | ||
6083 | * both the start and end blocks, even if the ranges only partially overlap | ||
6084 | * them, so it is up to the caller to ensure that partial blocks are not | ||
6085 | * passed in. | ||
6086 | */ | ||
6087 | int | ||
6088 | xfs_bmap_punch_delalloc_range( | ||
6089 | struct xfs_inode *ip, | ||
6090 | xfs_fileoff_t start_fsb, | ||
6091 | xfs_fileoff_t length) | ||
6092 | { | ||
6093 | xfs_fileoff_t remaining = length; | ||
6094 | int error = 0; | ||
6095 | |||
6096 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); | ||
6097 | |||
6098 | do { | ||
6099 | int done; | ||
6100 | xfs_bmbt_irec_t imap; | ||
6101 | int nimaps = 1; | ||
6102 | xfs_fsblock_t firstblock; | ||
6103 | xfs_bmap_free_t flist; | ||
6104 | |||
6105 | /* | ||
6106 | * Map the range first and check that it is a delalloc extent | ||
6107 | * before trying to unmap the range. Otherwise we will be | ||
6108 | * trying to remove a real extent (which requires a | ||
6109 | * transaction) or a hole, which is probably a bad idea... | ||
6110 | */ | ||
6111 | error = xfs_bmapi(NULL, ip, start_fsb, 1, | ||
6112 | XFS_BMAPI_ENTIRE, NULL, 0, &imap, | ||
6113 | &nimaps, NULL); | ||
6114 | |||
6115 | if (error) { | ||
6116 | /* something screwed, just bail */ | ||
6117 | if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) { | ||
6118 | xfs_fs_cmn_err(CE_ALERT, ip->i_mount, | ||
6119 | "Failed delalloc mapping lookup ino %lld fsb %lld.", | ||
6120 | ip->i_ino, start_fsb); | ||
6121 | } | ||
6122 | break; | ||
6123 | } | ||
6124 | if (!nimaps) { | ||
6125 | /* nothing there */ | ||
6126 | goto next_block; | ||
6127 | } | ||
6128 | if (imap.br_startblock != DELAYSTARTBLOCK) { | ||
6129 | /* been converted, ignore */ | ||
6130 | goto next_block; | ||
6131 | } | ||
6132 | WARN_ON(imap.br_blockcount == 0); | ||
6133 | |||
6134 | /* | ||
6135 | * Note: while we initialise the firstblock/flist pair, they | ||
6136 | * should never be used because blocks should never be | ||
6137 | * allocated or freed for a delalloc extent and hence we don't | ||
6138 | * need to cancel or finish them after the xfs_bunmapi() call. | ||
6139 | */ | ||
6140 | xfs_bmap_init(&flist, &firstblock); | ||
6141 | error = xfs_bunmapi(NULL, ip, start_fsb, 1, 0, 1, &firstblock, | ||
6142 | &flist, &done); | ||
6143 | if (error) | ||
6144 | break; | ||
6145 | |||
6146 | ASSERT(!flist.xbf_count && !flist.xbf_first); | ||
6147 | next_block: | ||
6148 | start_fsb++; | ||
6149 | remaining--; | ||
6150 | } while (remaining > 0); | ||
6151 | |||
6152 | return error; | ||
6153 | } | ||
diff --git a/fs/xfs/xfs_bmap.h b/fs/xfs/xfs_bmap.h index 71ec9b6ecdfc..3651191daea1 100644 --- a/fs/xfs/xfs_bmap.h +++ b/fs/xfs/xfs_bmap.h | |||
@@ -394,6 +394,11 @@ xfs_bmap_count_blocks( | |||
394 | int whichfork, | 394 | int whichfork, |
395 | int *count); | 395 | int *count); |
396 | 396 | ||
397 | int | ||
398 | xfs_bmap_punch_delalloc_range( | ||
399 | struct xfs_inode *ip, | ||
400 | xfs_fileoff_t start_fsb, | ||
401 | xfs_fileoff_t length); | ||
397 | #endif /* __KERNEL__ */ | 402 | #endif /* __KERNEL__ */ |
398 | 403 | ||
399 | #endif /* __XFS_BMAP_H__ */ | 404 | #endif /* __XFS_BMAP_H__ */ |
diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c index 3b9582c60a22..e60490bc00a6 100644 --- a/fs/xfs/xfs_dfrag.c +++ b/fs/xfs/xfs_dfrag.c | |||
@@ -377,6 +377,19 @@ xfs_swap_extents( | |||
377 | ip->i_d.di_format = tip->i_d.di_format; | 377 | ip->i_d.di_format = tip->i_d.di_format; |
378 | tip->i_d.di_format = tmp; | 378 | tip->i_d.di_format = tmp; |
379 | 379 | ||
380 | /* | ||
381 | * The extents in the source inode could still contain speculative | ||
382 | * preallocation beyond EOF (e.g. the file is open but not modified | ||
383 | * while defrag is in progress). In that case, we need to copy over the | ||
384 | * number of delalloc blocks the data fork in the source inode is | ||
385 | * tracking beyond EOF so that when the fork is truncated away when the | ||
386 | * temporary inode is unlinked we don't underrun the i_delayed_blks | ||
387 | * counter on that inode. | ||
388 | */ | ||
389 | ASSERT(tip->i_delayed_blks == 0); | ||
390 | tip->i_delayed_blks = ip->i_delayed_blks; | ||
391 | ip->i_delayed_blks = 0; | ||
392 | |||
380 | ilf_fields = XFS_ILOG_CORE; | 393 | ilf_fields = XFS_ILOG_CORE; |
381 | 394 | ||
382 | switch(ip->i_d.di_format) { | 395 | switch(ip->i_d.di_format) { |
diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c index ed9990267661..c78cc6a3d87c 100644 --- a/fs/xfs/xfs_error.c +++ b/fs/xfs/xfs_error.c | |||
@@ -58,6 +58,7 @@ xfs_error_trap(int e) | |||
58 | int xfs_etest[XFS_NUM_INJECT_ERROR]; | 58 | int xfs_etest[XFS_NUM_INJECT_ERROR]; |
59 | int64_t xfs_etest_fsid[XFS_NUM_INJECT_ERROR]; | 59 | int64_t xfs_etest_fsid[XFS_NUM_INJECT_ERROR]; |
60 | char * xfs_etest_fsname[XFS_NUM_INJECT_ERROR]; | 60 | char * xfs_etest_fsname[XFS_NUM_INJECT_ERROR]; |
61 | int xfs_error_test_active; | ||
61 | 62 | ||
62 | int | 63 | int |
63 | xfs_error_test(int error_tag, int *fsidp, char *expression, | 64 | xfs_error_test(int error_tag, int *fsidp, char *expression, |
@@ -108,6 +109,7 @@ xfs_errortag_add(int error_tag, xfs_mount_t *mp) | |||
108 | len = strlen(mp->m_fsname); | 109 | len = strlen(mp->m_fsname); |
109 | xfs_etest_fsname[i] = kmem_alloc(len + 1, KM_SLEEP); | 110 | xfs_etest_fsname[i] = kmem_alloc(len + 1, KM_SLEEP); |
110 | strcpy(xfs_etest_fsname[i], mp->m_fsname); | 111 | strcpy(xfs_etest_fsname[i], mp->m_fsname); |
112 | xfs_error_test_active++; | ||
111 | return 0; | 113 | return 0; |
112 | } | 114 | } |
113 | } | 115 | } |
@@ -137,6 +139,7 @@ xfs_errortag_clearall(xfs_mount_t *mp, int loud) | |||
137 | xfs_etest_fsid[i] = 0LL; | 139 | xfs_etest_fsid[i] = 0LL; |
138 | kmem_free(xfs_etest_fsname[i]); | 140 | kmem_free(xfs_etest_fsname[i]); |
139 | xfs_etest_fsname[i] = NULL; | 141 | xfs_etest_fsname[i] = NULL; |
142 | xfs_error_test_active--; | ||
140 | } | 143 | } |
141 | } | 144 | } |
142 | 145 | ||
diff --git a/fs/xfs/xfs_error.h b/fs/xfs/xfs_error.h index c2c1a072bb82..f338847f80b8 100644 --- a/fs/xfs/xfs_error.h +++ b/fs/xfs/xfs_error.h | |||
@@ -127,13 +127,14 @@ extern void xfs_corruption_error(const char *tag, int level, | |||
127 | #define XFS_RANDOM_BMAPIFORMAT XFS_RANDOM_DEFAULT | 127 | #define XFS_RANDOM_BMAPIFORMAT XFS_RANDOM_DEFAULT |
128 | 128 | ||
129 | #ifdef DEBUG | 129 | #ifdef DEBUG |
130 | extern int xfs_error_test_active; | ||
130 | extern int xfs_error_test(int, int *, char *, int, char *, unsigned long); | 131 | extern int xfs_error_test(int, int *, char *, int, char *, unsigned long); |
131 | 132 | ||
132 | #define XFS_NUM_INJECT_ERROR 10 | 133 | #define XFS_NUM_INJECT_ERROR 10 |
133 | #define XFS_TEST_ERROR(expr, mp, tag, rf) \ | 134 | #define XFS_TEST_ERROR(expr, mp, tag, rf) \ |
134 | ((expr) || \ | 135 | ((expr) || (xfs_error_test_active && \ |
135 | xfs_error_test((tag), (mp)->m_fixedfsid, "expr", __LINE__, __FILE__, \ | 136 | xfs_error_test((tag), (mp)->m_fixedfsid, "expr", __LINE__, __FILE__, \ |
136 | (rf))) | 137 | (rf)))) |
137 | 138 | ||
138 | extern int xfs_errortag_add(int error_tag, xfs_mount_t *mp); | 139 | extern int xfs_errortag_add(int error_tag, xfs_mount_t *mp); |
139 | extern int xfs_errortag_clearall(xfs_mount_t *mp, int loud); | 140 | extern int xfs_errortag_clearall(xfs_mount_t *mp, int loud); |
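With xfs_error_test_active in place, XFS_TEST_ERROR only calls into xfs_error_test() while at least one error tag is armed; otherwise each check site costs a single global integer load. The gating pattern, stripped down to a generic hedged C sketch (TEST_ERROR and error_test_slowpath are illustrative names, not the XFS ones):

#include <stdbool.h>
#include <stdio.h>

static int error_test_active;	/* bumped when a tag is armed, dropped on clear */

/* Expensive path: would scan the table of armed tags; stubbed out here. */
static bool error_test_slowpath(int tag)
{
	(void)tag;
	return false;
}

/* Cheap in the common case: the counter test short-circuits the call. */
#define TEST_ERROR(expr, tag) \
	((expr) || (error_test_active && error_test_slowpath(tag)))

int main(void)
{
	if (TEST_ERROR(0, 42))
		puts("error injected");
	else
		puts("no error");
	return 0;
}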
diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c index 9b715dce5699..9124425b7f2f 100644 --- a/fs/xfs/xfs_filestream.c +++ b/fs/xfs/xfs_filestream.c | |||
@@ -744,9 +744,15 @@ xfs_filestream_new_ag( | |||
744 | * If the file's parent directory is known, take its iolock in exclusive | 744 | * If the file's parent directory is known, take its iolock in exclusive |
745 | * mode to prevent two sibling files from racing each other to migrate | 745 | * mode to prevent two sibling files from racing each other to migrate |
746 | * themselves and their parent to different AGs. | 746 | * themselves and their parent to different AGs. |
747 | * | ||
748 | * Note that we lock the parent directory iolock inside the child | ||
749 | * iolock here. That's fine as we never hold both parent and child | ||
750 | * iolock in any other place. This is different from the ilock, | ||
751 | * which requires locking of the child after the parent for namespace | ||
752 | * operations. | ||
747 | */ | 753 | */ |
748 | if (pip) | 754 | if (pip) |
749 | xfs_ilock(pip, XFS_IOLOCK_EXCL); | 755 | xfs_ilock(pip, XFS_IOLOCK_EXCL | XFS_IOLOCK_PARENT); |
750 | 756 | ||
751 | /* | 757 | /* |
752 | * A new AG needs to be found for the file. If the file's parent | 758 | * A new AG needs to be found for the file. If the file's parent |
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index c7ac020705df..7c8d30c453c3 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c | |||
@@ -657,18 +657,37 @@ xfs_inode_item_unlock( | |||
657 | } | 657 | } |
658 | 658 | ||
659 | /* | 659 | /* |
660 | * This is called to find out where the oldest active copy of the | 660 | * This is called to find out where the oldest active copy of the inode log |
661 | * inode log item in the on disk log resides now that the last log | 661 | * item in the on disk log resides now that the last log write of it completed |
662 | * write of it completed at the given lsn. Since we always re-log | 662 | * at the given lsn. Since we always re-log all dirty data in an inode, the |
663 | * all dirty data in an inode, the latest copy in the on disk log | 663 | * latest copy in the on disk log is the only one that matters. Therefore, |
664 | * is the only one that matters. Therefore, simply return the | 664 | * simply return the given lsn. |
665 | * given lsn. | 665 | * |
666 | * If the inode has been marked stale because the cluster is being freed, we | ||
667 | * don't want to (re-)insert this inode into the AIL. There is a race condition | ||
668 | * where the cluster buffer may be unpinned before the inode is inserted into | ||
669 | * the AIL during transaction committed processing. If the buffer is unpinned | ||
670 | * before the inode item has been committed and inserted, then it is possible | ||
671 | * for the buffer to be written and IO completions before the inode is inserted | ||
672 | * into the AIL. In that case, we'd be inserting a clean, stale inode into the | ||
673 | * AIL which will never get removed. It will, however, get reclaimed which | ||
674 | * triggers an assert in xfs_inode_free() complaining about freeing an inode | ||
675 | * still in the AIL. | ||
676 | * | ||
677 | * To avoid this, return a lower LSN than the one passed in so that the | ||
678 | * transaction committed code will not move the inode forward in the AIL but | ||
679 | * will still unpin it properly. | ||
666 | */ | 680 | */ |
667 | STATIC xfs_lsn_t | 681 | STATIC xfs_lsn_t |
668 | xfs_inode_item_committed( | 682 | xfs_inode_item_committed( |
669 | struct xfs_log_item *lip, | 683 | struct xfs_log_item *lip, |
670 | xfs_lsn_t lsn) | 684 | xfs_lsn_t lsn) |
671 | { | 685 | { |
686 | struct xfs_inode_log_item *iip = INODE_ITEM(lip); | ||
687 | struct xfs_inode *ip = iip->ili_inode; | ||
688 | |||
689 | if (xfs_iflags_test(ip, XFS_ISTALE)) | ||
690 | return lsn - 1; | ||
672 | return lsn; | 691 | return lsn; |
673 | } | 692 | } |
674 | 693 | ||
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index b1498ab5a399..19e9dfa1c254 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c | |||
@@ -275,6 +275,7 @@ xfs_free_perag( | |||
275 | pag = radix_tree_delete(&mp->m_perag_tree, agno); | 275 | pag = radix_tree_delete(&mp->m_perag_tree, agno); |
276 | spin_unlock(&mp->m_perag_lock); | 276 | spin_unlock(&mp->m_perag_lock); |
277 | ASSERT(pag); | 277 | ASSERT(pag); |
278 | ASSERT(atomic_read(&pag->pag_ref) == 0); | ||
278 | call_rcu(&pag->rcu_head, __xfs_free_perag); | 279 | call_rcu(&pag->rcu_head, __xfs_free_perag); |
279 | } | 280 | } |
280 | } | 281 | } |
diff --git a/fs/xfs/xfs_quota.h b/fs/xfs/xfs_quota.h index e0e64b113bd6..9bb6eda4cd21 100644 --- a/fs/xfs/xfs_quota.h +++ b/fs/xfs/xfs_quota.h | |||
@@ -346,8 +346,17 @@ xfs_qm_vop_dqalloc(struct xfs_inode *ip, uid_t uid, gid_t gid, prid_t prid, | |||
346 | #define xfs_trans_mod_dquot_byino(tp, ip, fields, delta) | 346 | #define xfs_trans_mod_dquot_byino(tp, ip, fields, delta) |
347 | #define xfs_trans_apply_dquot_deltas(tp) | 347 | #define xfs_trans_apply_dquot_deltas(tp) |
348 | #define xfs_trans_unreserve_and_mod_dquots(tp) | 348 | #define xfs_trans_unreserve_and_mod_dquots(tp) |
349 | #define xfs_trans_reserve_quota_nblks(tp, ip, nblks, ninos, flags) (0) | 349 | static inline int xfs_trans_reserve_quota_nblks(struct xfs_trans *tp, |
350 | #define xfs_trans_reserve_quota_bydquots(tp, mp, u, g, nb, ni, fl) (0) | 350 | struct xfs_inode *ip, long nblks, long ninos, uint flags) |
351 | { | ||
352 | return 0; | ||
353 | } | ||
354 | static inline int xfs_trans_reserve_quota_bydquots(struct xfs_trans *tp, | ||
355 | struct xfs_mount *mp, struct xfs_dquot *udqp, | ||
356 | struct xfs_dquot *gdqp, long nblks, long ninos, uint flags) | ||
357 | { | ||
358 | return 0; | ||
359 | } | ||
351 | #define xfs_qm_vop_create_dqattach(tp, ip, u, g) | 360 | #define xfs_qm_vop_create_dqattach(tp, ip, u, g) |
352 | #define xfs_qm_vop_rename_dqattach(it) (0) | 361 | #define xfs_qm_vop_rename_dqattach(it) (0) |
353 | #define xfs_qm_vop_chown(tp, ip, old, new) (NULL) | 362 | #define xfs_qm_vop_chown(tp, ip, old, new) (NULL) |
@@ -357,11 +366,14 @@ xfs_qm_vop_dqalloc(struct xfs_inode *ip, uid_t uid, gid_t gid, prid_t prid, | |||
357 | #define xfs_qm_dqdetach(ip) | 366 | #define xfs_qm_dqdetach(ip) |
358 | #define xfs_qm_dqrele(d) | 367 | #define xfs_qm_dqrele(d) |
359 | #define xfs_qm_statvfs(ip, s) | 368 | #define xfs_qm_statvfs(ip, s) |
360 | #define xfs_qm_sync(mp, fl) (0) | 369 | static inline int xfs_qm_sync(struct xfs_mount *mp, int flags) |
370 | { | ||
371 | return 0; | ||
372 | } | ||
361 | #define xfs_qm_newmount(mp, a, b) (0) | 373 | #define xfs_qm_newmount(mp, a, b) (0) |
362 | #define xfs_qm_mount_quotas(mp) | 374 | #define xfs_qm_mount_quotas(mp) |
363 | #define xfs_qm_unmount(mp) | 375 | #define xfs_qm_unmount(mp) |
364 | #define xfs_qm_unmount_quotas(mp) (0) | 376 | #define xfs_qm_unmount_quotas(mp) |
365 | #endif /* CONFIG_XFS_QUOTA */ | 377 | #endif /* CONFIG_XFS_QUOTA */ |
366 | 378 | ||
367 | #define xfs_trans_unreserve_quota_nblks(tp, ip, nblks, ninos, flags) \ | 379 | #define xfs_trans_unreserve_quota_nblks(tp, ip, nblks, ninos, flags) \ |
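Turning the CONFIG_XFS_QUOTA=n stubs into static inlines keeps argument type-checking in the no-quota build and avoids the unused-value warnings that bare (0) macros provoke. A generic hedged sketch of the difference between the two kinds of stub (all names here are invented for illustration):

#include <stdio.h>

/* macro stub: arguments are never evaluated or type-checked */
#define reserve_quota_macro(ip, nblks)	(0)

/* inline stub: same zero result, but callers are type-checked */
struct inode;
static inline int reserve_quota_inline(struct inode *ip, long nblks)
{
	(void)ip;
	(void)nblks;
	return 0;
}

int main(void)
{
	/* With the macro, a bogus argument would still compile;
	 * with the inline it is rejected at build time. */
	printf("%d %d\n", reserve_quota_macro(NULL, 1),
	       reserve_quota_inline(NULL, 1));
	return 0;
}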
diff --git a/fs/xfs/xfs_rename.c b/fs/xfs/xfs_rename.c index d2af0a8381a6..77a59891734e 100644 --- a/fs/xfs/xfs_rename.c +++ b/fs/xfs/xfs_rename.c | |||
@@ -297,6 +297,7 @@ xfs_rename( | |||
297 | * it and some incremental backup programs won't work without it. | 297 | * it and some incremental backup programs won't work without it. |
298 | */ | 298 | */ |
299 | xfs_trans_ichgtime(tp, src_ip, XFS_ICHGTIME_CHG); | 299 | xfs_trans_ichgtime(tp, src_ip, XFS_ICHGTIME_CHG); |
300 | xfs_trans_log_inode(tp, src_ip, XFS_ILOG_CORE); | ||
300 | 301 | ||
301 | /* | 302 | /* |
302 | * Adjust the link count on src_dp. This is necessary when | 303 | * Adjust the link count on src_dp. This is necessary when |