diff options
author | Tony Lindgren <tony@atomide.com> | 2011-06-13 10:40:25 -0400 |
---|---|---|
committer | Tony Lindgren <tony@atomide.com> | 2011-06-13 10:40:25 -0400 |
commit | c8e0bf95fc01d6e2ca585fe08010800b6c56e823 (patch) | |
tree | f901bdcb5b20e93261cf9cf324ebbcf3fd24ce58 /fs | |
parent | 9d5ae7cd6cb9ead43336fec1094184d1dc740fbd (diff) | |
parent | 345f79b3de7f6d651e4dba794af7c7303bdfd649 (diff) |
Merge branch 'for_3.0/pm-fixes' of ssh://master.kernel.org/pub/scm/linux/kernel/git/khilman/linux-omap-pm into fixes
Diffstat (limited to 'fs')
125 files changed, 4779 insertions, 1421 deletions
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index 8d7f3e69ae29..7f6c67703195 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c | |||
@@ -814,7 +814,6 @@ int v9fs_vfs_unlink(struct inode *i, struct dentry *d) | |||
814 | 814 | ||
815 | int v9fs_vfs_rmdir(struct inode *i, struct dentry *d) | 815 | int v9fs_vfs_rmdir(struct inode *i, struct dentry *d) |
816 | { | 816 | { |
817 | dentry_unhash(d); | ||
818 | return v9fs_remove(i, d, 1); | 817 | return v9fs_remove(i, d, 1); |
819 | } | 818 | } |
820 | 819 | ||
@@ -840,9 +839,6 @@ v9fs_vfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
840 | struct p9_fid *newdirfid; | 839 | struct p9_fid *newdirfid; |
841 | struct p9_wstat wstat; | 840 | struct p9_wstat wstat; |
842 | 841 | ||
843 | if (new_dentry->d_inode && S_ISDIR(new_dentry->d_inode->i_mode)) | ||
844 | dentry_unhash(new_dentry); | ||
845 | |||
846 | P9_DPRINTK(P9_DEBUG_VFS, "\n"); | 842 | P9_DPRINTK(P9_DEBUG_VFS, "\n"); |
847 | retval = 0; | 843 | retval = 0; |
848 | old_inode = old_dentry->d_inode; | 844 | old_inode = old_dentry->d_inode; |
diff --git a/fs/affs/namei.c b/fs/affs/namei.c index 03330e2e390c..e3e9efc1fdd8 100644 --- a/fs/affs/namei.c +++ b/fs/affs/namei.c | |||
@@ -320,8 +320,6 @@ affs_rmdir(struct inode *dir, struct dentry *dentry) | |||
320 | dentry->d_inode->i_ino, | 320 | dentry->d_inode->i_ino, |
321 | (int)dentry->d_name.len, dentry->d_name.name); | 321 | (int)dentry->d_name.len, dentry->d_name.name); |
322 | 322 | ||
323 | dentry_unhash(dentry); | ||
324 | |||
325 | return affs_remove_header(dentry); | 323 | return affs_remove_header(dentry); |
326 | } | 324 | } |
327 | 325 | ||
@@ -419,9 +417,6 @@ affs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
419 | struct buffer_head *bh = NULL; | 417 | struct buffer_head *bh = NULL; |
420 | int retval; | 418 | int retval; |
421 | 419 | ||
422 | if (new_dentry->d_inode && S_ISDIR(new_dentry->d_inode->i_mode)) | ||
423 | dentry_unhash(new_dentry); | ||
424 | |||
425 | pr_debug("AFFS: rename(old=%u,\"%*s\" to new=%u,\"%*s\")\n", | 420 | pr_debug("AFFS: rename(old=%u,\"%*s\" to new=%u,\"%*s\")\n", |
426 | (u32)old_dir->i_ino, (int)old_dentry->d_name.len, old_dentry->d_name.name, | 421 | (u32)old_dir->i_ino, (int)old_dentry->d_name.len, old_dentry->d_name.name, |
427 | (u32)new_dir->i_ino, (int)new_dentry->d_name.len, new_dentry->d_name.name); | 422 | (u32)new_dir->i_ino, (int)new_dentry->d_name.len, new_dentry->d_name.name); |
diff --git a/fs/afs/dir.c b/fs/afs/dir.c index 2c4e05160042..20c106f24927 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c | |||
@@ -845,8 +845,6 @@ static int afs_rmdir(struct inode *dir, struct dentry *dentry) | |||
845 | _enter("{%x:%u},{%s}", | 845 | _enter("{%x:%u},{%s}", |
846 | dvnode->fid.vid, dvnode->fid.vnode, dentry->d_name.name); | 846 | dvnode->fid.vid, dvnode->fid.vnode, dentry->d_name.name); |
847 | 847 | ||
848 | dentry_unhash(dentry); | ||
849 | |||
850 | ret = -ENAMETOOLONG; | 848 | ret = -ENAMETOOLONG; |
851 | if (dentry->d_name.len >= AFSNAMEMAX) | 849 | if (dentry->d_name.len >= AFSNAMEMAX) |
852 | goto error; | 850 | goto error; |
@@ -1148,9 +1146,6 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
1148 | struct key *key; | 1146 | struct key *key; |
1149 | int ret; | 1147 | int ret; |
1150 | 1148 | ||
1151 | if (new_dentry->d_inode && S_ISDIR(new_dentry->d_inode->i_mode)) | ||
1152 | dentry_unhash(new_dentry); | ||
1153 | |||
1154 | vnode = AFS_FS_I(old_dentry->d_inode); | 1149 | vnode = AFS_FS_I(old_dentry->d_inode); |
1155 | orig_dvnode = AFS_FS_I(old_dir); | 1150 | orig_dvnode = AFS_FS_I(old_dir); |
1156 | new_dvnode = AFS_FS_I(new_dir); | 1151 | new_dvnode = AFS_FS_I(new_dir); |
@@ -175,6 +175,13 @@ int notify_change(struct dentry * dentry, struct iattr * attr) | |||
175 | return -EPERM; | 175 | return -EPERM; |
176 | } | 176 | } |
177 | 177 | ||
178 | if ((ia_valid & ATTR_MODE)) { | ||
179 | mode_t amode = attr->ia_mode; | ||
180 | /* Flag setting protected by i_mutex */ | ||
181 | if (is_sxid(amode)) | ||
182 | inode->i_flags &= ~S_NOSEC; | ||
183 | } | ||
184 | |||
178 | now = current_fs_time(inode->i_sb); | 185 | now = current_fs_time(inode->i_sb); |
179 | 186 | ||
180 | attr->ia_ctime = now; | 187 | attr->ia_ctime = now; |
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index 87d95a8cddbc..f55ae23b137e 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c | |||
@@ -583,8 +583,6 @@ static int autofs4_dir_unlink(struct inode *dir, struct dentry *dentry) | |||
583 | if (!autofs4_oz_mode(sbi) && !capable(CAP_SYS_ADMIN)) | 583 | if (!autofs4_oz_mode(sbi) && !capable(CAP_SYS_ADMIN)) |
584 | return -EACCES; | 584 | return -EACCES; |
585 | 585 | ||
586 | dentry_unhash(dentry); | ||
587 | |||
588 | if (atomic_dec_and_test(&ino->count)) { | 586 | if (atomic_dec_and_test(&ino->count)) { |
589 | p_ino = autofs4_dentry_ino(dentry->d_parent); | 587 | p_ino = autofs4_dentry_ino(dentry->d_parent); |
590 | if (p_ino && dentry->d_parent != dentry) | 588 | if (p_ino && dentry->d_parent != dentry) |
diff --git a/fs/bfs/dir.c b/fs/bfs/dir.c index c7d1d06b0483..b14cebfd9047 100644 --- a/fs/bfs/dir.c +++ b/fs/bfs/dir.c | |||
@@ -224,9 +224,6 @@ static int bfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
224 | struct bfs_sb_info *info; | 224 | struct bfs_sb_info *info; |
225 | int error = -ENOENT; | 225 | int error = -ENOENT; |
226 | 226 | ||
227 | if (new_dentry->d_inode && S_ISDIR(new_dentry->d_inode->i_mode)) | ||
228 | dentry_unhash(new_dentry); | ||
229 | |||
230 | old_bh = new_bh = NULL; | 227 | old_bh = new_bh = NULL; |
231 | old_inode = old_dentry->d_inode; | 228 | old_inode = old_dentry->d_inode; |
232 | if (S_ISDIR(old_inode->i_mode)) | 229 | if (S_ISDIR(old_inode->i_mode)) |
@@ -638,10 +638,11 @@ static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page | |||
638 | * @offset: vec entry offset | 638 | * @offset: vec entry offset |
639 | * | 639 | * |
640 | * Attempt to add a page to the bio_vec maplist. This can fail for a | 640 | * Attempt to add a page to the bio_vec maplist. This can fail for a |
641 | * number of reasons, such as the bio being full or target block | 641 | * number of reasons, such as the bio being full or target block device |
642 | * device limitations. The target block device must allow bio's | 642 | * limitations. The target block device must allow bio's up to PAGE_SIZE, |
643 | * smaller than PAGE_SIZE, so it is always possible to add a single | 643 | * so it is always possible to add a single page to an empty bio. |
644 | * page to an empty bio. This should only be used by REQ_PC bios. | 644 | * |
645 | * This should only be used by REQ_PC bios. | ||
645 | */ | 646 | */ |
646 | int bio_add_pc_page(struct request_queue *q, struct bio *bio, struct page *page, | 647 | int bio_add_pc_page(struct request_queue *q, struct bio *bio, struct page *page, |
647 | unsigned int len, unsigned int offset) | 648 | unsigned int len, unsigned int offset) |
@@ -659,10 +660,9 @@ EXPORT_SYMBOL(bio_add_pc_page); | |||
659 | * @offset: vec entry offset | 660 | * @offset: vec entry offset |
660 | * | 661 | * |
661 | * Attempt to add a page to the bio_vec maplist. This can fail for a | 662 | * Attempt to add a page to the bio_vec maplist. This can fail for a |
662 | * number of reasons, such as the bio being full or target block | 663 | * number of reasons, such as the bio being full or target block device |
663 | * device limitations. The target block device must allow bio's | 664 | * limitations. The target block device must allow bio's up to PAGE_SIZE, |
664 | * smaller than PAGE_SIZE, so it is always possible to add a single | 665 | * so it is always possible to add a single page to an empty bio. |
665 | * page to an empty bio. | ||
666 | */ | 666 | */ |
667 | int bio_add_page(struct bio *bio, struct page *page, unsigned int len, | 667 | int bio_add_page(struct bio *bio, struct page *page, unsigned int len, |
668 | unsigned int offset) | 668 | unsigned int offset) |
diff --git a/fs/block_dev.c b/fs/block_dev.c index 1f2b19978333..1a2421f908f0 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c | |||
@@ -1272,8 +1272,8 @@ int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder) | |||
1272 | * individual writeable reference is too fragile given the | 1272 | * individual writeable reference is too fragile given the |
1273 | * way @mode is used in blkdev_get/put(). | 1273 | * way @mode is used in blkdev_get/put(). |
1274 | */ | 1274 | */ |
1275 | if ((disk->flags & GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE) && | 1275 | if (!res && (mode & FMODE_WRITE) && !bdev->bd_write_holder && |
1276 | !res && (mode & FMODE_WRITE) && !bdev->bd_write_holder) { | 1276 | (disk->flags & GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE)) { |
1277 | bdev->bd_write_holder = true; | 1277 | bdev->bd_write_holder = true; |
1278 | disk_block_events(disk); | 1278 | disk_block_events(disk); |
1279 | } | 1279 | } |
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index 93b1aa932014..52d7eca8c7bf 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h | |||
@@ -121,9 +121,6 @@ struct btrfs_inode { | |||
121 | */ | 121 | */ |
122 | u64 index_cnt; | 122 | u64 index_cnt; |
123 | 123 | ||
124 | /* the start of block group preferred for allocations. */ | ||
125 | u64 block_group; | ||
126 | |||
127 | /* the fsync log has some corner cases that mean we have to check | 124 | /* the fsync log has some corner cases that mean we have to check |
128 | * directories to see if any unlinks have been done before | 125 | * directories to see if any unlinks have been done before |
129 | * the directory was logged. See tree-log.c for all the | 126 | * the directory was logged. See tree-log.c for all the |
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index b0e18d986e0a..d84089349c82 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c | |||
@@ -43,8 +43,6 @@ struct btrfs_path *btrfs_alloc_path(void) | |||
43 | { | 43 | { |
44 | struct btrfs_path *path; | 44 | struct btrfs_path *path; |
45 | path = kmem_cache_zalloc(btrfs_path_cachep, GFP_NOFS); | 45 | path = kmem_cache_zalloc(btrfs_path_cachep, GFP_NOFS); |
46 | if (path) | ||
47 | path->reada = 1; | ||
48 | return path; | 46 | return path; |
49 | } | 47 | } |
50 | 48 | ||
@@ -1224,6 +1222,7 @@ static void reada_for_search(struct btrfs_root *root, | |||
1224 | u64 search; | 1222 | u64 search; |
1225 | u64 target; | 1223 | u64 target; |
1226 | u64 nread = 0; | 1224 | u64 nread = 0; |
1225 | u64 gen; | ||
1227 | int direction = path->reada; | 1226 | int direction = path->reada; |
1228 | struct extent_buffer *eb; | 1227 | struct extent_buffer *eb; |
1229 | u32 nr; | 1228 | u32 nr; |
@@ -1251,6 +1250,15 @@ static void reada_for_search(struct btrfs_root *root, | |||
1251 | nritems = btrfs_header_nritems(node); | 1250 | nritems = btrfs_header_nritems(node); |
1252 | nr = slot; | 1251 | nr = slot; |
1253 | while (1) { | 1252 | while (1) { |
1253 | if (!node->map_token) { | ||
1254 | unsigned long offset = btrfs_node_key_ptr_offset(nr); | ||
1255 | map_private_extent_buffer(node, offset, | ||
1256 | sizeof(struct btrfs_key_ptr), | ||
1257 | &node->map_token, | ||
1258 | &node->kaddr, | ||
1259 | &node->map_start, | ||
1260 | &node->map_len, KM_USER1); | ||
1261 | } | ||
1254 | if (direction < 0) { | 1262 | if (direction < 0) { |
1255 | if (nr == 0) | 1263 | if (nr == 0) |
1256 | break; | 1264 | break; |
@@ -1268,14 +1276,23 @@ static void reada_for_search(struct btrfs_root *root, | |||
1268 | search = btrfs_node_blockptr(node, nr); | 1276 | search = btrfs_node_blockptr(node, nr); |
1269 | if ((search <= target && target - search <= 65536) || | 1277 | if ((search <= target && target - search <= 65536) || |
1270 | (search > target && search - target <= 65536)) { | 1278 | (search > target && search - target <= 65536)) { |
1271 | readahead_tree_block(root, search, blocksize, | 1279 | gen = btrfs_node_ptr_generation(node, nr); |
1272 | btrfs_node_ptr_generation(node, nr)); | 1280 | if (node->map_token) { |
1281 | unmap_extent_buffer(node, node->map_token, | ||
1282 | KM_USER1); | ||
1283 | node->map_token = NULL; | ||
1284 | } | ||
1285 | readahead_tree_block(root, search, blocksize, gen); | ||
1273 | nread += blocksize; | 1286 | nread += blocksize; |
1274 | } | 1287 | } |
1275 | nscan++; | 1288 | nscan++; |
1276 | if ((nread > 65536 || nscan > 32)) | 1289 | if ((nread > 65536 || nscan > 32)) |
1277 | break; | 1290 | break; |
1278 | } | 1291 | } |
1292 | if (node->map_token) { | ||
1293 | unmap_extent_buffer(node, node->map_token, KM_USER1); | ||
1294 | node->map_token = NULL; | ||
1295 | } | ||
1279 | } | 1296 | } |
1280 | 1297 | ||
1281 | /* | 1298 | /* |
@@ -1648,9 +1665,6 @@ again: | |||
1648 | } | 1665 | } |
1649 | cow_done: | 1666 | cow_done: |
1650 | BUG_ON(!cow && ins_len); | 1667 | BUG_ON(!cow && ins_len); |
1651 | if (level != btrfs_header_level(b)) | ||
1652 | WARN_ON(1); | ||
1653 | level = btrfs_header_level(b); | ||
1654 | 1668 | ||
1655 | p->nodes[level] = b; | 1669 | p->nodes[level] = b; |
1656 | if (!p->skip_locking) | 1670 | if (!p->skip_locking) |
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 332323e19dd1..378b5b4443f3 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h | |||
@@ -930,7 +930,6 @@ struct btrfs_fs_info { | |||
930 | * is required instead of the faster short fsync log commits | 930 | * is required instead of the faster short fsync log commits |
931 | */ | 931 | */ |
932 | u64 last_trans_log_full_commit; | 932 | u64 last_trans_log_full_commit; |
933 | u64 open_ioctl_trans; | ||
934 | unsigned long mount_opt:20; | 933 | unsigned long mount_opt:20; |
935 | unsigned long compress_type:4; | 934 | unsigned long compress_type:4; |
936 | u64 max_inline; | 935 | u64 max_inline; |
@@ -947,7 +946,6 @@ struct btrfs_fs_info { | |||
947 | struct super_block *sb; | 946 | struct super_block *sb; |
948 | struct inode *btree_inode; | 947 | struct inode *btree_inode; |
949 | struct backing_dev_info bdi; | 948 | struct backing_dev_info bdi; |
950 | struct mutex trans_mutex; | ||
951 | struct mutex tree_log_mutex; | 949 | struct mutex tree_log_mutex; |
952 | struct mutex transaction_kthread_mutex; | 950 | struct mutex transaction_kthread_mutex; |
953 | struct mutex cleaner_mutex; | 951 | struct mutex cleaner_mutex; |
@@ -968,6 +966,7 @@ struct btrfs_fs_info { | |||
968 | struct rw_semaphore subvol_sem; | 966 | struct rw_semaphore subvol_sem; |
969 | struct srcu_struct subvol_srcu; | 967 | struct srcu_struct subvol_srcu; |
970 | 968 | ||
969 | spinlock_t trans_lock; | ||
971 | struct list_head trans_list; | 970 | struct list_head trans_list; |
972 | struct list_head hashers; | 971 | struct list_head hashers; |
973 | struct list_head dead_roots; | 972 | struct list_head dead_roots; |
@@ -980,6 +979,7 @@ struct btrfs_fs_info { | |||
980 | atomic_t async_submit_draining; | 979 | atomic_t async_submit_draining; |
981 | atomic_t nr_async_bios; | 980 | atomic_t nr_async_bios; |
982 | atomic_t async_delalloc_pages; | 981 | atomic_t async_delalloc_pages; |
982 | atomic_t open_ioctl_trans; | ||
983 | 983 | ||
984 | /* | 984 | /* |
985 | * this is used by the balancing code to wait for all the pending | 985 | * this is used by the balancing code to wait for all the pending |
@@ -1044,6 +1044,7 @@ struct btrfs_fs_info { | |||
1044 | int closing; | 1044 | int closing; |
1045 | int log_root_recovering; | 1045 | int log_root_recovering; |
1046 | int enospc_unlink; | 1046 | int enospc_unlink; |
1047 | int trans_no_join; | ||
1047 | 1048 | ||
1048 | u64 total_pinned; | 1049 | u64 total_pinned; |
1049 | 1050 | ||
@@ -1065,7 +1066,6 @@ struct btrfs_fs_info { | |||
1065 | struct reloc_control *reloc_ctl; | 1066 | struct reloc_control *reloc_ctl; |
1066 | 1067 | ||
1067 | spinlock_t delalloc_lock; | 1068 | spinlock_t delalloc_lock; |
1068 | spinlock_t new_trans_lock; | ||
1069 | u64 delalloc_bytes; | 1069 | u64 delalloc_bytes; |
1070 | 1070 | ||
1071 | /* data_alloc_cluster is only used in ssd mode */ | 1071 | /* data_alloc_cluster is only used in ssd mode */ |
@@ -1340,6 +1340,7 @@ struct btrfs_ioctl_defrag_range_args { | |||
1340 | #define BTRFS_MOUNT_USER_SUBVOL_RM_ALLOWED (1 << 14) | 1340 | #define BTRFS_MOUNT_USER_SUBVOL_RM_ALLOWED (1 << 14) |
1341 | #define BTRFS_MOUNT_ENOSPC_DEBUG (1 << 15) | 1341 | #define BTRFS_MOUNT_ENOSPC_DEBUG (1 << 15) |
1342 | #define BTRFS_MOUNT_AUTO_DEFRAG (1 << 16) | 1342 | #define BTRFS_MOUNT_AUTO_DEFRAG (1 << 16) |
1343 | #define BTRFS_MOUNT_INODE_MAP_CACHE (1 << 17) | ||
1343 | 1344 | ||
1344 | #define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt) | 1345 | #define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt) |
1345 | #define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt) | 1346 | #define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt) |
@@ -2238,6 +2239,9 @@ int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src_rsv, | |||
2238 | void btrfs_block_rsv_release(struct btrfs_root *root, | 2239 | void btrfs_block_rsv_release(struct btrfs_root *root, |
2239 | struct btrfs_block_rsv *block_rsv, | 2240 | struct btrfs_block_rsv *block_rsv, |
2240 | u64 num_bytes); | 2241 | u64 num_bytes); |
2242 | int btrfs_truncate_reserve_metadata(struct btrfs_trans_handle *trans, | ||
2243 | struct btrfs_root *root, | ||
2244 | struct btrfs_block_rsv *rsv); | ||
2241 | int btrfs_set_block_group_ro(struct btrfs_root *root, | 2245 | int btrfs_set_block_group_ro(struct btrfs_root *root, |
2242 | struct btrfs_block_group_cache *cache); | 2246 | struct btrfs_block_group_cache *cache); |
2243 | int btrfs_set_block_group_rw(struct btrfs_root *root, | 2247 | int btrfs_set_block_group_rw(struct btrfs_root *root, |
@@ -2350,6 +2354,15 @@ int btrfs_drop_subtree(struct btrfs_trans_handle *trans, | |||
2350 | struct btrfs_root *root, | 2354 | struct btrfs_root *root, |
2351 | struct extent_buffer *node, | 2355 | struct extent_buffer *node, |
2352 | struct extent_buffer *parent); | 2356 | struct extent_buffer *parent); |
2357 | static inline int btrfs_fs_closing(struct btrfs_fs_info *fs_info) | ||
2358 | { | ||
2359 | /* | ||
2360 | * Get synced with close_ctree() | ||
2361 | */ | ||
2362 | smp_mb(); | ||
2363 | return fs_info->closing; | ||
2364 | } | ||
2365 | |||
2353 | /* root-item.c */ | 2366 | /* root-item.c */ |
2354 | int btrfs_find_root_ref(struct btrfs_root *tree_root, | 2367 | int btrfs_find_root_ref(struct btrfs_root *tree_root, |
2355 | struct btrfs_path *path, | 2368 | struct btrfs_path *path, |
@@ -2512,8 +2525,7 @@ int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end, | |||
2512 | int btrfs_writepages(struct address_space *mapping, | 2525 | int btrfs_writepages(struct address_space *mapping, |
2513 | struct writeback_control *wbc); | 2526 | struct writeback_control *wbc); |
2514 | int btrfs_create_subvol_root(struct btrfs_trans_handle *trans, | 2527 | int btrfs_create_subvol_root(struct btrfs_trans_handle *trans, |
2515 | struct btrfs_root *new_root, | 2528 | struct btrfs_root *new_root, u64 new_dirid); |
2516 | u64 new_dirid, u64 alloc_hint); | ||
2517 | int btrfs_merge_bio_hook(struct page *page, unsigned long offset, | 2529 | int btrfs_merge_bio_hook(struct page *page, unsigned long offset, |
2518 | size_t size, struct bio *bio, unsigned long bio_flags); | 2530 | size_t size, struct bio *bio, unsigned long bio_flags); |
2519 | 2531 | ||
@@ -2524,7 +2536,7 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf); | |||
2524 | int btrfs_readpage(struct file *file, struct page *page); | 2536 | int btrfs_readpage(struct file *file, struct page *page); |
2525 | void btrfs_evict_inode(struct inode *inode); | 2537 | void btrfs_evict_inode(struct inode *inode); |
2526 | int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc); | 2538 | int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc); |
2527 | void btrfs_dirty_inode(struct inode *inode); | 2539 | void btrfs_dirty_inode(struct inode *inode, int flags); |
2528 | struct inode *btrfs_alloc_inode(struct super_block *sb); | 2540 | struct inode *btrfs_alloc_inode(struct super_block *sb); |
2529 | void btrfs_destroy_inode(struct inode *inode); | 2541 | void btrfs_destroy_inode(struct inode *inode); |
2530 | int btrfs_drop_inode(struct inode *inode); | 2542 | int btrfs_drop_inode(struct inode *inode); |
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index 01e29503a54b..6462c29d2d37 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c | |||
@@ -678,6 +678,7 @@ static int btrfs_batch_insert_items(struct btrfs_trans_handle *trans, | |||
678 | INIT_LIST_HEAD(&head); | 678 | INIT_LIST_HEAD(&head); |
679 | 679 | ||
680 | next = item; | 680 | next = item; |
681 | nitems = 0; | ||
681 | 682 | ||
682 | /* | 683 | /* |
683 | * count the number of the continuous items that we can insert in batch | 684 | * count the number of the continuous items that we can insert in batch |
@@ -1129,7 +1130,7 @@ static void btrfs_async_run_delayed_node_done(struct btrfs_work *work) | |||
1129 | delayed_node = async_node->delayed_node; | 1130 | delayed_node = async_node->delayed_node; |
1130 | root = delayed_node->root; | 1131 | root = delayed_node->root; |
1131 | 1132 | ||
1132 | trans = btrfs_join_transaction(root, 0); | 1133 | trans = btrfs_join_transaction(root); |
1133 | if (IS_ERR(trans)) | 1134 | if (IS_ERR(trans)) |
1134 | goto free_path; | 1135 | goto free_path; |
1135 | 1136 | ||
@@ -1572,8 +1573,7 @@ static void fill_stack_inode_item(struct btrfs_trans_handle *trans, | |||
1572 | btrfs_set_stack_inode_transid(inode_item, trans->transid); | 1573 | btrfs_set_stack_inode_transid(inode_item, trans->transid); |
1573 | btrfs_set_stack_inode_rdev(inode_item, inode->i_rdev); | 1574 | btrfs_set_stack_inode_rdev(inode_item, inode->i_rdev); |
1574 | btrfs_set_stack_inode_flags(inode_item, BTRFS_I(inode)->flags); | 1575 | btrfs_set_stack_inode_flags(inode_item, BTRFS_I(inode)->flags); |
1575 | btrfs_set_stack_inode_block_group(inode_item, | 1576 | btrfs_set_stack_inode_block_group(inode_item, 0); |
1576 | BTRFS_I(inode)->block_group); | ||
1577 | 1577 | ||
1578 | btrfs_set_stack_timespec_sec(btrfs_inode_atime(inode_item), | 1578 | btrfs_set_stack_timespec_sec(btrfs_inode_atime(inode_item), |
1579 | inode->i_atime.tv_sec); | 1579 | inode->i_atime.tv_sec); |
@@ -1595,7 +1595,7 @@ int btrfs_delayed_update_inode(struct btrfs_trans_handle *trans, | |||
1595 | struct btrfs_root *root, struct inode *inode) | 1595 | struct btrfs_root *root, struct inode *inode) |
1596 | { | 1596 | { |
1597 | struct btrfs_delayed_node *delayed_node; | 1597 | struct btrfs_delayed_node *delayed_node; |
1598 | int ret; | 1598 | int ret = 0; |
1599 | 1599 | ||
1600 | delayed_node = btrfs_get_or_create_delayed_node(inode); | 1600 | delayed_node = btrfs_get_or_create_delayed_node(inode); |
1601 | if (IS_ERR(delayed_node)) | 1601 | if (IS_ERR(delayed_node)) |
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 98b6a71decba..a203d363184d 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c | |||
@@ -1505,24 +1505,24 @@ static int transaction_kthread(void *arg) | |||
1505 | vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE); | 1505 | vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE); |
1506 | mutex_lock(&root->fs_info->transaction_kthread_mutex); | 1506 | mutex_lock(&root->fs_info->transaction_kthread_mutex); |
1507 | 1507 | ||
1508 | spin_lock(&root->fs_info->new_trans_lock); | 1508 | spin_lock(&root->fs_info->trans_lock); |
1509 | cur = root->fs_info->running_transaction; | 1509 | cur = root->fs_info->running_transaction; |
1510 | if (!cur) { | 1510 | if (!cur) { |
1511 | spin_unlock(&root->fs_info->new_trans_lock); | 1511 | spin_unlock(&root->fs_info->trans_lock); |
1512 | goto sleep; | 1512 | goto sleep; |
1513 | } | 1513 | } |
1514 | 1514 | ||
1515 | now = get_seconds(); | 1515 | now = get_seconds(); |
1516 | if (!cur->blocked && | 1516 | if (!cur->blocked && |
1517 | (now < cur->start_time || now - cur->start_time < 30)) { | 1517 | (now < cur->start_time || now - cur->start_time < 30)) { |
1518 | spin_unlock(&root->fs_info->new_trans_lock); | 1518 | spin_unlock(&root->fs_info->trans_lock); |
1519 | delay = HZ * 5; | 1519 | delay = HZ * 5; |
1520 | goto sleep; | 1520 | goto sleep; |
1521 | } | 1521 | } |
1522 | transid = cur->transid; | 1522 | transid = cur->transid; |
1523 | spin_unlock(&root->fs_info->new_trans_lock); | 1523 | spin_unlock(&root->fs_info->trans_lock); |
1524 | 1524 | ||
1525 | trans = btrfs_join_transaction(root, 1); | 1525 | trans = btrfs_join_transaction(root); |
1526 | BUG_ON(IS_ERR(trans)); | 1526 | BUG_ON(IS_ERR(trans)); |
1527 | if (transid == trans->transid) { | 1527 | if (transid == trans->transid) { |
1528 | ret = btrfs_commit_transaction(trans, root); | 1528 | ret = btrfs_commit_transaction(trans, root); |
@@ -1613,7 +1613,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1613 | INIT_LIST_HEAD(&fs_info->ordered_operations); | 1613 | INIT_LIST_HEAD(&fs_info->ordered_operations); |
1614 | INIT_LIST_HEAD(&fs_info->caching_block_groups); | 1614 | INIT_LIST_HEAD(&fs_info->caching_block_groups); |
1615 | spin_lock_init(&fs_info->delalloc_lock); | 1615 | spin_lock_init(&fs_info->delalloc_lock); |
1616 | spin_lock_init(&fs_info->new_trans_lock); | 1616 | spin_lock_init(&fs_info->trans_lock); |
1617 | spin_lock_init(&fs_info->ref_cache_lock); | 1617 | spin_lock_init(&fs_info->ref_cache_lock); |
1618 | spin_lock_init(&fs_info->fs_roots_radix_lock); | 1618 | spin_lock_init(&fs_info->fs_roots_radix_lock); |
1619 | spin_lock_init(&fs_info->delayed_iput_lock); | 1619 | spin_lock_init(&fs_info->delayed_iput_lock); |
@@ -1645,6 +1645,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1645 | fs_info->max_inline = 8192 * 1024; | 1645 | fs_info->max_inline = 8192 * 1024; |
1646 | fs_info->metadata_ratio = 0; | 1646 | fs_info->metadata_ratio = 0; |
1647 | fs_info->defrag_inodes = RB_ROOT; | 1647 | fs_info->defrag_inodes = RB_ROOT; |
1648 | fs_info->trans_no_join = 0; | ||
1648 | 1649 | ||
1649 | fs_info->thread_pool_size = min_t(unsigned long, | 1650 | fs_info->thread_pool_size = min_t(unsigned long, |
1650 | num_online_cpus() + 2, 8); | 1651 | num_online_cpus() + 2, 8); |
@@ -1709,7 +1710,6 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1709 | fs_info->do_barriers = 1; | 1710 | fs_info->do_barriers = 1; |
1710 | 1711 | ||
1711 | 1712 | ||
1712 | mutex_init(&fs_info->trans_mutex); | ||
1713 | mutex_init(&fs_info->ordered_operations_mutex); | 1713 | mutex_init(&fs_info->ordered_operations_mutex); |
1714 | mutex_init(&fs_info->tree_log_mutex); | 1714 | mutex_init(&fs_info->tree_log_mutex); |
1715 | mutex_init(&fs_info->chunk_mutex); | 1715 | mutex_init(&fs_info->chunk_mutex); |
@@ -2479,13 +2479,13 @@ int btrfs_commit_super(struct btrfs_root *root) | |||
2479 | down_write(&root->fs_info->cleanup_work_sem); | 2479 | down_write(&root->fs_info->cleanup_work_sem); |
2480 | up_write(&root->fs_info->cleanup_work_sem); | 2480 | up_write(&root->fs_info->cleanup_work_sem); |
2481 | 2481 | ||
2482 | trans = btrfs_join_transaction(root, 1); | 2482 | trans = btrfs_join_transaction(root); |
2483 | if (IS_ERR(trans)) | 2483 | if (IS_ERR(trans)) |
2484 | return PTR_ERR(trans); | 2484 | return PTR_ERR(trans); |
2485 | ret = btrfs_commit_transaction(trans, root); | 2485 | ret = btrfs_commit_transaction(trans, root); |
2486 | BUG_ON(ret); | 2486 | BUG_ON(ret); |
2487 | /* run commit again to drop the original snapshot */ | 2487 | /* run commit again to drop the original snapshot */ |
2488 | trans = btrfs_join_transaction(root, 1); | 2488 | trans = btrfs_join_transaction(root); |
2489 | if (IS_ERR(trans)) | 2489 | if (IS_ERR(trans)) |
2490 | return PTR_ERR(trans); | 2490 | return PTR_ERR(trans); |
2491 | btrfs_commit_transaction(trans, root); | 2491 | btrfs_commit_transaction(trans, root); |
@@ -3024,10 +3024,13 @@ static int btrfs_cleanup_transaction(struct btrfs_root *root) | |||
3024 | 3024 | ||
3025 | WARN_ON(1); | 3025 | WARN_ON(1); |
3026 | 3026 | ||
3027 | mutex_lock(&root->fs_info->trans_mutex); | ||
3028 | mutex_lock(&root->fs_info->transaction_kthread_mutex); | 3027 | mutex_lock(&root->fs_info->transaction_kthread_mutex); |
3029 | 3028 | ||
3029 | spin_lock(&root->fs_info->trans_lock); | ||
3030 | list_splice_init(&root->fs_info->trans_list, &list); | 3030 | list_splice_init(&root->fs_info->trans_list, &list); |
3031 | root->fs_info->trans_no_join = 1; | ||
3032 | spin_unlock(&root->fs_info->trans_lock); | ||
3033 | |||
3031 | while (!list_empty(&list)) { | 3034 | while (!list_empty(&list)) { |
3032 | t = list_entry(list.next, struct btrfs_transaction, list); | 3035 | t = list_entry(list.next, struct btrfs_transaction, list); |
3033 | if (!t) | 3036 | if (!t) |
@@ -3052,23 +3055,18 @@ static int btrfs_cleanup_transaction(struct btrfs_root *root) | |||
3052 | t->blocked = 0; | 3055 | t->blocked = 0; |
3053 | if (waitqueue_active(&root->fs_info->transaction_wait)) | 3056 | if (waitqueue_active(&root->fs_info->transaction_wait)) |
3054 | wake_up(&root->fs_info->transaction_wait); | 3057 | wake_up(&root->fs_info->transaction_wait); |
3055 | mutex_unlock(&root->fs_info->trans_mutex); | ||
3056 | 3058 | ||
3057 | mutex_lock(&root->fs_info->trans_mutex); | ||
3058 | t->commit_done = 1; | 3059 | t->commit_done = 1; |
3059 | if (waitqueue_active(&t->commit_wait)) | 3060 | if (waitqueue_active(&t->commit_wait)) |
3060 | wake_up(&t->commit_wait); | 3061 | wake_up(&t->commit_wait); |
3061 | mutex_unlock(&root->fs_info->trans_mutex); | ||
3062 | |||
3063 | mutex_lock(&root->fs_info->trans_mutex); | ||
3064 | 3062 | ||
3065 | btrfs_destroy_pending_snapshots(t); | 3063 | btrfs_destroy_pending_snapshots(t); |
3066 | 3064 | ||
3067 | btrfs_destroy_delalloc_inodes(root); | 3065 | btrfs_destroy_delalloc_inodes(root); |
3068 | 3066 | ||
3069 | spin_lock(&root->fs_info->new_trans_lock); | 3067 | spin_lock(&root->fs_info->trans_lock); |
3070 | root->fs_info->running_transaction = NULL; | 3068 | root->fs_info->running_transaction = NULL; |
3071 | spin_unlock(&root->fs_info->new_trans_lock); | 3069 | spin_unlock(&root->fs_info->trans_lock); |
3072 | 3070 | ||
3073 | btrfs_destroy_marked_extents(root, &t->dirty_pages, | 3071 | btrfs_destroy_marked_extents(root, &t->dirty_pages, |
3074 | EXTENT_DIRTY); | 3072 | EXTENT_DIRTY); |
@@ -3082,8 +3080,10 @@ static int btrfs_cleanup_transaction(struct btrfs_root *root) | |||
3082 | kmem_cache_free(btrfs_transaction_cachep, t); | 3080 | kmem_cache_free(btrfs_transaction_cachep, t); |
3083 | } | 3081 | } |
3084 | 3082 | ||
3083 | spin_lock(&root->fs_info->trans_lock); | ||
3084 | root->fs_info->trans_no_join = 0; | ||
3085 | spin_unlock(&root->fs_info->trans_lock); | ||
3085 | mutex_unlock(&root->fs_info->transaction_kthread_mutex); | 3086 | mutex_unlock(&root->fs_info->transaction_kthread_mutex); |
3086 | mutex_unlock(&root->fs_info->trans_mutex); | ||
3087 | 3087 | ||
3088 | return 0; | 3088 | return 0; |
3089 | } | 3089 | } |
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 169bd62ce776..5b9b6b6df242 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c | |||
@@ -348,7 +348,7 @@ static int caching_kthread(void *data) | |||
348 | */ | 348 | */ |
349 | path->skip_locking = 1; | 349 | path->skip_locking = 1; |
350 | path->search_commit_root = 1; | 350 | path->search_commit_root = 1; |
351 | path->reada = 2; | 351 | path->reada = 1; |
352 | 352 | ||
353 | key.objectid = last; | 353 | key.objectid = last; |
354 | key.offset = 0; | 354 | key.offset = 0; |
@@ -366,8 +366,7 @@ again: | |||
366 | nritems = btrfs_header_nritems(leaf); | 366 | nritems = btrfs_header_nritems(leaf); |
367 | 367 | ||
368 | while (1) { | 368 | while (1) { |
369 | smp_mb(); | 369 | if (btrfs_fs_closing(fs_info) > 1) { |
370 | if (fs_info->closing > 1) { | ||
371 | last = (u64)-1; | 370 | last = (u64)-1; |
372 | break; | 371 | break; |
373 | } | 372 | } |
@@ -379,15 +378,18 @@ again: | |||
379 | if (ret) | 378 | if (ret) |
380 | break; | 379 | break; |
381 | 380 | ||
382 | caching_ctl->progress = last; | 381 | if (need_resched() || |
383 | btrfs_release_path(path); | 382 | btrfs_next_leaf(extent_root, path)) { |
384 | up_read(&fs_info->extent_commit_sem); | 383 | caching_ctl->progress = last; |
385 | mutex_unlock(&caching_ctl->mutex); | 384 | btrfs_release_path(path); |
386 | if (btrfs_transaction_in_commit(fs_info)) | 385 | up_read(&fs_info->extent_commit_sem); |
387 | schedule_timeout(1); | 386 | mutex_unlock(&caching_ctl->mutex); |
388 | else | ||
389 | cond_resched(); | 387 | cond_resched(); |
390 | goto again; | 388 | goto again; |
389 | } | ||
390 | leaf = path->nodes[0]; | ||
391 | nritems = btrfs_header_nritems(leaf); | ||
392 | continue; | ||
391 | } | 393 | } |
392 | 394 | ||
393 | if (key.objectid < block_group->key.objectid) { | 395 | if (key.objectid < block_group->key.objectid) { |
@@ -3065,7 +3067,7 @@ again: | |||
3065 | spin_unlock(&data_sinfo->lock); | 3067 | spin_unlock(&data_sinfo->lock); |
3066 | alloc: | 3068 | alloc: |
3067 | alloc_target = btrfs_get_alloc_profile(root, 1); | 3069 | alloc_target = btrfs_get_alloc_profile(root, 1); |
3068 | trans = btrfs_join_transaction(root, 1); | 3070 | trans = btrfs_join_transaction(root); |
3069 | if (IS_ERR(trans)) | 3071 | if (IS_ERR(trans)) |
3070 | return PTR_ERR(trans); | 3072 | return PTR_ERR(trans); |
3071 | 3073 | ||
@@ -3091,9 +3093,10 @@ alloc: | |||
3091 | 3093 | ||
3092 | /* commit the current transaction and try again */ | 3094 | /* commit the current transaction and try again */ |
3093 | commit_trans: | 3095 | commit_trans: |
3094 | if (!committed && !root->fs_info->open_ioctl_trans) { | 3096 | if (!committed && |
3097 | !atomic_read(&root->fs_info->open_ioctl_trans)) { | ||
3095 | committed = 1; | 3098 | committed = 1; |
3096 | trans = btrfs_join_transaction(root, 1); | 3099 | trans = btrfs_join_transaction(root); |
3097 | if (IS_ERR(trans)) | 3100 | if (IS_ERR(trans)) |
3098 | return PTR_ERR(trans); | 3101 | return PTR_ERR(trans); |
3099 | ret = btrfs_commit_transaction(trans, root); | 3102 | ret = btrfs_commit_transaction(trans, root); |
@@ -3472,7 +3475,7 @@ again: | |||
3472 | goto out; | 3475 | goto out; |
3473 | 3476 | ||
3474 | ret = -ENOSPC; | 3477 | ret = -ENOSPC; |
3475 | trans = btrfs_join_transaction(root, 1); | 3478 | trans = btrfs_join_transaction(root); |
3476 | if (IS_ERR(trans)) | 3479 | if (IS_ERR(trans)) |
3477 | goto out; | 3480 | goto out; |
3478 | ret = btrfs_commit_transaction(trans, root); | 3481 | ret = btrfs_commit_transaction(trans, root); |
@@ -3699,7 +3702,7 @@ int btrfs_block_rsv_check(struct btrfs_trans_handle *trans, | |||
3699 | if (trans) | 3702 | if (trans) |
3700 | return -EAGAIN; | 3703 | return -EAGAIN; |
3701 | 3704 | ||
3702 | trans = btrfs_join_transaction(root, 1); | 3705 | trans = btrfs_join_transaction(root); |
3703 | BUG_ON(IS_ERR(trans)); | 3706 | BUG_ON(IS_ERR(trans)); |
3704 | ret = btrfs_commit_transaction(trans, root); | 3707 | ret = btrfs_commit_transaction(trans, root); |
3705 | return 0; | 3708 | return 0; |
@@ -3837,6 +3840,37 @@ static void release_global_block_rsv(struct btrfs_fs_info *fs_info) | |||
3837 | WARN_ON(fs_info->chunk_block_rsv.reserved > 0); | 3840 | WARN_ON(fs_info->chunk_block_rsv.reserved > 0); |
3838 | } | 3841 | } |
3839 | 3842 | ||
3843 | int btrfs_truncate_reserve_metadata(struct btrfs_trans_handle *trans, | ||
3844 | struct btrfs_root *root, | ||
3845 | struct btrfs_block_rsv *rsv) | ||
3846 | { | ||
3847 | struct btrfs_block_rsv *trans_rsv = &root->fs_info->trans_block_rsv; | ||
3848 | u64 num_bytes; | ||
3849 | int ret; | ||
3850 | |||
3851 | /* | ||
3852 | * Truncate should be freeing data, but give us 2 items just in case it | ||
3853 | * needs to use some space. We may want to be smarter about this in the | ||
3854 | * future. | ||
3855 | */ | ||
3856 | num_bytes = btrfs_calc_trans_metadata_size(root, 2); | ||
3857 | |||
3858 | /* We already have enough bytes, just return */ | ||
3859 | if (rsv->reserved >= num_bytes) | ||
3860 | return 0; | ||
3861 | |||
3862 | num_bytes -= rsv->reserved; | ||
3863 | |||
3864 | /* | ||
3865 | * You should have reserved enough space before hand to do this, so this | ||
3866 | * should not fail. | ||
3867 | */ | ||
3868 | ret = block_rsv_migrate_bytes(trans_rsv, rsv, num_bytes); | ||
3869 | BUG_ON(ret); | ||
3870 | |||
3871 | return 0; | ||
3872 | } | ||
3873 | |||
3840 | int btrfs_trans_reserve_metadata(struct btrfs_trans_handle *trans, | 3874 | int btrfs_trans_reserve_metadata(struct btrfs_trans_handle *trans, |
3841 | struct btrfs_root *root, | 3875 | struct btrfs_root *root, |
3842 | int num_items) | 3876 | int num_items) |
@@ -3877,23 +3911,18 @@ int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans, | |||
3877 | struct btrfs_block_rsv *dst_rsv = root->orphan_block_rsv; | 3911 | struct btrfs_block_rsv *dst_rsv = root->orphan_block_rsv; |
3878 | 3912 | ||
3879 | /* | 3913 | /* |
3880 | * one for deleting orphan item, one for updating inode and | 3914 | * We need to hold space in order to delete our orphan item once we've |
3881 | * two for calling btrfs_truncate_inode_items. | 3915 | * added it, so this takes the reservation so we can release it later |
3882 | * | 3916 | * when we are truly done with the orphan item. |
3883 | * btrfs_truncate_inode_items is a delete operation, it frees | ||
3884 | * more space than it uses in most cases. So two units of | ||
3885 | * metadata space should be enough for calling it many times. | ||
3886 | * If all of the metadata space is used, we can commit | ||
3887 | * transaction and use space it freed. | ||
3888 | */ | 3917 | */ |
3889 | u64 num_bytes = btrfs_calc_trans_metadata_size(root, 4); | 3918 | u64 num_bytes = btrfs_calc_trans_metadata_size(root, 1); |
3890 | return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes); | 3919 | return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes); |
3891 | } | 3920 | } |
3892 | 3921 | ||
3893 | void btrfs_orphan_release_metadata(struct inode *inode) | 3922 | void btrfs_orphan_release_metadata(struct inode *inode) |
3894 | { | 3923 | { |
3895 | struct btrfs_root *root = BTRFS_I(inode)->root; | 3924 | struct btrfs_root *root = BTRFS_I(inode)->root; |
3896 | u64 num_bytes = btrfs_calc_trans_metadata_size(root, 4); | 3925 | u64 num_bytes = btrfs_calc_trans_metadata_size(root, 1); |
3897 | btrfs_block_rsv_release(root, root->orphan_block_rsv, num_bytes); | 3926 | btrfs_block_rsv_release(root, root->orphan_block_rsv, num_bytes); |
3898 | } | 3927 | } |
3899 | 3928 | ||
@@ -4987,6 +5016,15 @@ have_block_group: | |||
4987 | if (unlikely(block_group->ro)) | 5016 | if (unlikely(block_group->ro)) |
4988 | goto loop; | 5017 | goto loop; |
4989 | 5018 | ||
5019 | spin_lock(&block_group->free_space_ctl->tree_lock); | ||
5020 | if (cached && | ||
5021 | block_group->free_space_ctl->free_space < | ||
5022 | num_bytes + empty_size) { | ||
5023 | spin_unlock(&block_group->free_space_ctl->tree_lock); | ||
5024 | goto loop; | ||
5025 | } | ||
5026 | spin_unlock(&block_group->free_space_ctl->tree_lock); | ||
5027 | |||
4990 | /* | 5028 | /* |
4991 | * Ok we want to try and use the cluster allocator, so lets look | 5029 | * Ok we want to try and use the cluster allocator, so lets look |
4992 | * there, unless we are on LOOP_NO_EMPTY_SIZE, since we will | 5030 | * there, unless we are on LOOP_NO_EMPTY_SIZE, since we will |
@@ -5150,6 +5188,7 @@ checks: | |||
5150 | btrfs_add_free_space(block_group, offset, | 5188 | btrfs_add_free_space(block_group, offset, |
5151 | search_start - offset); | 5189 | search_start - offset); |
5152 | BUG_ON(offset > search_start); | 5190 | BUG_ON(offset > search_start); |
5191 | btrfs_put_block_group(block_group); | ||
5153 | break; | 5192 | break; |
5154 | loop: | 5193 | loop: |
5155 | failed_cluster_refill = false; | 5194 | failed_cluster_refill = false; |
@@ -5242,14 +5281,7 @@ loop: | |||
5242 | ret = -ENOSPC; | 5281 | ret = -ENOSPC; |
5243 | } else if (!ins->objectid) { | 5282 | } else if (!ins->objectid) { |
5244 | ret = -ENOSPC; | 5283 | ret = -ENOSPC; |
5245 | } | 5284 | } else if (ins->objectid) { |
5246 | |||
5247 | /* we found what we needed */ | ||
5248 | if (ins->objectid) { | ||
5249 | if (!(data & BTRFS_BLOCK_GROUP_DATA)) | ||
5250 | trans->block_group = block_group->key.objectid; | ||
5251 | |||
5252 | btrfs_put_block_group(block_group); | ||
5253 | ret = 0; | 5285 | ret = 0; |
5254 | } | 5286 | } |
5255 | 5287 | ||
@@ -6526,7 +6558,7 @@ int btrfs_set_block_group_ro(struct btrfs_root *root, | |||
6526 | 6558 | ||
6527 | BUG_ON(cache->ro); | 6559 | BUG_ON(cache->ro); |
6528 | 6560 | ||
6529 | trans = btrfs_join_transaction(root, 1); | 6561 | trans = btrfs_join_transaction(root); |
6530 | BUG_ON(IS_ERR(trans)); | 6562 | BUG_ON(IS_ERR(trans)); |
6531 | 6563 | ||
6532 | alloc_flags = update_block_group_flags(root, cache->flags); | 6564 | alloc_flags = update_block_group_flags(root, cache->flags); |
@@ -6882,6 +6914,7 @@ int btrfs_read_block_groups(struct btrfs_root *root) | |||
6882 | path = btrfs_alloc_path(); | 6914 | path = btrfs_alloc_path(); |
6883 | if (!path) | 6915 | if (!path) |
6884 | return -ENOMEM; | 6916 | return -ENOMEM; |
6917 | path->reada = 1; | ||
6885 | 6918 | ||
6886 | cache_gen = btrfs_super_cache_generation(&root->fs_info->super_copy); | 6919 | cache_gen = btrfs_super_cache_generation(&root->fs_info->super_copy); |
6887 | if (cache_gen != 0 && | 6920 | if (cache_gen != 0 && |
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index c5d9fbb92bc3..7055d11c1efd 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c | |||
@@ -1476,7 +1476,7 @@ u64 count_range_bits(struct extent_io_tree *tree, | |||
1476 | if (total_bytes >= max_bytes) | 1476 | if (total_bytes >= max_bytes) |
1477 | break; | 1477 | break; |
1478 | if (!found) { | 1478 | if (!found) { |
1479 | *start = state->start; | 1479 | *start = max(cur_start, state->start); |
1480 | found = 1; | 1480 | found = 1; |
1481 | } | 1481 | } |
1482 | last = state->end; | 1482 | last = state->end; |
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index c6a22d783c35..fa4ef18b66b1 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c | |||
@@ -129,7 +129,7 @@ int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans, | |||
129 | if (!btrfs_test_opt(root, AUTO_DEFRAG)) | 129 | if (!btrfs_test_opt(root, AUTO_DEFRAG)) |
130 | return 0; | 130 | return 0; |
131 | 131 | ||
132 | if (root->fs_info->closing) | 132 | if (btrfs_fs_closing(root->fs_info)) |
133 | return 0; | 133 | return 0; |
134 | 134 | ||
135 | if (BTRFS_I(inode)->in_defrag) | 135 | if (BTRFS_I(inode)->in_defrag) |
@@ -144,7 +144,7 @@ int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans, | |||
144 | if (!defrag) | 144 | if (!defrag) |
145 | return -ENOMEM; | 145 | return -ENOMEM; |
146 | 146 | ||
147 | defrag->ino = inode->i_ino; | 147 | defrag->ino = btrfs_ino(inode); |
148 | defrag->transid = transid; | 148 | defrag->transid = transid; |
149 | defrag->root = root->root_key.objectid; | 149 | defrag->root = root->root_key.objectid; |
150 | 150 | ||
@@ -229,7 +229,7 @@ int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info) | |||
229 | first_ino = defrag->ino + 1; | 229 | first_ino = defrag->ino + 1; |
230 | rb_erase(&defrag->rb_node, &fs_info->defrag_inodes); | 230 | rb_erase(&defrag->rb_node, &fs_info->defrag_inodes); |
231 | 231 | ||
232 | if (fs_info->closing) | 232 | if (btrfs_fs_closing(fs_info)) |
233 | goto next_free; | 233 | goto next_free; |
234 | 234 | ||
235 | spin_unlock(&fs_info->defrag_inodes_lock); | 235 | spin_unlock(&fs_info->defrag_inodes_lock); |
@@ -1480,14 +1480,12 @@ int btrfs_sync_file(struct file *file, int datasync) | |||
1480 | * the current transaction, we can bail out now without any | 1480 | * the current transaction, we can bail out now without any |
1481 | * syncing | 1481 | * syncing |
1482 | */ | 1482 | */ |
1483 | mutex_lock(&root->fs_info->trans_mutex); | 1483 | smp_mb(); |
1484 | if (BTRFS_I(inode)->last_trans <= | 1484 | if (BTRFS_I(inode)->last_trans <= |
1485 | root->fs_info->last_trans_committed) { | 1485 | root->fs_info->last_trans_committed) { |
1486 | BTRFS_I(inode)->last_trans = 0; | 1486 | BTRFS_I(inode)->last_trans = 0; |
1487 | mutex_unlock(&root->fs_info->trans_mutex); | ||
1488 | goto out; | 1487 | goto out; |
1489 | } | 1488 | } |
1490 | mutex_unlock(&root->fs_info->trans_mutex); | ||
1491 | 1489 | ||
1492 | /* | 1490 | /* |
1493 | * ok we haven't committed the transaction yet, lets do a commit | 1491 | * ok we haven't committed the transaction yet, lets do a commit |
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 70d45795d758..ad144736a5fd 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c | |||
@@ -98,7 +98,7 @@ struct inode *lookup_free_space_inode(struct btrfs_root *root, | |||
98 | return inode; | 98 | return inode; |
99 | 99 | ||
100 | spin_lock(&block_group->lock); | 100 | spin_lock(&block_group->lock); |
101 | if (!root->fs_info->closing) { | 101 | if (!btrfs_fs_closing(root->fs_info)) { |
102 | block_group->inode = igrab(inode); | 102 | block_group->inode = igrab(inode); |
103 | block_group->iref = 1; | 103 | block_group->iref = 1; |
104 | } | 104 | } |
@@ -402,7 +402,14 @@ int __load_free_space_cache(struct btrfs_root *root, struct inode *inode, | |||
402 | spin_lock(&ctl->tree_lock); | 402 | spin_lock(&ctl->tree_lock); |
403 | ret = link_free_space(ctl, e); | 403 | ret = link_free_space(ctl, e); |
404 | spin_unlock(&ctl->tree_lock); | 404 | spin_unlock(&ctl->tree_lock); |
405 | BUG_ON(ret); | 405 | if (ret) { |
406 | printk(KERN_ERR "Duplicate entries in " | ||
407 | "free space cache, dumping\n"); | ||
408 | kunmap(page); | ||
409 | unlock_page(page); | ||
410 | page_cache_release(page); | ||
411 | goto free_cache; | ||
412 | } | ||
406 | } else { | 413 | } else { |
407 | e->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS); | 414 | e->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS); |
408 | if (!e->bitmap) { | 415 | if (!e->bitmap) { |
@@ -419,6 +426,14 @@ int __load_free_space_cache(struct btrfs_root *root, struct inode *inode, | |||
419 | ctl->op->recalc_thresholds(ctl); | 426 | ctl->op->recalc_thresholds(ctl); |
420 | spin_unlock(&ctl->tree_lock); | 427 | spin_unlock(&ctl->tree_lock); |
421 | list_add_tail(&e->list, &bitmaps); | 428 | list_add_tail(&e->list, &bitmaps); |
429 | if (ret) { | ||
430 | printk(KERN_ERR "Duplicate entries in " | ||
431 | "free space cache, dumping\n"); | ||
432 | kunmap(page); | ||
433 | unlock_page(page); | ||
434 | page_cache_release(page); | ||
435 | goto free_cache; | ||
436 | } | ||
422 | } | 437 | } |
423 | 438 | ||
424 | num_entries--; | 439 | num_entries--; |
@@ -478,8 +493,7 @@ int load_free_space_cache(struct btrfs_fs_info *fs_info, | |||
478 | * If we're unmounting then just return, since this does a search on the | 493 | * If we're unmounting then just return, since this does a search on the |
479 | * normal root and not the commit root and we could deadlock. | 494 | * normal root and not the commit root and we could deadlock. |
480 | */ | 495 | */ |
481 | smp_mb(); | 496 | if (btrfs_fs_closing(fs_info)) |
482 | if (fs_info->closing) | ||
483 | return 0; | 497 | return 0; |
484 | 498 | ||
485 | /* | 499 | /* |
@@ -575,10 +589,25 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, | |||
575 | 589 | ||
576 | num_pages = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> | 590 | num_pages = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> |
577 | PAGE_CACHE_SHIFT; | 591 | PAGE_CACHE_SHIFT; |
592 | |||
593 | /* Since the first page has all of our checksums and our generation we | ||
594 | * need to calculate the offset into the page that we can start writing | ||
595 | * our entries. | ||
596 | */ | ||
597 | first_page_offset = (sizeof(u32) * num_pages) + sizeof(u64); | ||
598 | |||
578 | filemap_write_and_wait(inode->i_mapping); | 599 | filemap_write_and_wait(inode->i_mapping); |
579 | btrfs_wait_ordered_range(inode, inode->i_size & | 600 | btrfs_wait_ordered_range(inode, inode->i_size & |
580 | ~(root->sectorsize - 1), (u64)-1); | 601 | ~(root->sectorsize - 1), (u64)-1); |
581 | 602 | ||
603 | /* make sure we don't overflow that first page */ | ||
604 | if (first_page_offset + sizeof(struct btrfs_free_space_entry) >= PAGE_CACHE_SIZE) { | ||
605 | /* this is really the same as running out of space, where we also return 0 */ | ||
606 | printk(KERN_CRIT "Btrfs: free space cache was too big for the crc page\n"); | ||
607 | ret = 0; | ||
608 | goto out_update; | ||
609 | } | ||
610 | |||
582 | /* We need a checksum per page. */ | 611 | /* We need a checksum per page. */ |
583 | crc = checksums = kzalloc(sizeof(u32) * num_pages, GFP_NOFS); | 612 | crc = checksums = kzalloc(sizeof(u32) * num_pages, GFP_NOFS); |
584 | if (!crc) | 613 | if (!crc) |
@@ -590,12 +619,6 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, | |||
590 | return -1; | 619 | return -1; |
591 | } | 620 | } |
592 | 621 | ||
593 | /* Since the first page has all of our checksums and our generation we | ||
594 | * need to calculate the offset into the page that we can start writing | ||
595 | * our entries. | ||
596 | */ | ||
597 | first_page_offset = (sizeof(u32) * num_pages) + sizeof(u64); | ||
598 | |||
599 | /* Get the cluster for this block_group if it exists */ | 622 | /* Get the cluster for this block_group if it exists */ |
600 | if (block_group && !list_empty(&block_group->cluster_list)) | 623 | if (block_group && !list_empty(&block_group->cluster_list)) |
601 | cluster = list_entry(block_group->cluster_list.next, | 624 | cluster = list_entry(block_group->cluster_list.next, |
@@ -857,12 +880,14 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, | |||
857 | ret = 1; | 880 | ret = 1; |
858 | 881 | ||
859 | out_free: | 882 | out_free: |
883 | kfree(checksums); | ||
884 | kfree(pages); | ||
885 | |||
886 | out_update: | ||
860 | if (ret != 1) { | 887 | if (ret != 1) { |
861 | invalidate_inode_pages2_range(inode->i_mapping, 0, index); | 888 | invalidate_inode_pages2_range(inode->i_mapping, 0, index); |
862 | BTRFS_I(inode)->generation = 0; | 889 | BTRFS_I(inode)->generation = 0; |
863 | } | 890 | } |
864 | kfree(checksums); | ||
865 | kfree(pages); | ||
866 | btrfs_update_inode(trans, root, inode); | 891 | btrfs_update_inode(trans, root, inode); |
867 | return ret; | 892 | return ret; |
868 | } | 893 | } |
@@ -963,10 +988,16 @@ static int tree_insert_offset(struct rb_root *root, u64 offset, | |||
963 | * logically. | 988 | * logically. |
964 | */ | 989 | */ |
965 | if (bitmap) { | 990 | if (bitmap) { |
966 | WARN_ON(info->bitmap); | 991 | if (info->bitmap) { |
992 | WARN_ON_ONCE(1); | ||
993 | return -EEXIST; | ||
994 | } | ||
967 | p = &(*p)->rb_right; | 995 | p = &(*p)->rb_right; |
968 | } else { | 996 | } else { |
969 | WARN_ON(!info->bitmap); | 997 | if (!info->bitmap) { |
998 | WARN_ON_ONCE(1); | ||
999 | return -EEXIST; | ||
1000 | } | ||
970 | p = &(*p)->rb_left; | 1001 | p = &(*p)->rb_left; |
971 | } | 1002 | } |
972 | } | 1003 | } |
@@ -2481,7 +2512,7 @@ struct inode *lookup_free_ino_inode(struct btrfs_root *root, | |||
2481 | return inode; | 2512 | return inode; |
2482 | 2513 | ||
2483 | spin_lock(&root->cache_lock); | 2514 | spin_lock(&root->cache_lock); |
2484 | if (!root->fs_info->closing) | 2515 | if (!btrfs_fs_closing(root->fs_info)) |
2485 | root->cache_inode = igrab(inode); | 2516 | root->cache_inode = igrab(inode); |
2486 | spin_unlock(&root->cache_lock); | 2517 | spin_unlock(&root->cache_lock); |
2487 | 2518 | ||
@@ -2504,12 +2535,14 @@ int load_free_ino_cache(struct btrfs_fs_info *fs_info, struct btrfs_root *root) | |||
2504 | int ret = 0; | 2535 | int ret = 0; |
2505 | u64 root_gen = btrfs_root_generation(&root->root_item); | 2536 | u64 root_gen = btrfs_root_generation(&root->root_item); |
2506 | 2537 | ||
2538 | if (!btrfs_test_opt(root, INODE_MAP_CACHE)) | ||
2539 | return 0; | ||
2540 | |||
2507 | /* | 2541 | /* |
2508 | * If we're unmounting then just return, since this does a search on the | 2542 | * If we're unmounting then just return, since this does a search on the |
2509 | * normal root and not the commit root and we could deadlock. | 2543 | * normal root and not the commit root and we could deadlock. |
2510 | */ | 2544 | */ |
2511 | smp_mb(); | 2545 | if (btrfs_fs_closing(fs_info)) |
2512 | if (fs_info->closing) | ||
2513 | return 0; | 2546 | return 0; |
2514 | 2547 | ||
2515 | path = btrfs_alloc_path(); | 2548 | path = btrfs_alloc_path(); |
@@ -2543,6 +2576,9 @@ int btrfs_write_out_ino_cache(struct btrfs_root *root, | |||
2543 | struct inode *inode; | 2576 | struct inode *inode; |
2544 | int ret; | 2577 | int ret; |
2545 | 2578 | ||
2579 | if (!btrfs_test_opt(root, INODE_MAP_CACHE)) | ||
2580 | return 0; | ||
2581 | |||
2546 | inode = lookup_free_ino_inode(root, path); | 2582 | inode = lookup_free_ino_inode(root, path); |
2547 | if (IS_ERR(inode)) | 2583 | if (IS_ERR(inode)) |
2548 | return 0; | 2584 | return 0; |
diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c index 3262cd17a12f..b4087e0fa871 100644 --- a/fs/btrfs/inode-map.c +++ b/fs/btrfs/inode-map.c | |||
@@ -38,6 +38,9 @@ static int caching_kthread(void *data) | |||
38 | int slot; | 38 | int slot; |
39 | int ret; | 39 | int ret; |
40 | 40 | ||
41 | if (!btrfs_test_opt(root, INODE_MAP_CACHE)) | ||
42 | return 0; | ||
43 | |||
41 | path = btrfs_alloc_path(); | 44 | path = btrfs_alloc_path(); |
42 | if (!path) | 45 | if (!path) |
43 | return -ENOMEM; | 46 | return -ENOMEM; |
@@ -59,8 +62,7 @@ again: | |||
59 | goto out; | 62 | goto out; |
60 | 63 | ||
61 | while (1) { | 64 | while (1) { |
62 | smp_mb(); | 65 | if (btrfs_fs_closing(fs_info)) |
63 | if (fs_info->closing) | ||
64 | goto out; | 66 | goto out; |
65 | 67 | ||
66 | leaf = path->nodes[0]; | 68 | leaf = path->nodes[0]; |
@@ -141,6 +143,9 @@ static void start_caching(struct btrfs_root *root) | |||
141 | int ret; | 143 | int ret; |
142 | u64 objectid; | 144 | u64 objectid; |
143 | 145 | ||
146 | if (!btrfs_test_opt(root, INODE_MAP_CACHE)) | ||
147 | return; | ||
148 | |||
144 | spin_lock(&root->cache_lock); | 149 | spin_lock(&root->cache_lock); |
145 | if (root->cached != BTRFS_CACHE_NO) { | 150 | if (root->cached != BTRFS_CACHE_NO) { |
146 | spin_unlock(&root->cache_lock); | 151 | spin_unlock(&root->cache_lock); |
@@ -178,6 +183,9 @@ static void start_caching(struct btrfs_root *root) | |||
178 | 183 | ||
179 | int btrfs_find_free_ino(struct btrfs_root *root, u64 *objectid) | 184 | int btrfs_find_free_ino(struct btrfs_root *root, u64 *objectid) |
180 | { | 185 | { |
186 | if (!btrfs_test_opt(root, INODE_MAP_CACHE)) | ||
187 | return btrfs_find_free_objectid(root, objectid); | ||
188 | |||
181 | again: | 189 | again: |
182 | *objectid = btrfs_find_ino_for_alloc(root); | 190 | *objectid = btrfs_find_ino_for_alloc(root); |
183 | 191 | ||
@@ -201,6 +209,10 @@ void btrfs_return_ino(struct btrfs_root *root, u64 objectid) | |||
201 | { | 209 | { |
202 | struct btrfs_free_space_ctl *ctl = root->free_ino_ctl; | 210 | struct btrfs_free_space_ctl *ctl = root->free_ino_ctl; |
203 | struct btrfs_free_space_ctl *pinned = root->free_ino_pinned; | 211 | struct btrfs_free_space_ctl *pinned = root->free_ino_pinned; |
212 | |||
213 | if (!btrfs_test_opt(root, INODE_MAP_CACHE)) | ||
214 | return; | ||
215 | |||
204 | again: | 216 | again: |
205 | if (root->cached == BTRFS_CACHE_FINISHED) { | 217 | if (root->cached == BTRFS_CACHE_FINISHED) { |
206 | __btrfs_add_free_space(ctl, objectid, 1); | 218 | __btrfs_add_free_space(ctl, objectid, 1); |
@@ -250,6 +262,9 @@ void btrfs_unpin_free_ino(struct btrfs_root *root) | |||
250 | struct rb_node *n; | 262 | struct rb_node *n; |
251 | u64 count; | 263 | u64 count; |
252 | 264 | ||
265 | if (!btrfs_test_opt(root, INODE_MAP_CACHE)) | ||
266 | return; | ||
267 | |||
253 | while (1) { | 268 | while (1) { |
254 | n = rb_first(rbroot); | 269 | n = rb_first(rbroot); |
255 | if (!n) | 270 | if (!n) |
@@ -388,9 +403,24 @@ int btrfs_save_ino_cache(struct btrfs_root *root, | |||
388 | int prealloc; | 403 | int prealloc; |
389 | bool retry = false; | 404 | bool retry = false; |
390 | 405 | ||
406 | /* only fs tree and subvol/snap needs ino cache */ | ||
407 | if (root->root_key.objectid != BTRFS_FS_TREE_OBJECTID && | ||
408 | (root->root_key.objectid < BTRFS_FIRST_FREE_OBJECTID || | ||
409 | root->root_key.objectid > BTRFS_LAST_FREE_OBJECTID)) | ||
410 | return 0; | ||
411 | |||
412 | /* Don't save inode cache if we are deleting this root */ | ||
413 | if (btrfs_root_refs(&root->root_item) == 0 && | ||
414 | root != root->fs_info->tree_root) | ||
415 | return 0; | ||
416 | |||
417 | if (!btrfs_test_opt(root, INODE_MAP_CACHE)) | ||
418 | return 0; | ||
419 | |||
391 | path = btrfs_alloc_path(); | 420 | path = btrfs_alloc_path(); |
392 | if (!path) | 421 | if (!path) |
393 | return -ENOMEM; | 422 | return -ENOMEM; |
423 | |||
394 | again: | 424 | again: |
395 | inode = lookup_free_ino_inode(root, path); | 425 | inode = lookup_free_ino_inode(root, path); |
396 | if (IS_ERR(inode) && PTR_ERR(inode) != -ENOENT) { | 426 | if (IS_ERR(inode) && PTR_ERR(inode) != -ENOENT) { |
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index bb51bb1fa44f..ebf95f7a44d6 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c | |||
@@ -138,7 +138,6 @@ static noinline int insert_inline_extent(struct btrfs_trans_handle *trans, | |||
138 | return -ENOMEM; | 138 | return -ENOMEM; |
139 | 139 | ||
140 | path->leave_spinning = 1; | 140 | path->leave_spinning = 1; |
141 | btrfs_set_trans_block_group(trans, inode); | ||
142 | 141 | ||
143 | key.objectid = btrfs_ino(inode); | 142 | key.objectid = btrfs_ino(inode); |
144 | key.offset = start; | 143 | key.offset = start; |
@@ -426,9 +425,8 @@ again: | |||
426 | } | 425 | } |
427 | } | 426 | } |
428 | if (start == 0) { | 427 | if (start == 0) { |
429 | trans = btrfs_join_transaction(root, 1); | 428 | trans = btrfs_join_transaction(root); |
430 | BUG_ON(IS_ERR(trans)); | 429 | BUG_ON(IS_ERR(trans)); |
431 | btrfs_set_trans_block_group(trans, inode); | ||
432 | trans->block_rsv = &root->fs_info->delalloc_block_rsv; | 430 | trans->block_rsv = &root->fs_info->delalloc_block_rsv; |
433 | 431 | ||
434 | /* lets try to make an inline extent */ | 432 | /* lets try to make an inline extent */ |
@@ -623,8 +621,9 @@ retry: | |||
623 | async_extent->start + async_extent->ram_size - 1, | 621 | async_extent->start + async_extent->ram_size - 1, |
624 | GFP_NOFS); | 622 | GFP_NOFS); |
625 | 623 | ||
626 | trans = btrfs_join_transaction(root, 1); | 624 | trans = btrfs_join_transaction(root); |
627 | BUG_ON(IS_ERR(trans)); | 625 | BUG_ON(IS_ERR(trans)); |
626 | trans->block_rsv = &root->fs_info->delalloc_block_rsv; | ||
628 | ret = btrfs_reserve_extent(trans, root, | 627 | ret = btrfs_reserve_extent(trans, root, |
629 | async_extent->compressed_size, | 628 | async_extent->compressed_size, |
630 | async_extent->compressed_size, | 629 | async_extent->compressed_size, |
@@ -793,9 +792,8 @@ static noinline int cow_file_range(struct inode *inode, | |||
793 | int ret = 0; | 792 | int ret = 0; |
794 | 793 | ||
795 | BUG_ON(is_free_space_inode(root, inode)); | 794 | BUG_ON(is_free_space_inode(root, inode)); |
796 | trans = btrfs_join_transaction(root, 1); | 795 | trans = btrfs_join_transaction(root); |
797 | BUG_ON(IS_ERR(trans)); | 796 | BUG_ON(IS_ERR(trans)); |
798 | btrfs_set_trans_block_group(trans, inode); | ||
799 | trans->block_rsv = &root->fs_info->delalloc_block_rsv; | 797 | trans->block_rsv = &root->fs_info->delalloc_block_rsv; |
800 | 798 | ||
801 | num_bytes = (end - start + blocksize) & ~(blocksize - 1); | 799 | num_bytes = (end - start + blocksize) & ~(blocksize - 1); |
@@ -1077,10 +1075,12 @@ static noinline int run_delalloc_nocow(struct inode *inode, | |||
1077 | nolock = is_free_space_inode(root, inode); | 1075 | nolock = is_free_space_inode(root, inode); |
1078 | 1076 | ||
1079 | if (nolock) | 1077 | if (nolock) |
1080 | trans = btrfs_join_transaction_nolock(root, 1); | 1078 | trans = btrfs_join_transaction_nolock(root); |
1081 | else | 1079 | else |
1082 | trans = btrfs_join_transaction(root, 1); | 1080 | trans = btrfs_join_transaction(root); |
1081 | |||
1083 | BUG_ON(IS_ERR(trans)); | 1082 | BUG_ON(IS_ERR(trans)); |
1083 | trans->block_rsv = &root->fs_info->delalloc_block_rsv; | ||
1084 | 1084 | ||
1085 | cow_start = (u64)-1; | 1085 | cow_start = (u64)-1; |
1086 | cur_offset = start; | 1086 | cur_offset = start; |
@@ -1519,8 +1519,6 @@ static noinline int add_pending_csums(struct btrfs_trans_handle *trans, | |||
1519 | { | 1519 | { |
1520 | struct btrfs_ordered_sum *sum; | 1520 | struct btrfs_ordered_sum *sum; |
1521 | 1521 | ||
1522 | btrfs_set_trans_block_group(trans, inode); | ||
1523 | |||
1524 | list_for_each_entry(sum, list, list) { | 1522 | list_for_each_entry(sum, list, list) { |
1525 | btrfs_csum_file_blocks(trans, | 1523 | btrfs_csum_file_blocks(trans, |
1526 | BTRFS_I(inode)->root->fs_info->csum_root, sum); | 1524 | BTRFS_I(inode)->root->fs_info->csum_root, sum); |
@@ -1735,11 +1733,10 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) | |||
1735 | ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent); | 1733 | ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent); |
1736 | if (!ret) { | 1734 | if (!ret) { |
1737 | if (nolock) | 1735 | if (nolock) |
1738 | trans = btrfs_join_transaction_nolock(root, 1); | 1736 | trans = btrfs_join_transaction_nolock(root); |
1739 | else | 1737 | else |
1740 | trans = btrfs_join_transaction(root, 1); | 1738 | trans = btrfs_join_transaction(root); |
1741 | BUG_ON(IS_ERR(trans)); | 1739 | BUG_ON(IS_ERR(trans)); |
1742 | btrfs_set_trans_block_group(trans, inode); | ||
1743 | trans->block_rsv = &root->fs_info->delalloc_block_rsv; | 1740 | trans->block_rsv = &root->fs_info->delalloc_block_rsv; |
1744 | ret = btrfs_update_inode(trans, root, inode); | 1741 | ret = btrfs_update_inode(trans, root, inode); |
1745 | BUG_ON(ret); | 1742 | BUG_ON(ret); |
@@ -1752,11 +1749,10 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) | |||
1752 | 0, &cached_state, GFP_NOFS); | 1749 | 0, &cached_state, GFP_NOFS); |
1753 | 1750 | ||
1754 | if (nolock) | 1751 | if (nolock) |
1755 | trans = btrfs_join_transaction_nolock(root, 1); | 1752 | trans = btrfs_join_transaction_nolock(root); |
1756 | else | 1753 | else |
1757 | trans = btrfs_join_transaction(root, 1); | 1754 | trans = btrfs_join_transaction(root); |
1758 | BUG_ON(IS_ERR(trans)); | 1755 | BUG_ON(IS_ERR(trans)); |
1759 | btrfs_set_trans_block_group(trans, inode); | ||
1760 | trans->block_rsv = &root->fs_info->delalloc_block_rsv; | 1756 | trans->block_rsv = &root->fs_info->delalloc_block_rsv; |
1761 | 1757 | ||
1762 | if (test_bit(BTRFS_ORDERED_COMPRESSED, &ordered_extent->flags)) | 1758 | if (test_bit(BTRFS_ORDERED_COMPRESSED, &ordered_extent->flags)) |
@@ -2431,7 +2427,7 @@ int btrfs_orphan_cleanup(struct btrfs_root *root) | |||
2431 | (u64)-1); | 2427 | (u64)-1); |
2432 | 2428 | ||
2433 | if (root->orphan_block_rsv || root->orphan_item_inserted) { | 2429 | if (root->orphan_block_rsv || root->orphan_item_inserted) { |
2434 | trans = btrfs_join_transaction(root, 1); | 2430 | trans = btrfs_join_transaction(root); |
2435 | if (!IS_ERR(trans)) | 2431 | if (!IS_ERR(trans)) |
2436 | btrfs_end_transaction(trans, root); | 2432 | btrfs_end_transaction(trans, root); |
2437 | } | 2433 | } |
@@ -2511,12 +2507,12 @@ static void btrfs_read_locked_inode(struct inode *inode) | |||
2511 | struct btrfs_root *root = BTRFS_I(inode)->root; | 2507 | struct btrfs_root *root = BTRFS_I(inode)->root; |
2512 | struct btrfs_key location; | 2508 | struct btrfs_key location; |
2513 | int maybe_acls; | 2509 | int maybe_acls; |
2514 | u64 alloc_group_block; | ||
2515 | u32 rdev; | 2510 | u32 rdev; |
2516 | int ret; | 2511 | int ret; |
2517 | 2512 | ||
2518 | path = btrfs_alloc_path(); | 2513 | path = btrfs_alloc_path(); |
2519 | BUG_ON(!path); | 2514 | BUG_ON(!path); |
2515 | path->leave_spinning = 1; | ||
2520 | memcpy(&location, &BTRFS_I(inode)->location, sizeof(location)); | 2516 | memcpy(&location, &BTRFS_I(inode)->location, sizeof(location)); |
2521 | 2517 | ||
2522 | ret = btrfs_lookup_inode(NULL, root, path, &location, 0); | 2518 | ret = btrfs_lookup_inode(NULL, root, path, &location, 0); |
@@ -2526,6 +2522,12 @@ static void btrfs_read_locked_inode(struct inode *inode) | |||
2526 | leaf = path->nodes[0]; | 2522 | leaf = path->nodes[0]; |
2527 | inode_item = btrfs_item_ptr(leaf, path->slots[0], | 2523 | inode_item = btrfs_item_ptr(leaf, path->slots[0], |
2528 | struct btrfs_inode_item); | 2524 | struct btrfs_inode_item); |
2525 | if (!leaf->map_token) | ||
2526 | map_private_extent_buffer(leaf, (unsigned long)inode_item, | ||
2527 | sizeof(struct btrfs_inode_item), | ||
2528 | &leaf->map_token, &leaf->kaddr, | ||
2529 | &leaf->map_start, &leaf->map_len, | ||
2530 | KM_USER1); | ||
2529 | 2531 | ||
2530 | inode->i_mode = btrfs_inode_mode(leaf, inode_item); | 2532 | inode->i_mode = btrfs_inode_mode(leaf, inode_item); |
2531 | inode->i_nlink = btrfs_inode_nlink(leaf, inode_item); | 2533 | inode->i_nlink = btrfs_inode_nlink(leaf, inode_item); |
@@ -2555,8 +2557,6 @@ static void btrfs_read_locked_inode(struct inode *inode) | |||
2555 | BTRFS_I(inode)->index_cnt = (u64)-1; | 2557 | BTRFS_I(inode)->index_cnt = (u64)-1; |
2556 | BTRFS_I(inode)->flags = btrfs_inode_flags(leaf, inode_item); | 2558 | BTRFS_I(inode)->flags = btrfs_inode_flags(leaf, inode_item); |
2557 | 2559 | ||
2558 | alloc_group_block = btrfs_inode_block_group(leaf, inode_item); | ||
2559 | |||
2560 | /* | 2560 | /* |
2561 | * try to precache a NULL acl entry for files that don't have | 2561 | * try to precache a NULL acl entry for files that don't have |
2562 | * any xattrs or acls | 2562 | * any xattrs or acls |
@@ -2566,8 +2566,11 @@ static void btrfs_read_locked_inode(struct inode *inode) | |||
2566 | if (!maybe_acls) | 2566 | if (!maybe_acls) |
2567 | cache_no_acl(inode); | 2567 | cache_no_acl(inode); |
2568 | 2568 | ||
2569 | BTRFS_I(inode)->block_group = btrfs_find_block_group(root, 0, | 2569 | if (leaf->map_token) { |
2570 | alloc_group_block, 0); | 2570 | unmap_extent_buffer(leaf, leaf->map_token, KM_USER1); |
2571 | leaf->map_token = NULL; | ||
2572 | } | ||
2573 | |||
2571 | btrfs_free_path(path); | 2574 | btrfs_free_path(path); |
2572 | inode_item = NULL; | 2575 | inode_item = NULL; |
2573 | 2576 | ||
@@ -2647,7 +2650,7 @@ static void fill_inode_item(struct btrfs_trans_handle *trans, | |||
2647 | btrfs_set_inode_transid(leaf, item, trans->transid); | 2650 | btrfs_set_inode_transid(leaf, item, trans->transid); |
2648 | btrfs_set_inode_rdev(leaf, item, inode->i_rdev); | 2651 | btrfs_set_inode_rdev(leaf, item, inode->i_rdev); |
2649 | btrfs_set_inode_flags(leaf, item, BTRFS_I(inode)->flags); | 2652 | btrfs_set_inode_flags(leaf, item, BTRFS_I(inode)->flags); |
2650 | btrfs_set_inode_block_group(leaf, item, BTRFS_I(inode)->block_group); | 2653 | btrfs_set_inode_block_group(leaf, item, 0); |
2651 | 2654 | ||
2652 | if (leaf->map_token) { | 2655 | if (leaf->map_token) { |
2653 | unmap_extent_buffer(leaf, leaf->map_token, KM_USER1); | 2656 | unmap_extent_buffer(leaf, leaf->map_token, KM_USER1); |
@@ -3004,8 +3007,6 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry) | |||
3004 | if (IS_ERR(trans)) | 3007 | if (IS_ERR(trans)) |
3005 | return PTR_ERR(trans); | 3008 | return PTR_ERR(trans); |
3006 | 3009 | ||
3007 | btrfs_set_trans_block_group(trans, dir); | ||
3008 | |||
3009 | btrfs_record_unlink_dir(trans, dir, dentry->d_inode, 0); | 3010 | btrfs_record_unlink_dir(trans, dir, dentry->d_inode, 0); |
3010 | 3011 | ||
3011 | ret = btrfs_unlink_inode(trans, root, dir, dentry->d_inode, | 3012 | ret = btrfs_unlink_inode(trans, root, dir, dentry->d_inode, |
@@ -3094,8 +3095,6 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) | |||
3094 | if (IS_ERR(trans)) | 3095 | if (IS_ERR(trans)) |
3095 | return PTR_ERR(trans); | 3096 | return PTR_ERR(trans); |
3096 | 3097 | ||
3097 | btrfs_set_trans_block_group(trans, dir); | ||
3098 | |||
3099 | if (unlikely(btrfs_ino(inode) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) { | 3098 | if (unlikely(btrfs_ino(inode) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) { |
3100 | err = btrfs_unlink_subvol(trans, root, dir, | 3099 | err = btrfs_unlink_subvol(trans, root, dir, |
3101 | BTRFS_I(inode)->location.objectid, | 3100 | BTRFS_I(inode)->location.objectid, |
@@ -3514,7 +3513,6 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size) | |||
3514 | err = PTR_ERR(trans); | 3513 | err = PTR_ERR(trans); |
3515 | break; | 3514 | break; |
3516 | } | 3515 | } |
3517 | btrfs_set_trans_block_group(trans, inode); | ||
3518 | 3516 | ||
3519 | err = btrfs_drop_extents(trans, inode, cur_offset, | 3517 | err = btrfs_drop_extents(trans, inode, cur_offset, |
3520 | cur_offset + hole_size, | 3518 | cur_offset + hole_size, |
@@ -3650,7 +3648,6 @@ void btrfs_evict_inode(struct inode *inode) | |||
3650 | while (1) { | 3648 | while (1) { |
3651 | trans = btrfs_start_transaction(root, 0); | 3649 | trans = btrfs_start_transaction(root, 0); |
3652 | BUG_ON(IS_ERR(trans)); | 3650 | BUG_ON(IS_ERR(trans)); |
3653 | btrfs_set_trans_block_group(trans, inode); | ||
3654 | trans->block_rsv = root->orphan_block_rsv; | 3651 | trans->block_rsv = root->orphan_block_rsv; |
3655 | 3652 | ||
3656 | ret = btrfs_block_rsv_check(trans, root, | 3653 | ret = btrfs_block_rsv_check(trans, root, |
@@ -4133,7 +4130,8 @@ static int btrfs_real_readdir(struct file *filp, void *dirent, | |||
4133 | path = btrfs_alloc_path(); | 4130 | path = btrfs_alloc_path(); |
4134 | if (!path) | 4131 | if (!path) |
4135 | return -ENOMEM; | 4132 | return -ENOMEM; |
4136 | path->reada = 2; | 4133 | |
4134 | path->reada = 1; | ||
4137 | 4135 | ||
4138 | if (key_type == BTRFS_DIR_INDEX_KEY) { | 4136 | if (key_type == BTRFS_DIR_INDEX_KEY) { |
4139 | INIT_LIST_HEAD(&ins_list); | 4137 | INIT_LIST_HEAD(&ins_list); |
@@ -4268,18 +4266,16 @@ int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc) | |||
4268 | if (BTRFS_I(inode)->dummy_inode) | 4266 | if (BTRFS_I(inode)->dummy_inode) |
4269 | return 0; | 4267 | return 0; |
4270 | 4268 | ||
4271 | smp_mb(); | 4269 | if (btrfs_fs_closing(root->fs_info) && is_free_space_inode(root, inode)) |
4272 | if (root->fs_info->closing && is_free_space_inode(root, inode)) | ||
4273 | nolock = true; | 4270 | nolock = true; |
4274 | 4271 | ||
4275 | if (wbc->sync_mode == WB_SYNC_ALL) { | 4272 | if (wbc->sync_mode == WB_SYNC_ALL) { |
4276 | if (nolock) | 4273 | if (nolock) |
4277 | trans = btrfs_join_transaction_nolock(root, 1); | 4274 | trans = btrfs_join_transaction_nolock(root); |
4278 | else | 4275 | else |
4279 | trans = btrfs_join_transaction(root, 1); | 4276 | trans = btrfs_join_transaction(root); |
4280 | if (IS_ERR(trans)) | 4277 | if (IS_ERR(trans)) |
4281 | return PTR_ERR(trans); | 4278 | return PTR_ERR(trans); |
4282 | btrfs_set_trans_block_group(trans, inode); | ||
4283 | if (nolock) | 4279 | if (nolock) |
4284 | ret = btrfs_end_transaction_nolock(trans, root); | 4280 | ret = btrfs_end_transaction_nolock(trans, root); |
4285 | else | 4281 | else |
@@ -4294,7 +4290,7 @@ int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc) | |||
4294 | * FIXME, needs more benchmarking...there are no reasons other than performance | 4290 | * FIXME, needs more benchmarking...there are no reasons other than performance |
4295 | * to keep or drop this code. | 4291 | * to keep or drop this code. |
4296 | */ | 4292 | */ |
4297 | void btrfs_dirty_inode(struct inode *inode) | 4293 | void btrfs_dirty_inode(struct inode *inode, int flags) |
4298 | { | 4294 | { |
4299 | struct btrfs_root *root = BTRFS_I(inode)->root; | 4295 | struct btrfs_root *root = BTRFS_I(inode)->root; |
4300 | struct btrfs_trans_handle *trans; | 4296 | struct btrfs_trans_handle *trans; |
@@ -4303,9 +4299,8 @@ void btrfs_dirty_inode(struct inode *inode) | |||
4303 | if (BTRFS_I(inode)->dummy_inode) | 4299 | if (BTRFS_I(inode)->dummy_inode) |
4304 | return; | 4300 | return; |
4305 | 4301 | ||
4306 | trans = btrfs_join_transaction(root, 1); | 4302 | trans = btrfs_join_transaction(root); |
4307 | BUG_ON(IS_ERR(trans)); | 4303 | BUG_ON(IS_ERR(trans)); |
4308 | btrfs_set_trans_block_group(trans, inode); | ||
4309 | 4304 | ||
4310 | ret = btrfs_update_inode(trans, root, inode); | 4305 | ret = btrfs_update_inode(trans, root, inode); |
4311 | if (ret && ret == -ENOSPC) { | 4306 | if (ret && ret == -ENOSPC) { |
@@ -4319,7 +4314,6 @@ void btrfs_dirty_inode(struct inode *inode) | |||
4319 | PTR_ERR(trans)); | 4314 | PTR_ERR(trans)); |
4320 | return; | 4315 | return; |
4321 | } | 4316 | } |
4322 | btrfs_set_trans_block_group(trans, inode); | ||
4323 | 4317 | ||
4324 | ret = btrfs_update_inode(trans, root, inode); | 4318 | ret = btrfs_update_inode(trans, root, inode); |
4325 | if (ret) { | 4319 | if (ret) { |
@@ -4418,8 +4412,8 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, | |||
4418 | struct btrfs_root *root, | 4412 | struct btrfs_root *root, |
4419 | struct inode *dir, | 4413 | struct inode *dir, |
4420 | const char *name, int name_len, | 4414 | const char *name, int name_len, |
4421 | u64 ref_objectid, u64 objectid, | 4415 | u64 ref_objectid, u64 objectid, int mode, |
4422 | u64 alloc_hint, int mode, u64 *index) | 4416 | u64 *index) |
4423 | { | 4417 | { |
4424 | struct inode *inode; | 4418 | struct inode *inode; |
4425 | struct btrfs_inode_item *inode_item; | 4419 | struct btrfs_inode_item *inode_item; |
@@ -4472,8 +4466,6 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, | |||
4472 | owner = 0; | 4466 | owner = 0; |
4473 | else | 4467 | else |
4474 | owner = 1; | 4468 | owner = 1; |
4475 | BTRFS_I(inode)->block_group = | ||
4476 | btrfs_find_block_group(root, 0, alloc_hint, owner); | ||
4477 | 4469 | ||
4478 | key[0].objectid = objectid; | 4470 | key[0].objectid = objectid; |
4479 | btrfs_set_key_type(&key[0], BTRFS_INODE_ITEM_KEY); | 4471 | btrfs_set_key_type(&key[0], BTRFS_INODE_ITEM_KEY); |
@@ -4629,15 +4621,13 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry, | |||
4629 | if (IS_ERR(trans)) | 4621 | if (IS_ERR(trans)) |
4630 | return PTR_ERR(trans); | 4622 | return PTR_ERR(trans); |
4631 | 4623 | ||
4632 | btrfs_set_trans_block_group(trans, dir); | ||
4633 | |||
4634 | err = btrfs_find_free_ino(root, &objectid); | 4624 | err = btrfs_find_free_ino(root, &objectid); |
4635 | if (err) | 4625 | if (err) |
4636 | goto out_unlock; | 4626 | goto out_unlock; |
4637 | 4627 | ||
4638 | inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, | 4628 | inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, |
4639 | dentry->d_name.len, btrfs_ino(dir), objectid, | 4629 | dentry->d_name.len, btrfs_ino(dir), objectid, |
4640 | BTRFS_I(dir)->block_group, mode, &index); | 4630 | mode, &index); |
4641 | if (IS_ERR(inode)) { | 4631 | if (IS_ERR(inode)) { |
4642 | err = PTR_ERR(inode); | 4632 | err = PTR_ERR(inode); |
4643 | goto out_unlock; | 4633 | goto out_unlock; |
@@ -4649,7 +4639,6 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry, | |||
4649 | goto out_unlock; | 4639 | goto out_unlock; |
4650 | } | 4640 | } |
4651 | 4641 | ||
4652 | btrfs_set_trans_block_group(trans, inode); | ||
4653 | err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index); | 4642 | err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index); |
4654 | if (err) | 4643 | if (err) |
4655 | drop_inode = 1; | 4644 | drop_inode = 1; |
@@ -4658,8 +4647,6 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry, | |||
4658 | init_special_inode(inode, inode->i_mode, rdev); | 4647 | init_special_inode(inode, inode->i_mode, rdev); |
4659 | btrfs_update_inode(trans, root, inode); | 4648 | btrfs_update_inode(trans, root, inode); |
4660 | } | 4649 | } |
4661 | btrfs_update_inode_block_group(trans, inode); | ||
4662 | btrfs_update_inode_block_group(trans, dir); | ||
4663 | out_unlock: | 4650 | out_unlock: |
4664 | nr = trans->blocks_used; | 4651 | nr = trans->blocks_used; |
4665 | btrfs_end_transaction_throttle(trans, root); | 4652 | btrfs_end_transaction_throttle(trans, root); |
@@ -4692,15 +4679,13 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, | |||
4692 | if (IS_ERR(trans)) | 4679 | if (IS_ERR(trans)) |
4693 | return PTR_ERR(trans); | 4680 | return PTR_ERR(trans); |
4694 | 4681 | ||
4695 | btrfs_set_trans_block_group(trans, dir); | ||
4696 | |||
4697 | err = btrfs_find_free_ino(root, &objectid); | 4682 | err = btrfs_find_free_ino(root, &objectid); |
4698 | if (err) | 4683 | if (err) |
4699 | goto out_unlock; | 4684 | goto out_unlock; |
4700 | 4685 | ||
4701 | inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, | 4686 | inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, |
4702 | dentry->d_name.len, btrfs_ino(dir), objectid, | 4687 | dentry->d_name.len, btrfs_ino(dir), objectid, |
4703 | BTRFS_I(dir)->block_group, mode, &index); | 4688 | mode, &index); |
4704 | if (IS_ERR(inode)) { | 4689 | if (IS_ERR(inode)) { |
4705 | err = PTR_ERR(inode); | 4690 | err = PTR_ERR(inode); |
4706 | goto out_unlock; | 4691 | goto out_unlock; |
@@ -4712,7 +4697,6 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, | |||
4712 | goto out_unlock; | 4697 | goto out_unlock; |
4713 | } | 4698 | } |
4714 | 4699 | ||
4715 | btrfs_set_trans_block_group(trans, inode); | ||
4716 | err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index); | 4700 | err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index); |
4717 | if (err) | 4701 | if (err) |
4718 | drop_inode = 1; | 4702 | drop_inode = 1; |
@@ -4723,8 +4707,6 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, | |||
4723 | inode->i_op = &btrfs_file_inode_operations; | 4707 | inode->i_op = &btrfs_file_inode_operations; |
4724 | BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; | 4708 | BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; |
4725 | } | 4709 | } |
4726 | btrfs_update_inode_block_group(trans, inode); | ||
4727 | btrfs_update_inode_block_group(trans, dir); | ||
4728 | out_unlock: | 4710 | out_unlock: |
4729 | nr = trans->blocks_used; | 4711 | nr = trans->blocks_used; |
4730 | btrfs_end_transaction_throttle(trans, root); | 4712 | btrfs_end_transaction_throttle(trans, root); |
@@ -4771,8 +4753,6 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, | |||
4771 | 4753 | ||
4772 | btrfs_inc_nlink(inode); | 4754 | btrfs_inc_nlink(inode); |
4773 | inode->i_ctime = CURRENT_TIME; | 4755 | inode->i_ctime = CURRENT_TIME; |
4774 | |||
4775 | btrfs_set_trans_block_group(trans, dir); | ||
4776 | ihold(inode); | 4756 | ihold(inode); |
4777 | 4757 | ||
4778 | err = btrfs_add_nondir(trans, dir, dentry, inode, 1, index); | 4758 | err = btrfs_add_nondir(trans, dir, dentry, inode, 1, index); |
@@ -4781,7 +4761,6 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, | |||
4781 | drop_inode = 1; | 4761 | drop_inode = 1; |
4782 | } else { | 4762 | } else { |
4783 | struct dentry *parent = dget_parent(dentry); | 4763 | struct dentry *parent = dget_parent(dentry); |
4784 | btrfs_update_inode_block_group(trans, dir); | ||
4785 | err = btrfs_update_inode(trans, root, inode); | 4764 | err = btrfs_update_inode(trans, root, inode); |
4786 | BUG_ON(err); | 4765 | BUG_ON(err); |
4787 | btrfs_log_new_name(trans, inode, NULL, parent); | 4766 | btrfs_log_new_name(trans, inode, NULL, parent); |
@@ -4818,7 +4797,6 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) | |||
4818 | trans = btrfs_start_transaction(root, 5); | 4797 | trans = btrfs_start_transaction(root, 5); |
4819 | if (IS_ERR(trans)) | 4798 | if (IS_ERR(trans)) |
4820 | return PTR_ERR(trans); | 4799 | return PTR_ERR(trans); |
4821 | btrfs_set_trans_block_group(trans, dir); | ||
4822 | 4800 | ||
4823 | err = btrfs_find_free_ino(root, &objectid); | 4801 | err = btrfs_find_free_ino(root, &objectid); |
4824 | if (err) | 4802 | if (err) |
@@ -4826,8 +4804,7 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) | |||
4826 | 4804 | ||
4827 | inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, | 4805 | inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, |
4828 | dentry->d_name.len, btrfs_ino(dir), objectid, | 4806 | dentry->d_name.len, btrfs_ino(dir), objectid, |
4829 | BTRFS_I(dir)->block_group, S_IFDIR | mode, | 4807 | S_IFDIR | mode, &index); |
4830 | &index); | ||
4831 | if (IS_ERR(inode)) { | 4808 | if (IS_ERR(inode)) { |
4832 | err = PTR_ERR(inode); | 4809 | err = PTR_ERR(inode); |
4833 | goto out_fail; | 4810 | goto out_fail; |
@@ -4841,7 +4818,6 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) | |||
4841 | 4818 | ||
4842 | inode->i_op = &btrfs_dir_inode_operations; | 4819 | inode->i_op = &btrfs_dir_inode_operations; |
4843 | inode->i_fop = &btrfs_dir_file_operations; | 4820 | inode->i_fop = &btrfs_dir_file_operations; |
4844 | btrfs_set_trans_block_group(trans, inode); | ||
4845 | 4821 | ||
4846 | btrfs_i_size_write(inode, 0); | 4822 | btrfs_i_size_write(inode, 0); |
4847 | err = btrfs_update_inode(trans, root, inode); | 4823 | err = btrfs_update_inode(trans, root, inode); |
@@ -4855,8 +4831,6 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) | |||
4855 | 4831 | ||
4856 | d_instantiate(dentry, inode); | 4832 | d_instantiate(dentry, inode); |
4857 | drop_on_err = 0; | 4833 | drop_on_err = 0; |
4858 | btrfs_update_inode_block_group(trans, inode); | ||
4859 | btrfs_update_inode_block_group(trans, dir); | ||
4860 | 4834 | ||
4861 | out_fail: | 4835 | out_fail: |
4862 | nr = trans->blocks_used; | 4836 | nr = trans->blocks_used; |
@@ -4989,7 +4963,15 @@ again: | |||
4989 | 4963 | ||
4990 | if (!path) { | 4964 | if (!path) { |
4991 | path = btrfs_alloc_path(); | 4965 | path = btrfs_alloc_path(); |
4992 | BUG_ON(!path); | 4966 | if (!path) { |
4967 | err = -ENOMEM; | ||
4968 | goto out; | ||
4969 | } | ||
4970 | /* | ||
4971 | * Chances are we'll be called again, so go ahead and do | ||
4972 | * readahead | ||
4973 | */ | ||
4974 | path->reada = 1; | ||
4993 | } | 4975 | } |
4994 | 4976 | ||
4995 | ret = btrfs_lookup_file_extent(trans, root, path, | 4977 | ret = btrfs_lookup_file_extent(trans, root, path, |
@@ -5130,8 +5112,10 @@ again: | |||
5130 | kunmap(page); | 5112 | kunmap(page); |
5131 | free_extent_map(em); | 5113 | free_extent_map(em); |
5132 | em = NULL; | 5114 | em = NULL; |
5115 | |||
5133 | btrfs_release_path(path); | 5116 | btrfs_release_path(path); |
5134 | trans = btrfs_join_transaction(root, 1); | 5117 | trans = btrfs_join_transaction(root); |
5118 | |||
5135 | if (IS_ERR(trans)) | 5119 | if (IS_ERR(trans)) |
5136 | return ERR_CAST(trans); | 5120 | return ERR_CAST(trans); |
5137 | goto again; | 5121 | goto again; |
@@ -5375,7 +5359,7 @@ static struct extent_map *btrfs_new_extent_direct(struct inode *inode, | |||
5375 | btrfs_drop_extent_cache(inode, start, start + len - 1, 0); | 5359 | btrfs_drop_extent_cache(inode, start, start + len - 1, 0); |
5376 | } | 5360 | } |
5377 | 5361 | ||
5378 | trans = btrfs_join_transaction(root, 0); | 5362 | trans = btrfs_join_transaction(root); |
5379 | if (IS_ERR(trans)) | 5363 | if (IS_ERR(trans)) |
5380 | return ERR_CAST(trans); | 5364 | return ERR_CAST(trans); |
5381 | 5365 | ||
@@ -5611,7 +5595,7 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock, | |||
5611 | * to make sure the current transaction stays open | 5595 | * to make sure the current transaction stays open |
5612 | * while we look for nocow cross refs | 5596 | * while we look for nocow cross refs |
5613 | */ | 5597 | */ |
5614 | trans = btrfs_join_transaction(root, 0); | 5598 | trans = btrfs_join_transaction(root); |
5615 | if (IS_ERR(trans)) | 5599 | if (IS_ERR(trans)) |
5616 | goto must_cow; | 5600 | goto must_cow; |
5617 | 5601 | ||
@@ -5750,7 +5734,7 @@ again: | |||
5750 | 5734 | ||
5751 | BUG_ON(!ordered); | 5735 | BUG_ON(!ordered); |
5752 | 5736 | ||
5753 | trans = btrfs_join_transaction(root, 1); | 5737 | trans = btrfs_join_transaction(root); |
5754 | if (IS_ERR(trans)) { | 5738 | if (IS_ERR(trans)) { |
5755 | err = -ENOMEM; | 5739 | err = -ENOMEM; |
5756 | goto out; | 5740 | goto out; |
@@ -6500,6 +6484,7 @@ out: | |||
6500 | static int btrfs_truncate(struct inode *inode) | 6484 | static int btrfs_truncate(struct inode *inode) |
6501 | { | 6485 | { |
6502 | struct btrfs_root *root = BTRFS_I(inode)->root; | 6486 | struct btrfs_root *root = BTRFS_I(inode)->root; |
6487 | struct btrfs_block_rsv *rsv; | ||
6503 | int ret; | 6488 | int ret; |
6504 | int err = 0; | 6489 | int err = 0; |
6505 | struct btrfs_trans_handle *trans; | 6490 | struct btrfs_trans_handle *trans; |
@@ -6513,28 +6498,80 @@ static int btrfs_truncate(struct inode *inode) | |||
6513 | btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1); | 6498 | btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1); |
6514 | btrfs_ordered_update_i_size(inode, inode->i_size, NULL); | 6499 | btrfs_ordered_update_i_size(inode, inode->i_size, NULL); |
6515 | 6500 | ||
6516 | trans = btrfs_start_transaction(root, 5); | 6501 | /* |
6517 | if (IS_ERR(trans)) | 6502 | * Yes ladies and gentlemen, this is indeed ugly. The fact is we have |
6518 | return PTR_ERR(trans); | 6503 | * 3 things going on here |
6504 | * | ||
6505 | * 1) We need to reserve space for our orphan item and the space to | ||
6506 | * delete our orphan item. Lord knows we don't want to have a dangling | ||
6507 | * orphan item because we didn't reserve space to remove it. | ||
6508 | * | ||
6509 | * 2) We need to reserve space to update our inode. | ||
6510 | * | ||
6511 | * 3) We need to have something to cache all the space that is going to | ||
6512 | * be free'd up by the truncate operation, but also have some slack | ||
6513 | * space reserved in case it uses space during the truncate (thank you | ||
6514 | * very much snapshotting). | ||
6515 | * | ||
6516 | * And we need these to all be separate. The fact is we can use a lot of | ||
6517 | * space doing the truncate, and we have no earthly idea how much space | ||
6518 | * we will use, so we need the truncate reservation to be separate so it | ||
6519 | * doesn't end up using space reserved for updating the inode or | ||
6520 | * removing the orphan item. We also need to be able to stop the | ||
6521 | * transaction and start a new one, which means we need to be able to | ||
6522 | * update the inode several times, and we have no way of knowing how | ||
6523 | * many times that will be, so we can't just reserve 1 item for the | ||
6524 | * entirety of the operation, so that has to be done separately as well. | ||
6525 | * Then there is the orphan item, which does indeed need to be held on | ||
6526 | * to for the whole operation, and we need nobody to touch this reserved | ||
6527 | * space except the orphan code. | ||
6528 | * | ||
6529 | * So that leaves us with | ||
6530 | * | ||
6531 | * 1) root->orphan_block_rsv - for the orphan deletion. | ||
6532 | * 2) rsv - for the truncate reservation, which we will steal from the | ||
6533 | * transaction reservation. | ||
6534 | * 3) fs_info->trans_block_rsv - this will have 1 item's worth left for | ||
6535 | * updating the inode. | ||
6536 | */ | ||
6537 | rsv = btrfs_alloc_block_rsv(root); | ||
6538 | if (!rsv) | ||
6539 | return -ENOMEM; | ||
6540 | btrfs_add_durable_block_rsv(root->fs_info, rsv); | ||
6541 | |||
6542 | trans = btrfs_start_transaction(root, 4); | ||
6543 | if (IS_ERR(trans)) { | ||
6544 | err = PTR_ERR(trans); | ||
6545 | goto out; | ||
6546 | } | ||
6519 | 6547 | ||
6520 | btrfs_set_trans_block_group(trans, inode); | 6548 | /* |
6549 | * Reserve space for the truncate process. Truncate should be adding | ||
6550 | * space, but if there are snapshots it may end up using space. | ||
6551 | */ | ||
6552 | ret = btrfs_truncate_reserve_metadata(trans, root, rsv); | ||
6553 | BUG_ON(ret); | ||
6521 | 6554 | ||
6522 | ret = btrfs_orphan_add(trans, inode); | 6555 | ret = btrfs_orphan_add(trans, inode); |
6523 | if (ret) { | 6556 | if (ret) { |
6524 | btrfs_end_transaction(trans, root); | 6557 | btrfs_end_transaction(trans, root); |
6525 | return ret; | 6558 | goto out; |
6526 | } | 6559 | } |
6527 | 6560 | ||
6528 | nr = trans->blocks_used; | 6561 | nr = trans->blocks_used; |
6529 | btrfs_end_transaction(trans, root); | 6562 | btrfs_end_transaction(trans, root); |
6530 | btrfs_btree_balance_dirty(root, nr); | 6563 | btrfs_btree_balance_dirty(root, nr); |
6531 | 6564 | ||
6532 | /* Now start a transaction for the truncate */ | 6565 | /* |
6533 | trans = btrfs_start_transaction(root, 0); | 6566 | * Ok so we've already migrated our bytes over for the truncate, so here |
6534 | if (IS_ERR(trans)) | 6567 | * just reserve the one slot we need for updating the inode. |
6535 | return PTR_ERR(trans); | 6568 | */ |
6536 | btrfs_set_trans_block_group(trans, inode); | 6569 | trans = btrfs_start_transaction(root, 1); |
6537 | trans->block_rsv = root->orphan_block_rsv; | 6570 | if (IS_ERR(trans)) { |
6571 | err = PTR_ERR(trans); | ||
6572 | goto out; | ||
6573 | } | ||
6574 | trans->block_rsv = rsv; | ||
6538 | 6575 | ||
6539 | /* | 6576 | /* |
6540 | * setattr is responsible for setting the ordered_data_close flag, | 6577 | * setattr is responsible for setting the ordered_data_close flag, |
@@ -6558,24 +6595,17 @@ static int btrfs_truncate(struct inode *inode) | |||
6558 | 6595 | ||
6559 | while (1) { | 6596 | while (1) { |
6560 | if (!trans) { | 6597 | if (!trans) { |
6561 | trans = btrfs_start_transaction(root, 0); | 6598 | trans = btrfs_start_transaction(root, 3); |
6562 | if (IS_ERR(trans)) | 6599 | if (IS_ERR(trans)) { |
6563 | return PTR_ERR(trans); | 6600 | err = PTR_ERR(trans); |
6564 | btrfs_set_trans_block_group(trans, inode); | 6601 | goto out; |
6565 | trans->block_rsv = root->orphan_block_rsv; | 6602 | } |
6566 | } | ||
6567 | 6603 | ||
6568 | ret = btrfs_block_rsv_check(trans, root, | 6604 | ret = btrfs_truncate_reserve_metadata(trans, root, |
6569 | root->orphan_block_rsv, 0, 5); | 6605 | rsv); |
6570 | if (ret == -EAGAIN) { | 6606 | BUG_ON(ret); |
6571 | ret = btrfs_commit_transaction(trans, root); | 6607 | |
6572 | if (ret) | 6608 | trans->block_rsv = rsv; |
6573 | return ret; | ||
6574 | trans = NULL; | ||
6575 | continue; | ||
6576 | } else if (ret) { | ||
6577 | err = ret; | ||
6578 | break; | ||
6579 | } | 6609 | } |
6580 | 6610 | ||
6581 | ret = btrfs_truncate_inode_items(trans, root, inode, | 6611 | ret = btrfs_truncate_inode_items(trans, root, inode, |
@@ -6586,6 +6616,7 @@ static int btrfs_truncate(struct inode *inode) | |||
6586 | break; | 6616 | break; |
6587 | } | 6617 | } |
6588 | 6618 | ||
6619 | trans->block_rsv = &root->fs_info->trans_block_rsv; | ||
6589 | ret = btrfs_update_inode(trans, root, inode); | 6620 | ret = btrfs_update_inode(trans, root, inode); |
6590 | if (ret) { | 6621 | if (ret) { |
6591 | err = ret; | 6622 | err = ret; |
@@ -6599,6 +6630,7 @@ static int btrfs_truncate(struct inode *inode) | |||
6599 | } | 6630 | } |
6600 | 6631 | ||
6601 | if (ret == 0 && inode->i_nlink > 0) { | 6632 | if (ret == 0 && inode->i_nlink > 0) { |
6633 | trans->block_rsv = root->orphan_block_rsv; | ||
6602 | ret = btrfs_orphan_del(trans, inode); | 6634 | ret = btrfs_orphan_del(trans, inode); |
6603 | if (ret) | 6635 | if (ret) |
6604 | err = ret; | 6636 | err = ret; |
@@ -6610,15 +6642,20 @@ static int btrfs_truncate(struct inode *inode) | |||
6610 | ret = btrfs_orphan_del(NULL, inode); | 6642 | ret = btrfs_orphan_del(NULL, inode); |
6611 | } | 6643 | } |
6612 | 6644 | ||
6645 | trans->block_rsv = &root->fs_info->trans_block_rsv; | ||
6613 | ret = btrfs_update_inode(trans, root, inode); | 6646 | ret = btrfs_update_inode(trans, root, inode); |
6614 | if (ret && !err) | 6647 | if (ret && !err) |
6615 | err = ret; | 6648 | err = ret; |
6616 | 6649 | ||
6617 | nr = trans->blocks_used; | 6650 | nr = trans->blocks_used; |
6618 | ret = btrfs_end_transaction_throttle(trans, root); | 6651 | ret = btrfs_end_transaction_throttle(trans, root); |
6652 | btrfs_btree_balance_dirty(root, nr); | ||
6653 | |||
6654 | out: | ||
6655 | btrfs_free_block_rsv(root, rsv); | ||
6656 | |||
6619 | if (ret && !err) | 6657 | if (ret && !err) |
6620 | err = ret; | 6658 | err = ret; |
6621 | btrfs_btree_balance_dirty(root, nr); | ||
6622 | 6659 | ||
6623 | return err; | 6660 | return err; |
6624 | } | 6661 | } |
@@ -6627,15 +6664,14 @@ static int btrfs_truncate(struct inode *inode) | |||
6627 | * create a new subvolume directory/inode (helper for the ioctl). | 6664 | * create a new subvolume directory/inode (helper for the ioctl). |
6628 | */ | 6665 | */ |
6629 | int btrfs_create_subvol_root(struct btrfs_trans_handle *trans, | 6666 | int btrfs_create_subvol_root(struct btrfs_trans_handle *trans, |
6630 | struct btrfs_root *new_root, | 6667 | struct btrfs_root *new_root, u64 new_dirid) |
6631 | u64 new_dirid, u64 alloc_hint) | ||
6632 | { | 6668 | { |
6633 | struct inode *inode; | 6669 | struct inode *inode; |
6634 | int err; | 6670 | int err; |
6635 | u64 index = 0; | 6671 | u64 index = 0; |
6636 | 6672 | ||
6637 | inode = btrfs_new_inode(trans, new_root, NULL, "..", 2, new_dirid, | 6673 | inode = btrfs_new_inode(trans, new_root, NULL, "..", 2, new_dirid, |
6638 | new_dirid, alloc_hint, S_IFDIR | 0700, &index); | 6674 | new_dirid, S_IFDIR | 0700, &index); |
6639 | if (IS_ERR(inode)) | 6675 | if (IS_ERR(inode)) |
6640 | return PTR_ERR(inode); | 6676 | return PTR_ERR(inode); |
6641 | inode->i_op = &btrfs_dir_inode_operations; | 6677 | inode->i_op = &btrfs_dir_inode_operations; |
@@ -6748,21 +6784,6 @@ void btrfs_destroy_inode(struct inode *inode) | |||
6748 | spin_unlock(&root->fs_info->ordered_extent_lock); | 6784 | spin_unlock(&root->fs_info->ordered_extent_lock); |
6749 | } | 6785 | } |
6750 | 6786 | ||
6751 | if (root == root->fs_info->tree_root) { | ||
6752 | struct btrfs_block_group_cache *block_group; | ||
6753 | |||
6754 | block_group = btrfs_lookup_block_group(root->fs_info, | ||
6755 | BTRFS_I(inode)->block_group); | ||
6756 | if (block_group && block_group->inode == inode) { | ||
6757 | spin_lock(&block_group->lock); | ||
6758 | block_group->inode = NULL; | ||
6759 | spin_unlock(&block_group->lock); | ||
6760 | btrfs_put_block_group(block_group); | ||
6761 | } else if (block_group) { | ||
6762 | btrfs_put_block_group(block_group); | ||
6763 | } | ||
6764 | } | ||
6765 | |||
6766 | spin_lock(&root->orphan_lock); | 6787 | spin_lock(&root->orphan_lock); |
6767 | if (!list_empty(&BTRFS_I(inode)->i_orphan)) { | 6788 | if (!list_empty(&BTRFS_I(inode)->i_orphan)) { |
6768 | printk(KERN_INFO "BTRFS: inode %llu still on the orphan list\n", | 6789 | printk(KERN_INFO "BTRFS: inode %llu still on the orphan list\n", |
@@ -6948,8 +6969,6 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
6948 | goto out_notrans; | 6969 | goto out_notrans; |
6949 | } | 6970 | } |
6950 | 6971 | ||
6951 | btrfs_set_trans_block_group(trans, new_dir); | ||
6952 | |||
6953 | if (dest != root) | 6972 | if (dest != root) |
6954 | btrfs_record_root_in_trans(trans, dest); | 6973 | btrfs_record_root_in_trans(trans, dest); |
6955 | 6974 | ||
@@ -7131,16 +7150,13 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, | |||
7131 | if (IS_ERR(trans)) | 7150 | if (IS_ERR(trans)) |
7132 | return PTR_ERR(trans); | 7151 | return PTR_ERR(trans); |
7133 | 7152 | ||
7134 | btrfs_set_trans_block_group(trans, dir); | ||
7135 | |||
7136 | err = btrfs_find_free_ino(root, &objectid); | 7153 | err = btrfs_find_free_ino(root, &objectid); |
7137 | if (err) | 7154 | if (err) |
7138 | goto out_unlock; | 7155 | goto out_unlock; |
7139 | 7156 | ||
7140 | inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, | 7157 | inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, |
7141 | dentry->d_name.len, btrfs_ino(dir), objectid, | 7158 | dentry->d_name.len, btrfs_ino(dir), objectid, |
7142 | BTRFS_I(dir)->block_group, S_IFLNK|S_IRWXUGO, | 7159 | S_IFLNK|S_IRWXUGO, &index); |
7143 | &index); | ||
7144 | if (IS_ERR(inode)) { | 7160 | if (IS_ERR(inode)) { |
7145 | err = PTR_ERR(inode); | 7161 | err = PTR_ERR(inode); |
7146 | goto out_unlock; | 7162 | goto out_unlock; |
@@ -7152,7 +7168,6 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, | |||
7152 | goto out_unlock; | 7168 | goto out_unlock; |
7153 | } | 7169 | } |
7154 | 7170 | ||
7155 | btrfs_set_trans_block_group(trans, inode); | ||
7156 | err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index); | 7171 | err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index); |
7157 | if (err) | 7172 | if (err) |
7158 | drop_inode = 1; | 7173 | drop_inode = 1; |
@@ -7163,8 +7178,6 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, | |||
7163 | inode->i_op = &btrfs_file_inode_operations; | 7178 | inode->i_op = &btrfs_file_inode_operations; |
7164 | BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; | 7179 | BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; |
7165 | } | 7180 | } |
7166 | btrfs_update_inode_block_group(trans, inode); | ||
7167 | btrfs_update_inode_block_group(trans, dir); | ||
7168 | if (drop_inode) | 7181 | if (drop_inode) |
7169 | goto out_unlock; | 7182 | goto out_unlock; |
7170 | 7183 | ||
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 85e818ce00c5..ac37040e426a 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c | |||
@@ -243,7 +243,7 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg) | |||
243 | ip->flags &= ~(BTRFS_INODE_COMPRESS | BTRFS_INODE_NOCOMPRESS); | 243 | ip->flags &= ~(BTRFS_INODE_COMPRESS | BTRFS_INODE_NOCOMPRESS); |
244 | } | 244 | } |
245 | 245 | ||
246 | trans = btrfs_join_transaction(root, 1); | 246 | trans = btrfs_join_transaction(root); |
247 | BUG_ON(IS_ERR(trans)); | 247 | BUG_ON(IS_ERR(trans)); |
248 | 248 | ||
249 | ret = btrfs_update_inode(trans, root, inode); | 249 | ret = btrfs_update_inode(trans, root, inode); |
@@ -414,8 +414,7 @@ static noinline int create_subvol(struct btrfs_root *root, | |||
414 | 414 | ||
415 | btrfs_record_root_in_trans(trans, new_root); | 415 | btrfs_record_root_in_trans(trans, new_root); |
416 | 416 | ||
417 | ret = btrfs_create_subvol_root(trans, new_root, new_dirid, | 417 | ret = btrfs_create_subvol_root(trans, new_root, new_dirid); |
418 | BTRFS_I(dir)->block_group); | ||
419 | /* | 418 | /* |
420 | * insert the directory item | 419 | * insert the directory item |
421 | */ | 420 | */ |
@@ -707,16 +706,17 @@ static int find_new_extents(struct btrfs_root *root, | |||
707 | struct btrfs_file_extent_item *extent; | 706 | struct btrfs_file_extent_item *extent; |
708 | int type; | 707 | int type; |
709 | int ret; | 708 | int ret; |
709 | u64 ino = btrfs_ino(inode); | ||
710 | 710 | ||
711 | path = btrfs_alloc_path(); | 711 | path = btrfs_alloc_path(); |
712 | if (!path) | 712 | if (!path) |
713 | return -ENOMEM; | 713 | return -ENOMEM; |
714 | 714 | ||
715 | min_key.objectid = inode->i_ino; | 715 | min_key.objectid = ino; |
716 | min_key.type = BTRFS_EXTENT_DATA_KEY; | 716 | min_key.type = BTRFS_EXTENT_DATA_KEY; |
717 | min_key.offset = *off; | 717 | min_key.offset = *off; |
718 | 718 | ||
719 | max_key.objectid = inode->i_ino; | 719 | max_key.objectid = ino; |
720 | max_key.type = (u8)-1; | 720 | max_key.type = (u8)-1; |
721 | max_key.offset = (u64)-1; | 721 | max_key.offset = (u64)-1; |
722 | 722 | ||
@@ -727,7 +727,7 @@ static int find_new_extents(struct btrfs_root *root, | |||
727 | path, 0, newer_than); | 727 | path, 0, newer_than); |
728 | if (ret != 0) | 728 | if (ret != 0) |
729 | goto none; | 729 | goto none; |
730 | if (min_key.objectid != inode->i_ino) | 730 | if (min_key.objectid != ino) |
731 | goto none; | 731 | goto none; |
732 | if (min_key.type != BTRFS_EXTENT_DATA_KEY) | 732 | if (min_key.type != BTRFS_EXTENT_DATA_KEY) |
733 | goto none; | 733 | goto none; |
@@ -2489,12 +2489,10 @@ static long btrfs_ioctl_trans_start(struct file *file) | |||
2489 | if (ret) | 2489 | if (ret) |
2490 | goto out; | 2490 | goto out; |
2491 | 2491 | ||
2492 | mutex_lock(&root->fs_info->trans_mutex); | 2492 | atomic_inc(&root->fs_info->open_ioctl_trans); |
2493 | root->fs_info->open_ioctl_trans++; | ||
2494 | mutex_unlock(&root->fs_info->trans_mutex); | ||
2495 | 2493 | ||
2496 | ret = -ENOMEM; | 2494 | ret = -ENOMEM; |
2497 | trans = btrfs_start_ioctl_transaction(root, 0); | 2495 | trans = btrfs_start_ioctl_transaction(root); |
2498 | if (IS_ERR(trans)) | 2496 | if (IS_ERR(trans)) |
2499 | goto out_drop; | 2497 | goto out_drop; |
2500 | 2498 | ||
@@ -2502,9 +2500,7 @@ static long btrfs_ioctl_trans_start(struct file *file) | |||
2502 | return 0; | 2500 | return 0; |
2503 | 2501 | ||
2504 | out_drop: | 2502 | out_drop: |
2505 | mutex_lock(&root->fs_info->trans_mutex); | 2503 | atomic_dec(&root->fs_info->open_ioctl_trans); |
2506 | root->fs_info->open_ioctl_trans--; | ||
2507 | mutex_unlock(&root->fs_info->trans_mutex); | ||
2508 | mnt_drop_write(file->f_path.mnt); | 2504 | mnt_drop_write(file->f_path.mnt); |
2509 | out: | 2505 | out: |
2510 | return ret; | 2506 | return ret; |
@@ -2738,9 +2734,7 @@ long btrfs_ioctl_trans_end(struct file *file) | |||
2738 | 2734 | ||
2739 | btrfs_end_transaction(trans, root); | 2735 | btrfs_end_transaction(trans, root); |
2740 | 2736 | ||
2741 | mutex_lock(&root->fs_info->trans_mutex); | 2737 | atomic_dec(&root->fs_info->open_ioctl_trans); |
2742 | root->fs_info->open_ioctl_trans--; | ||
2743 | mutex_unlock(&root->fs_info->trans_mutex); | ||
2744 | 2738 | ||
2745 | mnt_drop_write(file->f_path.mnt); | 2739 | mnt_drop_write(file->f_path.mnt); |
2746 | return 0; | 2740 | return 0; |
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index ca38eca70af0..b1ef27cc673b 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c | |||
@@ -677,6 +677,8 @@ struct backref_node *build_backref_tree(struct reloc_control *rc, | |||
677 | err = -ENOMEM; | 677 | err = -ENOMEM; |
678 | goto out; | 678 | goto out; |
679 | } | 679 | } |
680 | path1->reada = 1; | ||
681 | path2->reada = 2; | ||
680 | 682 | ||
681 | node = alloc_backref_node(cache); | 683 | node = alloc_backref_node(cache); |
682 | if (!node) { | 684 | if (!node) { |
@@ -1999,6 +2001,7 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc, | |||
1999 | path = btrfs_alloc_path(); | 2001 | path = btrfs_alloc_path(); |
2000 | if (!path) | 2002 | if (!path) |
2001 | return -ENOMEM; | 2003 | return -ENOMEM; |
2004 | path->reada = 1; | ||
2002 | 2005 | ||
2003 | reloc_root = root->reloc_root; | 2006 | reloc_root = root->reloc_root; |
2004 | root_item = &reloc_root->root_item; | 2007 | root_item = &reloc_root->root_item; |
@@ -2139,10 +2142,10 @@ int prepare_to_merge(struct reloc_control *rc, int err) | |||
2139 | u64 num_bytes = 0; | 2142 | u64 num_bytes = 0; |
2140 | int ret; | 2143 | int ret; |
2141 | 2144 | ||
2142 | mutex_lock(&root->fs_info->trans_mutex); | 2145 | spin_lock(&root->fs_info->trans_lock); |
2143 | rc->merging_rsv_size += root->nodesize * (BTRFS_MAX_LEVEL - 1) * 2; | 2146 | rc->merging_rsv_size += root->nodesize * (BTRFS_MAX_LEVEL - 1) * 2; |
2144 | rc->merging_rsv_size += rc->nodes_relocated * 2; | 2147 | rc->merging_rsv_size += rc->nodes_relocated * 2; |
2145 | mutex_unlock(&root->fs_info->trans_mutex); | 2148 | spin_unlock(&root->fs_info->trans_lock); |
2146 | again: | 2149 | again: |
2147 | if (!err) { | 2150 | if (!err) { |
2148 | num_bytes = rc->merging_rsv_size; | 2151 | num_bytes = rc->merging_rsv_size; |
@@ -2152,7 +2155,7 @@ again: | |||
2152 | err = ret; | 2155 | err = ret; |
2153 | } | 2156 | } |
2154 | 2157 | ||
2155 | trans = btrfs_join_transaction(rc->extent_root, 1); | 2158 | trans = btrfs_join_transaction(rc->extent_root); |
2156 | if (IS_ERR(trans)) { | 2159 | if (IS_ERR(trans)) { |
2157 | if (!err) | 2160 | if (!err) |
2158 | btrfs_block_rsv_release(rc->extent_root, | 2161 | btrfs_block_rsv_release(rc->extent_root, |
@@ -2211,9 +2214,9 @@ int merge_reloc_roots(struct reloc_control *rc) | |||
2211 | int ret; | 2214 | int ret; |
2212 | again: | 2215 | again: |
2213 | root = rc->extent_root; | 2216 | root = rc->extent_root; |
2214 | mutex_lock(&root->fs_info->trans_mutex); | 2217 | spin_lock(&root->fs_info->trans_lock); |
2215 | list_splice_init(&rc->reloc_roots, &reloc_roots); | 2218 | list_splice_init(&rc->reloc_roots, &reloc_roots); |
2216 | mutex_unlock(&root->fs_info->trans_mutex); | 2219 | spin_unlock(&root->fs_info->trans_lock); |
2217 | 2220 | ||
2218 | while (!list_empty(&reloc_roots)) { | 2221 | while (!list_empty(&reloc_roots)) { |
2219 | found = 1; | 2222 | found = 1; |
@@ -3236,7 +3239,7 @@ truncate: | |||
3236 | goto out; | 3239 | goto out; |
3237 | } | 3240 | } |
3238 | 3241 | ||
3239 | trans = btrfs_join_transaction(root, 0); | 3242 | trans = btrfs_join_transaction(root); |
3240 | if (IS_ERR(trans)) { | 3243 | if (IS_ERR(trans)) { |
3241 | btrfs_free_path(path); | 3244 | btrfs_free_path(path); |
3242 | ret = PTR_ERR(trans); | 3245 | ret = PTR_ERR(trans); |
@@ -3300,6 +3303,7 @@ static int find_data_references(struct reloc_control *rc, | |||
3300 | path = btrfs_alloc_path(); | 3303 | path = btrfs_alloc_path(); |
3301 | if (!path) | 3304 | if (!path) |
3302 | return -ENOMEM; | 3305 | return -ENOMEM; |
3306 | path->reada = 1; | ||
3303 | 3307 | ||
3304 | root = read_fs_root(rc->extent_root->fs_info, ref_root); | 3308 | root = read_fs_root(rc->extent_root->fs_info, ref_root); |
3305 | if (IS_ERR(root)) { | 3309 | if (IS_ERR(root)) { |
@@ -3586,17 +3590,17 @@ next: | |||
3586 | static void set_reloc_control(struct reloc_control *rc) | 3590 | static void set_reloc_control(struct reloc_control *rc) |
3587 | { | 3591 | { |
3588 | struct btrfs_fs_info *fs_info = rc->extent_root->fs_info; | 3592 | struct btrfs_fs_info *fs_info = rc->extent_root->fs_info; |
3589 | mutex_lock(&fs_info->trans_mutex); | 3593 | spin_lock(&fs_info->trans_lock); |
3590 | fs_info->reloc_ctl = rc; | 3594 | fs_info->reloc_ctl = rc; |
3591 | mutex_unlock(&fs_info->trans_mutex); | 3595 | spin_unlock(&fs_info->trans_lock); |
3592 | } | 3596 | } |
3593 | 3597 | ||
3594 | static void unset_reloc_control(struct reloc_control *rc) | 3598 | static void unset_reloc_control(struct reloc_control *rc) |
3595 | { | 3599 | { |
3596 | struct btrfs_fs_info *fs_info = rc->extent_root->fs_info; | 3600 | struct btrfs_fs_info *fs_info = rc->extent_root->fs_info; |
3597 | mutex_lock(&fs_info->trans_mutex); | 3601 | spin_lock(&fs_info->trans_lock); |
3598 | fs_info->reloc_ctl = NULL; | 3602 | fs_info->reloc_ctl = NULL; |
3599 | mutex_unlock(&fs_info->trans_mutex); | 3603 | spin_unlock(&fs_info->trans_lock); |
3600 | } | 3604 | } |
3601 | 3605 | ||
3602 | static int check_extent_flags(u64 flags) | 3606 | static int check_extent_flags(u64 flags) |
@@ -3645,7 +3649,7 @@ int prepare_to_relocate(struct reloc_control *rc) | |||
3645 | rc->create_reloc_tree = 1; | 3649 | rc->create_reloc_tree = 1; |
3646 | set_reloc_control(rc); | 3650 | set_reloc_control(rc); |
3647 | 3651 | ||
3648 | trans = btrfs_join_transaction(rc->extent_root, 1); | 3652 | trans = btrfs_join_transaction(rc->extent_root); |
3649 | BUG_ON(IS_ERR(trans)); | 3653 | BUG_ON(IS_ERR(trans)); |
3650 | btrfs_commit_transaction(trans, rc->extent_root); | 3654 | btrfs_commit_transaction(trans, rc->extent_root); |
3651 | return 0; | 3655 | return 0; |
@@ -3668,6 +3672,7 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc) | |||
3668 | path = btrfs_alloc_path(); | 3672 | path = btrfs_alloc_path(); |
3669 | if (!path) | 3673 | if (!path) |
3670 | return -ENOMEM; | 3674 | return -ENOMEM; |
3675 | path->reada = 1; | ||
3671 | 3676 | ||
3672 | ret = prepare_to_relocate(rc); | 3677 | ret = prepare_to_relocate(rc); |
3673 | if (ret) { | 3678 | if (ret) { |
@@ -3834,7 +3839,7 @@ restart: | |||
3834 | btrfs_block_rsv_release(rc->extent_root, rc->block_rsv, (u64)-1); | 3839 | btrfs_block_rsv_release(rc->extent_root, rc->block_rsv, (u64)-1); |
3835 | 3840 | ||
3836 | /* get rid of pinned extents */ | 3841 | /* get rid of pinned extents */ |
3837 | trans = btrfs_join_transaction(rc->extent_root, 1); | 3842 | trans = btrfs_join_transaction(rc->extent_root); |
3838 | if (IS_ERR(trans)) | 3843 | if (IS_ERR(trans)) |
3839 | err = PTR_ERR(trans); | 3844 | err = PTR_ERR(trans); |
3840 | else | 3845 | else |
@@ -4093,6 +4098,7 @@ int btrfs_recover_relocation(struct btrfs_root *root) | |||
4093 | path = btrfs_alloc_path(); | 4098 | path = btrfs_alloc_path(); |
4094 | if (!path) | 4099 | if (!path) |
4095 | return -ENOMEM; | 4100 | return -ENOMEM; |
4101 | path->reada = -1; | ||
4096 | 4102 | ||
4097 | key.objectid = BTRFS_TREE_RELOC_OBJECTID; | 4103 | key.objectid = BTRFS_TREE_RELOC_OBJECTID; |
4098 | key.type = BTRFS_ROOT_ITEM_KEY; | 4104 | key.type = BTRFS_ROOT_ITEM_KEY; |
@@ -4159,7 +4165,7 @@ int btrfs_recover_relocation(struct btrfs_root *root) | |||
4159 | 4165 | ||
4160 | set_reloc_control(rc); | 4166 | set_reloc_control(rc); |
4161 | 4167 | ||
4162 | trans = btrfs_join_transaction(rc->extent_root, 1); | 4168 | trans = btrfs_join_transaction(rc->extent_root); |
4163 | if (IS_ERR(trans)) { | 4169 | if (IS_ERR(trans)) { |
4164 | unset_reloc_control(rc); | 4170 | unset_reloc_control(rc); |
4165 | err = PTR_ERR(trans); | 4171 | err = PTR_ERR(trans); |
@@ -4193,7 +4199,7 @@ int btrfs_recover_relocation(struct btrfs_root *root) | |||
4193 | 4199 | ||
4194 | unset_reloc_control(rc); | 4200 | unset_reloc_control(rc); |
4195 | 4201 | ||
4196 | trans = btrfs_join_transaction(rc->extent_root, 1); | 4202 | trans = btrfs_join_transaction(rc->extent_root); |
4197 | if (IS_ERR(trans)) | 4203 | if (IS_ERR(trans)) |
4198 | err = PTR_ERR(trans); | 4204 | err = PTR_ERR(trans); |
4199 | else | 4205 | else |
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 6dfed0c27ac3..df50fd1eca8f 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c | |||
@@ -117,33 +117,37 @@ static void scrub_free_csums(struct scrub_dev *sdev) | |||
117 | } | 117 | } |
118 | } | 118 | } |
119 | 119 | ||
120 | static void scrub_free_bio(struct bio *bio) | ||
121 | { | ||
122 | int i; | ||
123 | struct page *last_page = NULL; | ||
124 | |||
125 | if (!bio) | ||
126 | return; | ||
127 | |||
128 | for (i = 0; i < bio->bi_vcnt; ++i) { | ||
129 | if (bio->bi_io_vec[i].bv_page == last_page) | ||
130 | continue; | ||
131 | last_page = bio->bi_io_vec[i].bv_page; | ||
132 | __free_page(last_page); | ||
133 | } | ||
134 | bio_put(bio); | ||
135 | } | ||
136 | |||
120 | static noinline_for_stack void scrub_free_dev(struct scrub_dev *sdev) | 137 | static noinline_for_stack void scrub_free_dev(struct scrub_dev *sdev) |
121 | { | 138 | { |
122 | int i; | 139 | int i; |
123 | int j; | ||
124 | struct page *last_page; | ||
125 | 140 | ||
126 | if (!sdev) | 141 | if (!sdev) |
127 | return; | 142 | return; |
128 | 143 | ||
129 | for (i = 0; i < SCRUB_BIOS_PER_DEV; ++i) { | 144 | for (i = 0; i < SCRUB_BIOS_PER_DEV; ++i) { |
130 | struct scrub_bio *sbio = sdev->bios[i]; | 145 | struct scrub_bio *sbio = sdev->bios[i]; |
131 | struct bio *bio; | ||
132 | 146 | ||
133 | if (!sbio) | 147 | if (!sbio) |
134 | break; | 148 | break; |
135 | 149 | ||
136 | bio = sbio->bio; | 150 | scrub_free_bio(sbio->bio); |
137 | if (bio) { | ||
138 | last_page = NULL; | ||
139 | for (j = 0; j < bio->bi_vcnt; ++j) { | ||
140 | if (bio->bi_io_vec[j].bv_page == last_page) | ||
141 | continue; | ||
142 | last_page = bio->bi_io_vec[j].bv_page; | ||
143 | __free_page(last_page); | ||
144 | } | ||
145 | bio_put(bio); | ||
146 | } | ||
147 | kfree(sbio); | 151 | kfree(sbio); |
148 | } | 152 | } |
149 | 153 | ||
@@ -156,8 +160,6 @@ struct scrub_dev *scrub_setup_dev(struct btrfs_device *dev) | |||
156 | { | 160 | { |
157 | struct scrub_dev *sdev; | 161 | struct scrub_dev *sdev; |
158 | int i; | 162 | int i; |
159 | int j; | ||
160 | int ret; | ||
161 | struct btrfs_fs_info *fs_info = dev->dev_root->fs_info; | 163 | struct btrfs_fs_info *fs_info = dev->dev_root->fs_info; |
162 | 164 | ||
163 | sdev = kzalloc(sizeof(*sdev), GFP_NOFS); | 165 | sdev = kzalloc(sizeof(*sdev), GFP_NOFS); |
@@ -165,7 +167,6 @@ struct scrub_dev *scrub_setup_dev(struct btrfs_device *dev) | |||
165 | goto nomem; | 167 | goto nomem; |
166 | sdev->dev = dev; | 168 | sdev->dev = dev; |
167 | for (i = 0; i < SCRUB_BIOS_PER_DEV; ++i) { | 169 | for (i = 0; i < SCRUB_BIOS_PER_DEV; ++i) { |
168 | struct bio *bio; | ||
169 | struct scrub_bio *sbio; | 170 | struct scrub_bio *sbio; |
170 | 171 | ||
171 | sbio = kzalloc(sizeof(*sbio), GFP_NOFS); | 172 | sbio = kzalloc(sizeof(*sbio), GFP_NOFS); |
@@ -173,32 +174,10 @@ struct scrub_dev *scrub_setup_dev(struct btrfs_device *dev) | |||
173 | goto nomem; | 174 | goto nomem; |
174 | sdev->bios[i] = sbio; | 175 | sdev->bios[i] = sbio; |
175 | 176 | ||
176 | bio = bio_kmalloc(GFP_NOFS, SCRUB_PAGES_PER_BIO); | ||
177 | if (!bio) | ||
178 | goto nomem; | ||
179 | |||
180 | sbio->index = i; | 177 | sbio->index = i; |
181 | sbio->sdev = sdev; | 178 | sbio->sdev = sdev; |
182 | sbio->bio = bio; | ||
183 | sbio->count = 0; | 179 | sbio->count = 0; |
184 | sbio->work.func = scrub_checksum; | 180 | sbio->work.func = scrub_checksum; |
185 | bio->bi_private = sdev->bios[i]; | ||
186 | bio->bi_end_io = scrub_bio_end_io; | ||
187 | bio->bi_sector = 0; | ||
188 | bio->bi_bdev = dev->bdev; | ||
189 | bio->bi_size = 0; | ||
190 | |||
191 | for (j = 0; j < SCRUB_PAGES_PER_BIO; ++j) { | ||
192 | struct page *page; | ||
193 | page = alloc_page(GFP_NOFS); | ||
194 | if (!page) | ||
195 | goto nomem; | ||
196 | |||
197 | ret = bio_add_page(bio, page, PAGE_SIZE, 0); | ||
198 | if (!ret) | ||
199 | goto nomem; | ||
200 | } | ||
201 | WARN_ON(bio->bi_vcnt != SCRUB_PAGES_PER_BIO); | ||
202 | 181 | ||
203 | if (i != SCRUB_BIOS_PER_DEV-1) | 182 | if (i != SCRUB_BIOS_PER_DEV-1) |
204 | sdev->bios[i]->next_free = i + 1; | 183 | sdev->bios[i]->next_free = i + 1; |
@@ -369,9 +348,6 @@ static int scrub_fixup_io(int rw, struct block_device *bdev, sector_t sector, | |||
369 | int ret; | 348 | int ret; |
370 | DECLARE_COMPLETION_ONSTACK(complete); | 349 | DECLARE_COMPLETION_ONSTACK(complete); |
371 | 350 | ||
372 | /* we are going to wait on this IO */ | ||
373 | rw |= REQ_SYNC; | ||
374 | |||
375 | bio = bio_alloc(GFP_NOFS, 1); | 351 | bio = bio_alloc(GFP_NOFS, 1); |
376 | bio->bi_bdev = bdev; | 352 | bio->bi_bdev = bdev; |
377 | bio->bi_sector = sector; | 353 | bio->bi_sector = sector; |
@@ -380,6 +356,7 @@ static int scrub_fixup_io(int rw, struct block_device *bdev, sector_t sector, | |||
380 | bio->bi_private = &complete; | 356 | bio->bi_private = &complete; |
381 | submit_bio(rw, bio); | 357 | submit_bio(rw, bio); |
382 | 358 | ||
359 | /* this will also unplug the queue */ | ||
383 | wait_for_completion(&complete); | 360 | wait_for_completion(&complete); |
384 | 361 | ||
385 | ret = !test_bit(BIO_UPTODATE, &bio->bi_flags); | 362 | ret = !test_bit(BIO_UPTODATE, &bio->bi_flags); |
@@ -394,6 +371,7 @@ static void scrub_bio_end_io(struct bio *bio, int err) | |||
394 | struct btrfs_fs_info *fs_info = sdev->dev->dev_root->fs_info; | 371 | struct btrfs_fs_info *fs_info = sdev->dev->dev_root->fs_info; |
395 | 372 | ||
396 | sbio->err = err; | 373 | sbio->err = err; |
374 | sbio->bio = bio; | ||
397 | 375 | ||
398 | btrfs_queue_worker(&fs_info->scrub_workers, &sbio->work); | 376 | btrfs_queue_worker(&fs_info->scrub_workers, &sbio->work); |
399 | } | 377 | } |
@@ -453,6 +431,8 @@ static void scrub_checksum(struct btrfs_work *work) | |||
453 | } | 431 | } |
454 | 432 | ||
455 | out: | 433 | out: |
434 | scrub_free_bio(sbio->bio); | ||
435 | sbio->bio = NULL; | ||
456 | spin_lock(&sdev->list_lock); | 436 | spin_lock(&sdev->list_lock); |
457 | sbio->next_free = sdev->first_free; | 437 | sbio->next_free = sdev->first_free; |
458 | sdev->first_free = sbio->index; | 438 | sdev->first_free = sbio->index; |
@@ -583,25 +563,50 @@ static int scrub_checksum_super(struct scrub_bio *sbio, void *buffer) | |||
583 | static int scrub_submit(struct scrub_dev *sdev) | 563 | static int scrub_submit(struct scrub_dev *sdev) |
584 | { | 564 | { |
585 | struct scrub_bio *sbio; | 565 | struct scrub_bio *sbio; |
566 | struct bio *bio; | ||
567 | int i; | ||
586 | 568 | ||
587 | if (sdev->curr == -1) | 569 | if (sdev->curr == -1) |
588 | return 0; | 570 | return 0; |
589 | 571 | ||
590 | sbio = sdev->bios[sdev->curr]; | 572 | sbio = sdev->bios[sdev->curr]; |
591 | 573 | ||
592 | sbio->bio->bi_sector = sbio->physical >> 9; | 574 | bio = bio_alloc(GFP_NOFS, sbio->count); |
593 | sbio->bio->bi_size = sbio->count * PAGE_SIZE; | 575 | if (!bio) |
594 | sbio->bio->bi_next = NULL; | 576 | goto nomem; |
595 | sbio->bio->bi_flags |= 1 << BIO_UPTODATE; | 577 | |
596 | sbio->bio->bi_comp_cpu = -1; | 578 | bio->bi_private = sbio; |
597 | sbio->bio->bi_bdev = sdev->dev->bdev; | 579 | bio->bi_end_io = scrub_bio_end_io; |
580 | bio->bi_bdev = sdev->dev->bdev; | ||
581 | bio->bi_sector = sbio->physical >> 9; | ||
582 | |||
583 | for (i = 0; i < sbio->count; ++i) { | ||
584 | struct page *page; | ||
585 | int ret; | ||
586 | |||
587 | page = alloc_page(GFP_NOFS); | ||
588 | if (!page) | ||
589 | goto nomem; | ||
590 | |||
591 | ret = bio_add_page(bio, page, PAGE_SIZE, 0); | ||
592 | if (!ret) { | ||
593 | __free_page(page); | ||
594 | goto nomem; | ||
595 | } | ||
596 | } | ||
597 | |||
598 | sbio->err = 0; | 598 | sbio->err = 0; |
599 | sdev->curr = -1; | 599 | sdev->curr = -1; |
600 | atomic_inc(&sdev->in_flight); | 600 | atomic_inc(&sdev->in_flight); |
601 | 601 | ||
602 | submit_bio(0, sbio->bio); | 602 | submit_bio(READ, bio); |
603 | 603 | ||
604 | return 0; | 604 | return 0; |
605 | |||
606 | nomem: | ||
607 | scrub_free_bio(bio); | ||
608 | |||
609 | return -ENOMEM; | ||
605 | } | 610 | } |
606 | 611 | ||
607 | static int scrub_page(struct scrub_dev *sdev, u64 logical, u64 len, | 612 | static int scrub_page(struct scrub_dev *sdev, u64 logical, u64 len, |
@@ -633,7 +638,11 @@ again: | |||
633 | sbio->logical = logical; | 638 | sbio->logical = logical; |
634 | } else if (sbio->physical + sbio->count * PAGE_SIZE != physical || | 639 | } else if (sbio->physical + sbio->count * PAGE_SIZE != physical || |
635 | sbio->logical + sbio->count * PAGE_SIZE != logical) { | 640 | sbio->logical + sbio->count * PAGE_SIZE != logical) { |
636 | scrub_submit(sdev); | 641 | int ret; |
642 | |||
643 | ret = scrub_submit(sdev); | ||
644 | if (ret) | ||
645 | return ret; | ||
637 | goto again; | 646 | goto again; |
638 | } | 647 | } |
639 | sbio->spag[sbio->count].flags = flags; | 648 | sbio->spag[sbio->count].flags = flags; |
@@ -645,8 +654,13 @@ again: | |||
645 | memcpy(sbio->spag[sbio->count].csum, csum, sdev->csum_size); | 654 | memcpy(sbio->spag[sbio->count].csum, csum, sdev->csum_size); |
646 | } | 655 | } |
647 | ++sbio->count; | 656 | ++sbio->count; |
648 | if (sbio->count == SCRUB_PAGES_PER_BIO || force) | 657 | if (sbio->count == SCRUB_PAGES_PER_BIO || force) { |
649 | scrub_submit(sdev); | 658 | int ret; |
659 | |||
660 | ret = scrub_submit(sdev); | ||
661 | if (ret) | ||
662 | return ret; | ||
663 | } | ||
650 | 664 | ||
651 | return 0; | 665 | return 0; |
652 | } | 666 | } |
@@ -727,6 +741,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_dev *sdev, | |||
727 | struct btrfs_root *root = fs_info->extent_root; | 741 | struct btrfs_root *root = fs_info->extent_root; |
728 | struct btrfs_root *csum_root = fs_info->csum_root; | 742 | struct btrfs_root *csum_root = fs_info->csum_root; |
729 | struct btrfs_extent_item *extent; | 743 | struct btrfs_extent_item *extent; |
744 | struct blk_plug plug; | ||
730 | u64 flags; | 745 | u64 flags; |
731 | int ret; | 746 | int ret; |
732 | int slot; | 747 | int slot; |
@@ -831,6 +846,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_dev *sdev, | |||
831 | * the scrub. This might currently (crc32) end up to be about 1MB | 846 | * the scrub. This might currently (crc32) end up to be about 1MB |
832 | */ | 847 | */ |
833 | start_stripe = 0; | 848 | start_stripe = 0; |
849 | blk_start_plug(&plug); | ||
834 | again: | 850 | again: |
835 | logical = base + offset + start_stripe * increment; | 851 | logical = base + offset + start_stripe * increment; |
836 | for (i = start_stripe; i < nstripes; ++i) { | 852 | for (i = start_stripe; i < nstripes; ++i) { |
@@ -972,6 +988,7 @@ next: | |||
972 | scrub_submit(sdev); | 988 | scrub_submit(sdev); |
973 | 989 | ||
974 | out: | 990 | out: |
991 | blk_finish_plug(&plug); | ||
975 | btrfs_free_path(path); | 992 | btrfs_free_path(path); |
976 | return ret < 0 ? ret : 0; | 993 | return ret < 0 ? ret : 0; |
977 | } | 994 | } |
@@ -1166,7 +1183,7 @@ int btrfs_scrub_dev(struct btrfs_root *root, u64 devid, u64 start, u64 end, | |||
1166 | int ret; | 1183 | int ret; |
1167 | struct btrfs_device *dev; | 1184 | struct btrfs_device *dev; |
1168 | 1185 | ||
1169 | if (root->fs_info->closing) | 1186 | if (btrfs_fs_closing(root->fs_info)) |
1170 | return -EINVAL; | 1187 | return -EINVAL; |
1171 | 1188 | ||
1172 | /* | 1189 | /* |
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 9b2e7e5bc3ef..117e74e3604b 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c | |||
@@ -161,7 +161,8 @@ enum { | |||
161 | Opt_compress_type, Opt_compress_force, Opt_compress_force_type, | 161 | Opt_compress_type, Opt_compress_force, Opt_compress_force_type, |
162 | Opt_notreelog, Opt_ratio, Opt_flushoncommit, Opt_discard, | 162 | Opt_notreelog, Opt_ratio, Opt_flushoncommit, Opt_discard, |
163 | Opt_space_cache, Opt_clear_cache, Opt_user_subvol_rm_allowed, | 163 | Opt_space_cache, Opt_clear_cache, Opt_user_subvol_rm_allowed, |
164 | Opt_enospc_debug, Opt_subvolrootid, Opt_defrag, Opt_err, | 164 | Opt_enospc_debug, Opt_subvolrootid, Opt_defrag, |
165 | Opt_inode_cache, Opt_err, | ||
165 | }; | 166 | }; |
166 | 167 | ||
167 | static match_table_t tokens = { | 168 | static match_table_t tokens = { |
@@ -193,6 +194,7 @@ static match_table_t tokens = { | |||
193 | {Opt_enospc_debug, "enospc_debug"}, | 194 | {Opt_enospc_debug, "enospc_debug"}, |
194 | {Opt_subvolrootid, "subvolrootid=%d"}, | 195 | {Opt_subvolrootid, "subvolrootid=%d"}, |
195 | {Opt_defrag, "autodefrag"}, | 196 | {Opt_defrag, "autodefrag"}, |
197 | {Opt_inode_cache, "inode_cache"}, | ||
196 | {Opt_err, NULL}, | 198 | {Opt_err, NULL}, |
197 | }; | 199 | }; |
198 | 200 | ||
@@ -361,6 +363,10 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) | |||
361 | printk(KERN_INFO "btrfs: enabling disk space caching\n"); | 363 | printk(KERN_INFO "btrfs: enabling disk space caching\n"); |
362 | btrfs_set_opt(info->mount_opt, SPACE_CACHE); | 364 | btrfs_set_opt(info->mount_opt, SPACE_CACHE); |
363 | break; | 365 | break; |
366 | case Opt_inode_cache: | ||
367 | printk(KERN_INFO "btrfs: enabling inode map caching\n"); | ||
368 | btrfs_set_opt(info->mount_opt, INODE_MAP_CACHE); | ||
369 | break; | ||
364 | case Opt_clear_cache: | 370 | case Opt_clear_cache: |
365 | printk(KERN_INFO "btrfs: force clearing of disk cache\n"); | 371 | printk(KERN_INFO "btrfs: force clearing of disk cache\n"); |
366 | btrfs_set_opt(info->mount_opt, CLEAR_CACHE); | 372 | btrfs_set_opt(info->mount_opt, CLEAR_CACHE); |
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index dc80f7156923..dd719662340e 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c | |||
@@ -35,6 +35,7 @@ static noinline void put_transaction(struct btrfs_transaction *transaction) | |||
35 | { | 35 | { |
36 | WARN_ON(atomic_read(&transaction->use_count) == 0); | 36 | WARN_ON(atomic_read(&transaction->use_count) == 0); |
37 | if (atomic_dec_and_test(&transaction->use_count)) { | 37 | if (atomic_dec_and_test(&transaction->use_count)) { |
38 | BUG_ON(!list_empty(&transaction->list)); | ||
38 | memset(transaction, 0, sizeof(*transaction)); | 39 | memset(transaction, 0, sizeof(*transaction)); |
39 | kmem_cache_free(btrfs_transaction_cachep, transaction); | 40 | kmem_cache_free(btrfs_transaction_cachep, transaction); |
40 | } | 41 | } |
@@ -49,46 +50,72 @@ static noinline void switch_commit_root(struct btrfs_root *root) | |||
49 | /* | 50 | /* |
50 | * either allocate a new transaction or hop into the existing one | 51 | * either allocate a new transaction or hop into the existing one |
51 | */ | 52 | */ |
52 | static noinline int join_transaction(struct btrfs_root *root) | 53 | static noinline int join_transaction(struct btrfs_root *root, int nofail) |
53 | { | 54 | { |
54 | struct btrfs_transaction *cur_trans; | 55 | struct btrfs_transaction *cur_trans; |
56 | |||
57 | spin_lock(&root->fs_info->trans_lock); | ||
58 | if (root->fs_info->trans_no_join) { | ||
59 | if (!nofail) { | ||
60 | spin_unlock(&root->fs_info->trans_lock); | ||
61 | return -EBUSY; | ||
62 | } | ||
63 | } | ||
64 | |||
55 | cur_trans = root->fs_info->running_transaction; | 65 | cur_trans = root->fs_info->running_transaction; |
56 | if (!cur_trans) { | 66 | if (cur_trans) { |
57 | cur_trans = kmem_cache_alloc(btrfs_transaction_cachep, | 67 | atomic_inc(&cur_trans->use_count); |
58 | GFP_NOFS); | ||
59 | if (!cur_trans) | ||
60 | return -ENOMEM; | ||
61 | root->fs_info->generation++; | ||
62 | atomic_set(&cur_trans->num_writers, 1); | ||
63 | cur_trans->num_joined = 0; | ||
64 | cur_trans->transid = root->fs_info->generation; | ||
65 | init_waitqueue_head(&cur_trans->writer_wait); | ||
66 | init_waitqueue_head(&cur_trans->commit_wait); | ||
67 | cur_trans->in_commit = 0; | ||
68 | cur_trans->blocked = 0; | ||
69 | atomic_set(&cur_trans->use_count, 1); | ||
70 | cur_trans->commit_done = 0; | ||
71 | cur_trans->start_time = get_seconds(); | ||
72 | |||
73 | cur_trans->delayed_refs.root = RB_ROOT; | ||
74 | cur_trans->delayed_refs.num_entries = 0; | ||
75 | cur_trans->delayed_refs.num_heads_ready = 0; | ||
76 | cur_trans->delayed_refs.num_heads = 0; | ||
77 | cur_trans->delayed_refs.flushing = 0; | ||
78 | cur_trans->delayed_refs.run_delayed_start = 0; | ||
79 | spin_lock_init(&cur_trans->delayed_refs.lock); | ||
80 | |||
81 | INIT_LIST_HEAD(&cur_trans->pending_snapshots); | ||
82 | list_add_tail(&cur_trans->list, &root->fs_info->trans_list); | ||
83 | extent_io_tree_init(&cur_trans->dirty_pages, | ||
84 | root->fs_info->btree_inode->i_mapping); | ||
85 | spin_lock(&root->fs_info->new_trans_lock); | ||
86 | root->fs_info->running_transaction = cur_trans; | ||
87 | spin_unlock(&root->fs_info->new_trans_lock); | ||
88 | } else { | ||
89 | atomic_inc(&cur_trans->num_writers); | 68 | atomic_inc(&cur_trans->num_writers); |
90 | cur_trans->num_joined++; | 69 | cur_trans->num_joined++; |
70 | spin_unlock(&root->fs_info->trans_lock); | ||
71 | return 0; | ||
91 | } | 72 | } |
73 | spin_unlock(&root->fs_info->trans_lock); | ||
74 | |||
75 | cur_trans = kmem_cache_alloc(btrfs_transaction_cachep, GFP_NOFS); | ||
76 | if (!cur_trans) | ||
77 | return -ENOMEM; | ||
78 | spin_lock(&root->fs_info->trans_lock); | ||
79 | if (root->fs_info->running_transaction) { | ||
80 | kmem_cache_free(btrfs_transaction_cachep, cur_trans); | ||
81 | cur_trans = root->fs_info->running_transaction; | ||
82 | atomic_inc(&cur_trans->use_count); | ||
83 | atomic_inc(&cur_trans->num_writers); | ||
84 | cur_trans->num_joined++; | ||
85 | spin_unlock(&root->fs_info->trans_lock); | ||
86 | return 0; | ||
87 | } | ||
88 | atomic_set(&cur_trans->num_writers, 1); | ||
89 | cur_trans->num_joined = 0; | ||
90 | init_waitqueue_head(&cur_trans->writer_wait); | ||
91 | init_waitqueue_head(&cur_trans->commit_wait); | ||
92 | cur_trans->in_commit = 0; | ||
93 | cur_trans->blocked = 0; | ||
94 | /* | ||
95 | * One for this trans handle, one so it will live on until we | ||
96 | * commit the transaction. | ||
97 | */ | ||
98 | atomic_set(&cur_trans->use_count, 2); | ||
99 | cur_trans->commit_done = 0; | ||
100 | cur_trans->start_time = get_seconds(); | ||
101 | |||
102 | cur_trans->delayed_refs.root = RB_ROOT; | ||
103 | cur_trans->delayed_refs.num_entries = 0; | ||
104 | cur_trans->delayed_refs.num_heads_ready = 0; | ||
105 | cur_trans->delayed_refs.num_heads = 0; | ||
106 | cur_trans->delayed_refs.flushing = 0; | ||
107 | cur_trans->delayed_refs.run_delayed_start = 0; | ||
108 | spin_lock_init(&cur_trans->commit_lock); | ||
109 | spin_lock_init(&cur_trans->delayed_refs.lock); | ||
110 | |||
111 | INIT_LIST_HEAD(&cur_trans->pending_snapshots); | ||
112 | list_add_tail(&cur_trans->list, &root->fs_info->trans_list); | ||
113 | extent_io_tree_init(&cur_trans->dirty_pages, | ||
114 | root->fs_info->btree_inode->i_mapping); | ||
115 | root->fs_info->generation++; | ||
116 | cur_trans->transid = root->fs_info->generation; | ||
117 | root->fs_info->running_transaction = cur_trans; | ||
118 | spin_unlock(&root->fs_info->trans_lock); | ||
92 | 119 | ||
93 | return 0; | 120 | return 0; |
94 | } | 121 | } |
@@ -99,39 +126,28 @@ static noinline int join_transaction(struct btrfs_root *root) | |||
99 | * to make sure the old root from before we joined the transaction is deleted | 126 | * to make sure the old root from before we joined the transaction is deleted |
100 | * when the transaction commits | 127 | * when the transaction commits |
101 | */ | 128 | */ |
102 | static noinline int record_root_in_trans(struct btrfs_trans_handle *trans, | 129 | int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans, |
103 | struct btrfs_root *root) | 130 | struct btrfs_root *root) |
104 | { | 131 | { |
105 | if (root->ref_cows && root->last_trans < trans->transid) { | 132 | if (root->ref_cows && root->last_trans < trans->transid) { |
106 | WARN_ON(root == root->fs_info->extent_root); | 133 | WARN_ON(root == root->fs_info->extent_root); |
107 | WARN_ON(root->commit_root != root->node); | 134 | WARN_ON(root->commit_root != root->node); |
108 | 135 | ||
136 | spin_lock(&root->fs_info->fs_roots_radix_lock); | ||
137 | if (root->last_trans == trans->transid) { | ||
138 | spin_unlock(&root->fs_info->fs_roots_radix_lock); | ||
139 | return 0; | ||
140 | } | ||
141 | root->last_trans = trans->transid; | ||
109 | radix_tree_tag_set(&root->fs_info->fs_roots_radix, | 142 | radix_tree_tag_set(&root->fs_info->fs_roots_radix, |
110 | (unsigned long)root->root_key.objectid, | 143 | (unsigned long)root->root_key.objectid, |
111 | BTRFS_ROOT_TRANS_TAG); | 144 | BTRFS_ROOT_TRANS_TAG); |
112 | root->last_trans = trans->transid; | 145 | spin_unlock(&root->fs_info->fs_roots_radix_lock); |
113 | btrfs_init_reloc_root(trans, root); | 146 | btrfs_init_reloc_root(trans, root); |
114 | } | 147 | } |
115 | return 0; | 148 | return 0; |
116 | } | 149 | } |
117 | 150 | ||
118 | int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans, | ||
119 | struct btrfs_root *root) | ||
120 | { | ||
121 | if (!root->ref_cows) | ||
122 | return 0; | ||
123 | |||
124 | mutex_lock(&root->fs_info->trans_mutex); | ||
125 | if (root->last_trans == trans->transid) { | ||
126 | mutex_unlock(&root->fs_info->trans_mutex); | ||
127 | return 0; | ||
128 | } | ||
129 | |||
130 | record_root_in_trans(trans, root); | ||
131 | mutex_unlock(&root->fs_info->trans_mutex); | ||
132 | return 0; | ||
133 | } | ||
134 | |||
135 | /* wait for commit against the current transaction to become unblocked | 151 | /* wait for commit against the current transaction to become unblocked |
136 | * when this is done, it is safe to start a new transaction, but the current | 152 | * when this is done, it is safe to start a new transaction, but the current |
137 | * transaction might not be fully on disk. | 153 | * transaction might not be fully on disk. |
@@ -140,21 +156,23 @@ static void wait_current_trans(struct btrfs_root *root) | |||
140 | { | 156 | { |
141 | struct btrfs_transaction *cur_trans; | 157 | struct btrfs_transaction *cur_trans; |
142 | 158 | ||
159 | spin_lock(&root->fs_info->trans_lock); | ||
143 | cur_trans = root->fs_info->running_transaction; | 160 | cur_trans = root->fs_info->running_transaction; |
144 | if (cur_trans && cur_trans->blocked) { | 161 | if (cur_trans && cur_trans->blocked) { |
145 | DEFINE_WAIT(wait); | 162 | DEFINE_WAIT(wait); |
146 | atomic_inc(&cur_trans->use_count); | 163 | atomic_inc(&cur_trans->use_count); |
164 | spin_unlock(&root->fs_info->trans_lock); | ||
147 | while (1) { | 165 | while (1) { |
148 | prepare_to_wait(&root->fs_info->transaction_wait, &wait, | 166 | prepare_to_wait(&root->fs_info->transaction_wait, &wait, |
149 | TASK_UNINTERRUPTIBLE); | 167 | TASK_UNINTERRUPTIBLE); |
150 | if (!cur_trans->blocked) | 168 | if (!cur_trans->blocked) |
151 | break; | 169 | break; |
152 | mutex_unlock(&root->fs_info->trans_mutex); | ||
153 | schedule(); | 170 | schedule(); |
154 | mutex_lock(&root->fs_info->trans_mutex); | ||
155 | } | 171 | } |
156 | finish_wait(&root->fs_info->transaction_wait, &wait); | 172 | finish_wait(&root->fs_info->transaction_wait, &wait); |
157 | put_transaction(cur_trans); | 173 | put_transaction(cur_trans); |
174 | } else { | ||
175 | spin_unlock(&root->fs_info->trans_lock); | ||
158 | } | 176 | } |
159 | } | 177 | } |
160 | 178 | ||
@@ -167,10 +185,16 @@ enum btrfs_trans_type { | |||
167 | 185 | ||
168 | static int may_wait_transaction(struct btrfs_root *root, int type) | 186 | static int may_wait_transaction(struct btrfs_root *root, int type) |
169 | { | 187 | { |
170 | if (!root->fs_info->log_root_recovering && | 188 | if (root->fs_info->log_root_recovering) |
171 | ((type == TRANS_START && !root->fs_info->open_ioctl_trans) || | 189 | return 0; |
172 | type == TRANS_USERSPACE)) | 190 | |
191 | if (type == TRANS_USERSPACE) | ||
192 | return 1; | ||
193 | |||
194 | if (type == TRANS_START && | ||
195 | !atomic_read(&root->fs_info->open_ioctl_trans)) | ||
173 | return 1; | 196 | return 1; |
197 | |||
174 | return 0; | 198 | return 0; |
175 | } | 199 | } |
176 | 200 | ||
@@ -184,36 +208,44 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root, | |||
184 | 208 | ||
185 | if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) | 209 | if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) |
186 | return ERR_PTR(-EROFS); | 210 | return ERR_PTR(-EROFS); |
211 | |||
212 | if (current->journal_info) { | ||
213 | WARN_ON(type != TRANS_JOIN && type != TRANS_JOIN_NOLOCK); | ||
214 | h = current->journal_info; | ||
215 | h->use_count++; | ||
216 | h->orig_rsv = h->block_rsv; | ||
217 | h->block_rsv = NULL; | ||
218 | goto got_it; | ||
219 | } | ||
187 | again: | 220 | again: |
188 | h = kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS); | 221 | h = kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS); |
189 | if (!h) | 222 | if (!h) |
190 | return ERR_PTR(-ENOMEM); | 223 | return ERR_PTR(-ENOMEM); |
191 | 224 | ||
192 | if (type != TRANS_JOIN_NOLOCK) | ||
193 | mutex_lock(&root->fs_info->trans_mutex); | ||
194 | if (may_wait_transaction(root, type)) | 225 | if (may_wait_transaction(root, type)) |
195 | wait_current_trans(root); | 226 | wait_current_trans(root); |
196 | 227 | ||
197 | ret = join_transaction(root); | 228 | do { |
229 | ret = join_transaction(root, type == TRANS_JOIN_NOLOCK); | ||
230 | if (ret == -EBUSY) | ||
231 | wait_current_trans(root); | ||
232 | } while (ret == -EBUSY); | ||
233 | |||
198 | if (ret < 0) { | 234 | if (ret < 0) { |
199 | kmem_cache_free(btrfs_trans_handle_cachep, h); | 235 | kmem_cache_free(btrfs_trans_handle_cachep, h); |
200 | if (type != TRANS_JOIN_NOLOCK) | ||
201 | mutex_unlock(&root->fs_info->trans_mutex); | ||
202 | return ERR_PTR(ret); | 236 | return ERR_PTR(ret); |
203 | } | 237 | } |
204 | 238 | ||
205 | cur_trans = root->fs_info->running_transaction; | 239 | cur_trans = root->fs_info->running_transaction; |
206 | atomic_inc(&cur_trans->use_count); | ||
207 | if (type != TRANS_JOIN_NOLOCK) | ||
208 | mutex_unlock(&root->fs_info->trans_mutex); | ||
209 | 240 | ||
210 | h->transid = cur_trans->transid; | 241 | h->transid = cur_trans->transid; |
211 | h->transaction = cur_trans; | 242 | h->transaction = cur_trans; |
212 | h->blocks_used = 0; | 243 | h->blocks_used = 0; |
213 | h->block_group = 0; | ||
214 | h->bytes_reserved = 0; | 244 | h->bytes_reserved = 0; |
215 | h->delayed_ref_updates = 0; | 245 | h->delayed_ref_updates = 0; |
246 | h->use_count = 1; | ||
216 | h->block_rsv = NULL; | 247 | h->block_rsv = NULL; |
248 | h->orig_rsv = NULL; | ||
217 | 249 | ||
218 | smp_mb(); | 250 | smp_mb(); |
219 | if (cur_trans->blocked && may_wait_transaction(root, type)) { | 251 | if (cur_trans->blocked && may_wait_transaction(root, type)) { |
@@ -241,11 +273,8 @@ again: | |||
241 | } | 273 | } |
242 | } | 274 | } |
243 | 275 | ||
244 | if (type != TRANS_JOIN_NOLOCK) | 276 | got_it: |
245 | mutex_lock(&root->fs_info->trans_mutex); | 277 | btrfs_record_root_in_trans(h, root); |
246 | record_root_in_trans(h, root); | ||
247 | if (type != TRANS_JOIN_NOLOCK) | ||
248 | mutex_unlock(&root->fs_info->trans_mutex); | ||
249 | 278 | ||
250 | if (!current->journal_info && type != TRANS_USERSPACE) | 279 | if (!current->journal_info && type != TRANS_USERSPACE) |
251 | current->journal_info = h; | 280 | current->journal_info = h; |
@@ -257,22 +286,19 @@ struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root, | |||
257 | { | 286 | { |
258 | return start_transaction(root, num_items, TRANS_START); | 287 | return start_transaction(root, num_items, TRANS_START); |
259 | } | 288 | } |
260 | struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root, | 289 | struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root) |
261 | int num_blocks) | ||
262 | { | 290 | { |
263 | return start_transaction(root, 0, TRANS_JOIN); | 291 | return start_transaction(root, 0, TRANS_JOIN); |
264 | } | 292 | } |
265 | 293 | ||
266 | struct btrfs_trans_handle *btrfs_join_transaction_nolock(struct btrfs_root *root, | 294 | struct btrfs_trans_handle *btrfs_join_transaction_nolock(struct btrfs_root *root) |
267 | int num_blocks) | ||
268 | { | 295 | { |
269 | return start_transaction(root, 0, TRANS_JOIN_NOLOCK); | 296 | return start_transaction(root, 0, TRANS_JOIN_NOLOCK); |
270 | } | 297 | } |
271 | 298 | ||
272 | struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *r, | 299 | struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *root) |
273 | int num_blocks) | ||
274 | { | 300 | { |
275 | return start_transaction(r, 0, TRANS_USERSPACE); | 301 | return start_transaction(root, 0, TRANS_USERSPACE); |
276 | } | 302 | } |
277 | 303 | ||
278 | /* wait for a transaction commit to be fully complete */ | 304 | /* wait for a transaction commit to be fully complete */ |
@@ -280,17 +306,13 @@ static noinline int wait_for_commit(struct btrfs_root *root, | |||
280 | struct btrfs_transaction *commit) | 306 | struct btrfs_transaction *commit) |
281 | { | 307 | { |
282 | DEFINE_WAIT(wait); | 308 | DEFINE_WAIT(wait); |
283 | mutex_lock(&root->fs_info->trans_mutex); | ||
284 | while (!commit->commit_done) { | 309 | while (!commit->commit_done) { |
285 | prepare_to_wait(&commit->commit_wait, &wait, | 310 | prepare_to_wait(&commit->commit_wait, &wait, |
286 | TASK_UNINTERRUPTIBLE); | 311 | TASK_UNINTERRUPTIBLE); |
287 | if (commit->commit_done) | 312 | if (commit->commit_done) |
288 | break; | 313 | break; |
289 | mutex_unlock(&root->fs_info->trans_mutex); | ||
290 | schedule(); | 314 | schedule(); |
291 | mutex_lock(&root->fs_info->trans_mutex); | ||
292 | } | 315 | } |
293 | mutex_unlock(&root->fs_info->trans_mutex); | ||
294 | finish_wait(&commit->commit_wait, &wait); | 316 | finish_wait(&commit->commit_wait, &wait); |
295 | return 0; | 317 | return 0; |
296 | } | 318 | } |
@@ -300,59 +322,56 @@ int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid) | |||
300 | struct btrfs_transaction *cur_trans = NULL, *t; | 322 | struct btrfs_transaction *cur_trans = NULL, *t; |
301 | int ret; | 323 | int ret; |
302 | 324 | ||
303 | mutex_lock(&root->fs_info->trans_mutex); | ||
304 | |||
305 | ret = 0; | 325 | ret = 0; |
306 | if (transid) { | 326 | if (transid) { |
307 | if (transid <= root->fs_info->last_trans_committed) | 327 | if (transid <= root->fs_info->last_trans_committed) |
308 | goto out_unlock; | 328 | goto out; |
309 | 329 | ||
310 | /* find specified transaction */ | 330 | /* find specified transaction */ |
331 | spin_lock(&root->fs_info->trans_lock); | ||
311 | list_for_each_entry(t, &root->fs_info->trans_list, list) { | 332 | list_for_each_entry(t, &root->fs_info->trans_list, list) { |
312 | if (t->transid == transid) { | 333 | if (t->transid == transid) { |
313 | cur_trans = t; | 334 | cur_trans = t; |
335 | atomic_inc(&cur_trans->use_count); | ||
314 | break; | 336 | break; |
315 | } | 337 | } |
316 | if (t->transid > transid) | 338 | if (t->transid > transid) |
317 | break; | 339 | break; |
318 | } | 340 | } |
341 | spin_unlock(&root->fs_info->trans_lock); | ||
319 | ret = -EINVAL; | 342 | ret = -EINVAL; |
320 | if (!cur_trans) | 343 | if (!cur_trans) |
321 | goto out_unlock; /* bad transid */ | 344 | goto out; /* bad transid */ |
322 | } else { | 345 | } else { |
323 | /* find newest transaction that is committing | committed */ | 346 | /* find newest transaction that is committing | committed */ |
347 | spin_lock(&root->fs_info->trans_lock); | ||
324 | list_for_each_entry_reverse(t, &root->fs_info->trans_list, | 348 | list_for_each_entry_reverse(t, &root->fs_info->trans_list, |
325 | list) { | 349 | list) { |
326 | if (t->in_commit) { | 350 | if (t->in_commit) { |
327 | if (t->commit_done) | 351 | if (t->commit_done) |
328 | goto out_unlock; | 352 | goto out; |
329 | cur_trans = t; | 353 | cur_trans = t; |
354 | atomic_inc(&cur_trans->use_count); | ||
330 | break; | 355 | break; |
331 | } | 356 | } |
332 | } | 357 | } |
358 | spin_unlock(&root->fs_info->trans_lock); | ||
333 | if (!cur_trans) | 359 | if (!cur_trans) |
334 | goto out_unlock; /* nothing committing|committed */ | 360 | goto out; /* nothing committing|committed */ |
335 | } | 361 | } |
336 | 362 | ||
337 | atomic_inc(&cur_trans->use_count); | ||
338 | mutex_unlock(&root->fs_info->trans_mutex); | ||
339 | |||
340 | wait_for_commit(root, cur_trans); | 363 | wait_for_commit(root, cur_trans); |
341 | 364 | ||
342 | mutex_lock(&root->fs_info->trans_mutex); | ||
343 | put_transaction(cur_trans); | 365 | put_transaction(cur_trans); |
344 | ret = 0; | 366 | ret = 0; |
345 | out_unlock: | 367 | out: |
346 | mutex_unlock(&root->fs_info->trans_mutex); | ||
347 | return ret; | 368 | return ret; |
348 | } | 369 | } |
349 | 370 | ||
350 | void btrfs_throttle(struct btrfs_root *root) | 371 | void btrfs_throttle(struct btrfs_root *root) |
351 | { | 372 | { |
352 | mutex_lock(&root->fs_info->trans_mutex); | 373 | if (!atomic_read(&root->fs_info->open_ioctl_trans)) |
353 | if (!root->fs_info->open_ioctl_trans) | ||
354 | wait_current_trans(root); | 374 | wait_current_trans(root); |
355 | mutex_unlock(&root->fs_info->trans_mutex); | ||
356 | } | 375 | } |
357 | 376 | ||
358 | static int should_end_transaction(struct btrfs_trans_handle *trans, | 377 | static int should_end_transaction(struct btrfs_trans_handle *trans, |
@@ -370,6 +389,7 @@ int btrfs_should_end_transaction(struct btrfs_trans_handle *trans, | |||
370 | struct btrfs_transaction *cur_trans = trans->transaction; | 389 | struct btrfs_transaction *cur_trans = trans->transaction; |
371 | int updates; | 390 | int updates; |
372 | 391 | ||
392 | smp_mb(); | ||
373 | if (cur_trans->blocked || cur_trans->delayed_refs.flushing) | 393 | if (cur_trans->blocked || cur_trans->delayed_refs.flushing) |
374 | return 1; | 394 | return 1; |
375 | 395 | ||
@@ -388,6 +408,11 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, | |||
388 | struct btrfs_fs_info *info = root->fs_info; | 408 | struct btrfs_fs_info *info = root->fs_info; |
389 | int count = 0; | 409 | int count = 0; |
390 | 410 | ||
411 | if (--trans->use_count) { | ||
412 | trans->block_rsv = trans->orig_rsv; | ||
413 | return 0; | ||
414 | } | ||
415 | |||
391 | while (count < 4) { | 416 | while (count < 4) { |
392 | unsigned long cur = trans->delayed_ref_updates; | 417 | unsigned long cur = trans->delayed_ref_updates; |
393 | trans->delayed_ref_updates = 0; | 418 | trans->delayed_ref_updates = 0; |
@@ -410,9 +435,11 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, | |||
410 | 435 | ||
411 | btrfs_trans_release_metadata(trans, root); | 436 | btrfs_trans_release_metadata(trans, root); |
412 | 437 | ||
413 | if (lock && !root->fs_info->open_ioctl_trans && | 438 | if (lock && !atomic_read(&root->fs_info->open_ioctl_trans) && |
414 | should_end_transaction(trans, root)) | 439 | should_end_transaction(trans, root)) { |
415 | trans->transaction->blocked = 1; | 440 | trans->transaction->blocked = 1; |
441 | smp_wmb(); | ||
442 | } | ||
416 | 443 | ||
417 | if (lock && cur_trans->blocked && !cur_trans->in_commit) { | 444 | if (lock && cur_trans->blocked && !cur_trans->in_commit) { |
418 | if (throttle) | 445 | if (throttle) |
@@ -703,9 +730,9 @@ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans, | |||
703 | */ | 730 | */ |
704 | int btrfs_add_dead_root(struct btrfs_root *root) | 731 | int btrfs_add_dead_root(struct btrfs_root *root) |
705 | { | 732 | { |
706 | mutex_lock(&root->fs_info->trans_mutex); | 733 | spin_lock(&root->fs_info->trans_lock); |
707 | list_add(&root->root_list, &root->fs_info->dead_roots); | 734 | list_add(&root->root_list, &root->fs_info->dead_roots); |
708 | mutex_unlock(&root->fs_info->trans_mutex); | 735 | spin_unlock(&root->fs_info->trans_lock); |
709 | return 0; | 736 | return 0; |
710 | } | 737 | } |
711 | 738 | ||
@@ -721,6 +748,7 @@ static noinline int commit_fs_roots(struct btrfs_trans_handle *trans, | |||
721 | int ret; | 748 | int ret; |
722 | int err = 0; | 749 | int err = 0; |
723 | 750 | ||
751 | spin_lock(&fs_info->fs_roots_radix_lock); | ||
724 | while (1) { | 752 | while (1) { |
725 | ret = radix_tree_gang_lookup_tag(&fs_info->fs_roots_radix, | 753 | ret = radix_tree_gang_lookup_tag(&fs_info->fs_roots_radix, |
726 | (void **)gang, 0, | 754 | (void **)gang, 0, |
@@ -733,6 +761,7 @@ static noinline int commit_fs_roots(struct btrfs_trans_handle *trans, | |||
733 | radix_tree_tag_clear(&fs_info->fs_roots_radix, | 761 | radix_tree_tag_clear(&fs_info->fs_roots_radix, |
734 | (unsigned long)root->root_key.objectid, | 762 | (unsigned long)root->root_key.objectid, |
735 | BTRFS_ROOT_TRANS_TAG); | 763 | BTRFS_ROOT_TRANS_TAG); |
764 | spin_unlock(&fs_info->fs_roots_radix_lock); | ||
736 | 765 | ||
737 | btrfs_free_log(trans, root); | 766 | btrfs_free_log(trans, root); |
738 | btrfs_update_reloc_root(trans, root); | 767 | btrfs_update_reloc_root(trans, root); |
@@ -753,10 +782,12 @@ static noinline int commit_fs_roots(struct btrfs_trans_handle *trans, | |||
753 | err = btrfs_update_root(trans, fs_info->tree_root, | 782 | err = btrfs_update_root(trans, fs_info->tree_root, |
754 | &root->root_key, | 783 | &root->root_key, |
755 | &root->root_item); | 784 | &root->root_item); |
785 | spin_lock(&fs_info->fs_roots_radix_lock); | ||
756 | if (err) | 786 | if (err) |
757 | break; | 787 | break; |
758 | } | 788 | } |
759 | } | 789 | } |
790 | spin_unlock(&fs_info->fs_roots_radix_lock); | ||
760 | return err; | 791 | return err; |
761 | } | 792 | } |
762 | 793 | ||
@@ -786,7 +817,7 @@ int btrfs_defrag_root(struct btrfs_root *root, int cacheonly) | |||
786 | btrfs_btree_balance_dirty(info->tree_root, nr); | 817 | btrfs_btree_balance_dirty(info->tree_root, nr); |
787 | cond_resched(); | 818 | cond_resched(); |
788 | 819 | ||
789 | if (root->fs_info->closing || ret != -EAGAIN) | 820 | if (btrfs_fs_closing(root->fs_info) || ret != -EAGAIN) |
790 | break; | 821 | break; |
791 | } | 822 | } |
792 | root->defrag_running = 0; | 823 | root->defrag_running = 0; |
@@ -851,7 +882,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, | |||
851 | parent = dget_parent(dentry); | 882 | parent = dget_parent(dentry); |
852 | parent_inode = parent->d_inode; | 883 | parent_inode = parent->d_inode; |
853 | parent_root = BTRFS_I(parent_inode)->root; | 884 | parent_root = BTRFS_I(parent_inode)->root; |
854 | record_root_in_trans(trans, parent_root); | 885 | btrfs_record_root_in_trans(trans, parent_root); |
855 | 886 | ||
856 | /* | 887 | /* |
857 | * insert the directory item | 888 | * insert the directory item |
@@ -869,7 +900,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, | |||
869 | ret = btrfs_update_inode(trans, parent_root, parent_inode); | 900 | ret = btrfs_update_inode(trans, parent_root, parent_inode); |
870 | BUG_ON(ret); | 901 | BUG_ON(ret); |
871 | 902 | ||
872 | record_root_in_trans(trans, root); | 903 | btrfs_record_root_in_trans(trans, root); |
873 | btrfs_set_root_last_snapshot(&root->root_item, trans->transid); | 904 | btrfs_set_root_last_snapshot(&root->root_item, trans->transid); |
874 | memcpy(new_root_item, &root->root_item, sizeof(*new_root_item)); | 905 | memcpy(new_root_item, &root->root_item, sizeof(*new_root_item)); |
875 | btrfs_check_and_init_root_item(new_root_item); | 906 | btrfs_check_and_init_root_item(new_root_item); |
@@ -967,20 +998,20 @@ static void update_super_roots(struct btrfs_root *root) | |||
967 | int btrfs_transaction_in_commit(struct btrfs_fs_info *info) | 998 | int btrfs_transaction_in_commit(struct btrfs_fs_info *info) |
968 | { | 999 | { |
969 | int ret = 0; | 1000 | int ret = 0; |
970 | spin_lock(&info->new_trans_lock); | 1001 | spin_lock(&info->trans_lock); |
971 | if (info->running_transaction) | 1002 | if (info->running_transaction) |
972 | ret = info->running_transaction->in_commit; | 1003 | ret = info->running_transaction->in_commit; |
973 | spin_unlock(&info->new_trans_lock); | 1004 | spin_unlock(&info->trans_lock); |
974 | return ret; | 1005 | return ret; |
975 | } | 1006 | } |
976 | 1007 | ||
977 | int btrfs_transaction_blocked(struct btrfs_fs_info *info) | 1008 | int btrfs_transaction_blocked(struct btrfs_fs_info *info) |
978 | { | 1009 | { |
979 | int ret = 0; | 1010 | int ret = 0; |
980 | spin_lock(&info->new_trans_lock); | 1011 | spin_lock(&info->trans_lock); |
981 | if (info->running_transaction) | 1012 | if (info->running_transaction) |
982 | ret = info->running_transaction->blocked; | 1013 | ret = info->running_transaction->blocked; |
983 | spin_unlock(&info->new_trans_lock); | 1014 | spin_unlock(&info->trans_lock); |
984 | return ret; | 1015 | return ret; |
985 | } | 1016 | } |
986 | 1017 | ||
@@ -1004,9 +1035,7 @@ static void wait_current_trans_commit_start(struct btrfs_root *root, | |||
1004 | &wait); | 1035 | &wait); |
1005 | break; | 1036 | break; |
1006 | } | 1037 | } |
1007 | mutex_unlock(&root->fs_info->trans_mutex); | ||
1008 | schedule(); | 1038 | schedule(); |
1009 | mutex_lock(&root->fs_info->trans_mutex); | ||
1010 | finish_wait(&root->fs_info->transaction_blocked_wait, &wait); | 1039 | finish_wait(&root->fs_info->transaction_blocked_wait, &wait); |
1011 | } | 1040 | } |
1012 | } | 1041 | } |
@@ -1032,9 +1061,7 @@ static void wait_current_trans_commit_start_and_unblock(struct btrfs_root *root, | |||
1032 | &wait); | 1061 | &wait); |
1033 | break; | 1062 | break; |
1034 | } | 1063 | } |
1035 | mutex_unlock(&root->fs_info->trans_mutex); | ||
1036 | schedule(); | 1064 | schedule(); |
1037 | mutex_lock(&root->fs_info->trans_mutex); | ||
1038 | finish_wait(&root->fs_info->transaction_wait, | 1065 | finish_wait(&root->fs_info->transaction_wait, |
1039 | &wait); | 1066 | &wait); |
1040 | } | 1067 | } |
@@ -1072,7 +1099,7 @@ int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans, | |||
1072 | 1099 | ||
1073 | INIT_DELAYED_WORK(&ac->work, do_async_commit); | 1100 | INIT_DELAYED_WORK(&ac->work, do_async_commit); |
1074 | ac->root = root; | 1101 | ac->root = root; |
1075 | ac->newtrans = btrfs_join_transaction(root, 0); | 1102 | ac->newtrans = btrfs_join_transaction(root); |
1076 | if (IS_ERR(ac->newtrans)) { | 1103 | if (IS_ERR(ac->newtrans)) { |
1077 | int err = PTR_ERR(ac->newtrans); | 1104 | int err = PTR_ERR(ac->newtrans); |
1078 | kfree(ac); | 1105 | kfree(ac); |
@@ -1080,22 +1107,18 @@ int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans, | |||
1080 | } | 1107 | } |
1081 | 1108 | ||
1082 | /* take transaction reference */ | 1109 | /* take transaction reference */ |
1083 | mutex_lock(&root->fs_info->trans_mutex); | ||
1084 | cur_trans = trans->transaction; | 1110 | cur_trans = trans->transaction; |
1085 | atomic_inc(&cur_trans->use_count); | 1111 | atomic_inc(&cur_trans->use_count); |
1086 | mutex_unlock(&root->fs_info->trans_mutex); | ||
1087 | 1112 | ||
1088 | btrfs_end_transaction(trans, root); | 1113 | btrfs_end_transaction(trans, root); |
1089 | schedule_delayed_work(&ac->work, 0); | 1114 | schedule_delayed_work(&ac->work, 0); |
1090 | 1115 | ||
1091 | /* wait for transaction to start and unblock */ | 1116 | /* wait for transaction to start and unblock */ |
1092 | mutex_lock(&root->fs_info->trans_mutex); | ||
1093 | if (wait_for_unblock) | 1117 | if (wait_for_unblock) |
1094 | wait_current_trans_commit_start_and_unblock(root, cur_trans); | 1118 | wait_current_trans_commit_start_and_unblock(root, cur_trans); |
1095 | else | 1119 | else |
1096 | wait_current_trans_commit_start(root, cur_trans); | 1120 | wait_current_trans_commit_start(root, cur_trans); |
1097 | put_transaction(cur_trans); | 1121 | put_transaction(cur_trans); |
1098 | mutex_unlock(&root->fs_info->trans_mutex); | ||
1099 | 1122 | ||
1100 | return 0; | 1123 | return 0; |
1101 | } | 1124 | } |
@@ -1139,38 +1162,41 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
1139 | ret = btrfs_run_delayed_refs(trans, root, 0); | 1162 | ret = btrfs_run_delayed_refs(trans, root, 0); |
1140 | BUG_ON(ret); | 1163 | BUG_ON(ret); |
1141 | 1164 | ||
1142 | mutex_lock(&root->fs_info->trans_mutex); | 1165 | spin_lock(&cur_trans->commit_lock); |
1143 | if (cur_trans->in_commit) { | 1166 | if (cur_trans->in_commit) { |
1167 | spin_unlock(&cur_trans->commit_lock); | ||
1144 | atomic_inc(&cur_trans->use_count); | 1168 | atomic_inc(&cur_trans->use_count); |
1145 | mutex_unlock(&root->fs_info->trans_mutex); | ||
1146 | btrfs_end_transaction(trans, root); | 1169 | btrfs_end_transaction(trans, root); |
1147 | 1170 | ||
1148 | ret = wait_for_commit(root, cur_trans); | 1171 | ret = wait_for_commit(root, cur_trans); |
1149 | BUG_ON(ret); | 1172 | BUG_ON(ret); |
1150 | 1173 | ||
1151 | mutex_lock(&root->fs_info->trans_mutex); | ||
1152 | put_transaction(cur_trans); | 1174 | put_transaction(cur_trans); |
1153 | mutex_unlock(&root->fs_info->trans_mutex); | ||
1154 | 1175 | ||
1155 | return 0; | 1176 | return 0; |
1156 | } | 1177 | } |
1157 | 1178 | ||
1158 | trans->transaction->in_commit = 1; | 1179 | trans->transaction->in_commit = 1; |
1159 | trans->transaction->blocked = 1; | 1180 | trans->transaction->blocked = 1; |
1181 | spin_unlock(&cur_trans->commit_lock); | ||
1160 | wake_up(&root->fs_info->transaction_blocked_wait); | 1182 | wake_up(&root->fs_info->transaction_blocked_wait); |
1161 | 1183 | ||
1184 | spin_lock(&root->fs_info->trans_lock); | ||
1162 | if (cur_trans->list.prev != &root->fs_info->trans_list) { | 1185 | if (cur_trans->list.prev != &root->fs_info->trans_list) { |
1163 | prev_trans = list_entry(cur_trans->list.prev, | 1186 | prev_trans = list_entry(cur_trans->list.prev, |
1164 | struct btrfs_transaction, list); | 1187 | struct btrfs_transaction, list); |
1165 | if (!prev_trans->commit_done) { | 1188 | if (!prev_trans->commit_done) { |
1166 | atomic_inc(&prev_trans->use_count); | 1189 | atomic_inc(&prev_trans->use_count); |
1167 | mutex_unlock(&root->fs_info->trans_mutex); | 1190 | spin_unlock(&root->fs_info->trans_lock); |
1168 | 1191 | ||
1169 | wait_for_commit(root, prev_trans); | 1192 | wait_for_commit(root, prev_trans); |
1170 | 1193 | ||
1171 | mutex_lock(&root->fs_info->trans_mutex); | ||
1172 | put_transaction(prev_trans); | 1194 | put_transaction(prev_trans); |
1195 | } else { | ||
1196 | spin_unlock(&root->fs_info->trans_lock); | ||
1173 | } | 1197 | } |
1198 | } else { | ||
1199 | spin_unlock(&root->fs_info->trans_lock); | ||
1174 | } | 1200 | } |
1175 | 1201 | ||
1176 | if (now < cur_trans->start_time || now - cur_trans->start_time < 1) | 1202 | if (now < cur_trans->start_time || now - cur_trans->start_time < 1) |
@@ -1178,12 +1204,12 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
1178 | 1204 | ||
1179 | do { | 1205 | do { |
1180 | int snap_pending = 0; | 1206 | int snap_pending = 0; |
1207 | |||
1181 | joined = cur_trans->num_joined; | 1208 | joined = cur_trans->num_joined; |
1182 | if (!list_empty(&trans->transaction->pending_snapshots)) | 1209 | if (!list_empty(&trans->transaction->pending_snapshots)) |
1183 | snap_pending = 1; | 1210 | snap_pending = 1; |
1184 | 1211 | ||
1185 | WARN_ON(cur_trans != trans->transaction); | 1212 | WARN_ON(cur_trans != trans->transaction); |
1186 | mutex_unlock(&root->fs_info->trans_mutex); | ||
1187 | 1213 | ||
1188 | if (flush_on_commit || snap_pending) { | 1214 | if (flush_on_commit || snap_pending) { |
1189 | btrfs_start_delalloc_inodes(root, 1); | 1215 | btrfs_start_delalloc_inodes(root, 1); |
@@ -1206,14 +1232,15 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
1206 | prepare_to_wait(&cur_trans->writer_wait, &wait, | 1232 | prepare_to_wait(&cur_trans->writer_wait, &wait, |
1207 | TASK_UNINTERRUPTIBLE); | 1233 | TASK_UNINTERRUPTIBLE); |
1208 | 1234 | ||
1209 | smp_mb(); | ||
1210 | if (atomic_read(&cur_trans->num_writers) > 1) | 1235 | if (atomic_read(&cur_trans->num_writers) > 1) |
1211 | schedule_timeout(MAX_SCHEDULE_TIMEOUT); | 1236 | schedule_timeout(MAX_SCHEDULE_TIMEOUT); |
1212 | else if (should_grow) | 1237 | else if (should_grow) |
1213 | schedule_timeout(1); | 1238 | schedule_timeout(1); |
1214 | 1239 | ||
1215 | mutex_lock(&root->fs_info->trans_mutex); | ||
1216 | finish_wait(&cur_trans->writer_wait, &wait); | 1240 | finish_wait(&cur_trans->writer_wait, &wait); |
1241 | spin_lock(&root->fs_info->trans_lock); | ||
1242 | root->fs_info->trans_no_join = 1; | ||
1243 | spin_unlock(&root->fs_info->trans_lock); | ||
1217 | } while (atomic_read(&cur_trans->num_writers) > 1 || | 1244 | } while (atomic_read(&cur_trans->num_writers) > 1 || |
1218 | (should_grow && cur_trans->num_joined != joined)); | 1245 | (should_grow && cur_trans->num_joined != joined)); |
1219 | 1246 | ||
@@ -1258,9 +1285,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
1258 | btrfs_prepare_extent_commit(trans, root); | 1285 | btrfs_prepare_extent_commit(trans, root); |
1259 | 1286 | ||
1260 | cur_trans = root->fs_info->running_transaction; | 1287 | cur_trans = root->fs_info->running_transaction; |
1261 | spin_lock(&root->fs_info->new_trans_lock); | ||
1262 | root->fs_info->running_transaction = NULL; | ||
1263 | spin_unlock(&root->fs_info->new_trans_lock); | ||
1264 | 1288 | ||
1265 | btrfs_set_root_node(&root->fs_info->tree_root->root_item, | 1289 | btrfs_set_root_node(&root->fs_info->tree_root->root_item, |
1266 | root->fs_info->tree_root->node); | 1290 | root->fs_info->tree_root->node); |
@@ -1281,10 +1305,13 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
1281 | sizeof(root->fs_info->super_copy)); | 1305 | sizeof(root->fs_info->super_copy)); |
1282 | 1306 | ||
1283 | trans->transaction->blocked = 0; | 1307 | trans->transaction->blocked = 0; |
1308 | spin_lock(&root->fs_info->trans_lock); | ||
1309 | root->fs_info->running_transaction = NULL; | ||
1310 | root->fs_info->trans_no_join = 0; | ||
1311 | spin_unlock(&root->fs_info->trans_lock); | ||
1284 | 1312 | ||
1285 | wake_up(&root->fs_info->transaction_wait); | 1313 | wake_up(&root->fs_info->transaction_wait); |
1286 | 1314 | ||
1287 | mutex_unlock(&root->fs_info->trans_mutex); | ||
1288 | ret = btrfs_write_and_wait_transaction(trans, root); | 1315 | ret = btrfs_write_and_wait_transaction(trans, root); |
1289 | BUG_ON(ret); | 1316 | BUG_ON(ret); |
1290 | write_ctree_super(trans, root, 0); | 1317 | write_ctree_super(trans, root, 0); |
@@ -1297,22 +1324,21 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
1297 | 1324 | ||
1298 | btrfs_finish_extent_commit(trans, root); | 1325 | btrfs_finish_extent_commit(trans, root); |
1299 | 1326 | ||
1300 | mutex_lock(&root->fs_info->trans_mutex); | ||
1301 | |||
1302 | cur_trans->commit_done = 1; | 1327 | cur_trans->commit_done = 1; |
1303 | 1328 | ||
1304 | root->fs_info->last_trans_committed = cur_trans->transid; | 1329 | root->fs_info->last_trans_committed = cur_trans->transid; |
1305 | 1330 | ||
1306 | wake_up(&cur_trans->commit_wait); | 1331 | wake_up(&cur_trans->commit_wait); |
1307 | 1332 | ||
1333 | spin_lock(&root->fs_info->trans_lock); | ||
1308 | list_del_init(&cur_trans->list); | 1334 | list_del_init(&cur_trans->list); |
1335 | spin_unlock(&root->fs_info->trans_lock); | ||
1336 | |||
1309 | put_transaction(cur_trans); | 1337 | put_transaction(cur_trans); |
1310 | put_transaction(cur_trans); | 1338 | put_transaction(cur_trans); |
1311 | 1339 | ||
1312 | trace_btrfs_transaction_commit(root); | 1340 | trace_btrfs_transaction_commit(root); |
1313 | 1341 | ||
1314 | mutex_unlock(&root->fs_info->trans_mutex); | ||
1315 | |||
1316 | btrfs_scrub_continue(root); | 1342 | btrfs_scrub_continue(root); |
1317 | 1343 | ||
1318 | if (current->journal_info == trans) | 1344 | if (current->journal_info == trans) |
@@ -1334,9 +1360,9 @@ int btrfs_clean_old_snapshots(struct btrfs_root *root) | |||
1334 | LIST_HEAD(list); | 1360 | LIST_HEAD(list); |
1335 | struct btrfs_fs_info *fs_info = root->fs_info; | 1361 | struct btrfs_fs_info *fs_info = root->fs_info; |
1336 | 1362 | ||
1337 | mutex_lock(&fs_info->trans_mutex); | 1363 | spin_lock(&fs_info->trans_lock); |
1338 | list_splice_init(&fs_info->dead_roots, &list); | 1364 | list_splice_init(&fs_info->dead_roots, &list); |
1339 | mutex_unlock(&fs_info->trans_mutex); | 1365 | spin_unlock(&fs_info->trans_lock); |
1340 | 1366 | ||
1341 | while (!list_empty(&list)) { | 1367 | while (!list_empty(&list)) { |
1342 | root = list_entry(list.next, struct btrfs_root, root_list); | 1368 | root = list_entry(list.next, struct btrfs_root, root_list); |
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index 804c88639e5d..02564e6230ac 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h | |||
@@ -28,10 +28,12 @@ struct btrfs_transaction { | |||
28 | * transaction can end | 28 | * transaction can end |
29 | */ | 29 | */ |
30 | atomic_t num_writers; | 30 | atomic_t num_writers; |
31 | atomic_t use_count; | ||
31 | 32 | ||
32 | unsigned long num_joined; | 33 | unsigned long num_joined; |
34 | |||
35 | spinlock_t commit_lock; | ||
33 | int in_commit; | 36 | int in_commit; |
34 | atomic_t use_count; | ||
35 | int commit_done; | 37 | int commit_done; |
36 | int blocked; | 38 | int blocked; |
37 | struct list_head list; | 39 | struct list_head list; |
@@ -45,13 +47,14 @@ struct btrfs_transaction { | |||
45 | 47 | ||
46 | struct btrfs_trans_handle { | 48 | struct btrfs_trans_handle { |
47 | u64 transid; | 49 | u64 transid; |
48 | u64 block_group; | ||
49 | u64 bytes_reserved; | 50 | u64 bytes_reserved; |
51 | unsigned long use_count; | ||
50 | unsigned long blocks_reserved; | 52 | unsigned long blocks_reserved; |
51 | unsigned long blocks_used; | 53 | unsigned long blocks_used; |
52 | unsigned long delayed_ref_updates; | 54 | unsigned long delayed_ref_updates; |
53 | struct btrfs_transaction *transaction; | 55 | struct btrfs_transaction *transaction; |
54 | struct btrfs_block_rsv *block_rsv; | 56 | struct btrfs_block_rsv *block_rsv; |
57 | struct btrfs_block_rsv *orig_rsv; | ||
55 | }; | 58 | }; |
56 | 59 | ||
57 | struct btrfs_pending_snapshot { | 60 | struct btrfs_pending_snapshot { |
@@ -66,19 +69,6 @@ struct btrfs_pending_snapshot { | |||
66 | struct list_head list; | 69 | struct list_head list; |
67 | }; | 70 | }; |
68 | 71 | ||
69 | static inline void btrfs_set_trans_block_group(struct btrfs_trans_handle *trans, | ||
70 | struct inode *inode) | ||
71 | { | ||
72 | trans->block_group = BTRFS_I(inode)->block_group; | ||
73 | } | ||
74 | |||
75 | static inline void btrfs_update_inode_block_group( | ||
76 | struct btrfs_trans_handle *trans, | ||
77 | struct inode *inode) | ||
78 | { | ||
79 | BTRFS_I(inode)->block_group = trans->block_group; | ||
80 | } | ||
81 | |||
82 | static inline void btrfs_set_inode_last_trans(struct btrfs_trans_handle *trans, | 72 | static inline void btrfs_set_inode_last_trans(struct btrfs_trans_handle *trans, |
83 | struct inode *inode) | 73 | struct inode *inode) |
84 | { | 74 | { |
@@ -92,12 +82,9 @@ int btrfs_end_transaction_nolock(struct btrfs_trans_handle *trans, | |||
92 | struct btrfs_root *root); | 82 | struct btrfs_root *root); |
93 | struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root, | 83 | struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root, |
94 | int num_items); | 84 | int num_items); |
95 | struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root, | 85 | struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root); |
96 | int num_blocks); | 86 | struct btrfs_trans_handle *btrfs_join_transaction_nolock(struct btrfs_root *root); |
97 | struct btrfs_trans_handle *btrfs_join_transaction_nolock(struct btrfs_root *root, | 87 | struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *root); |
98 | int num_blocks); | ||
99 | struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *r, | ||
100 | int num_blocks); | ||
101 | int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid); | 88 | int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid); |
102 | int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, | 89 | int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, |
103 | struct btrfs_root *root); | 90 | struct btrfs_root *root); |
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index c48214ef5c09..da541dfca2e3 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c | |||
@@ -504,7 +504,7 @@ static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices) | |||
504 | BUG_ON(!new_device); | 504 | BUG_ON(!new_device); |
505 | memcpy(new_device, device, sizeof(*new_device)); | 505 | memcpy(new_device, device, sizeof(*new_device)); |
506 | new_device->name = kstrdup(device->name, GFP_NOFS); | 506 | new_device->name = kstrdup(device->name, GFP_NOFS); |
507 | BUG_ON(!new_device->name); | 507 | BUG_ON(device->name && !new_device->name); |
508 | new_device->bdev = NULL; | 508 | new_device->bdev = NULL; |
509 | new_device->writeable = 0; | 509 | new_device->writeable = 0; |
510 | new_device->in_fs_metadata = 0; | 510 | new_device->in_fs_metadata = 0; |
diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c index f3107e4b4d56..5366fe452ab0 100644 --- a/fs/btrfs/xattr.c +++ b/fs/btrfs/xattr.c | |||
@@ -158,8 +158,6 @@ int __btrfs_setxattr(struct btrfs_trans_handle *trans, | |||
158 | if (IS_ERR(trans)) | 158 | if (IS_ERR(trans)) |
159 | return PTR_ERR(trans); | 159 | return PTR_ERR(trans); |
160 | 160 | ||
161 | btrfs_set_trans_block_group(trans, inode); | ||
162 | |||
163 | ret = do_setxattr(trans, inode, name, value, size, flags); | 161 | ret = do_setxattr(trans, inode, name, value, size, flags); |
164 | if (ret) | 162 | if (ret) |
165 | goto out; | 163 | goto out; |
diff --git a/fs/buffer.c b/fs/buffer.c index 698c6b2cc462..49c9aada0374 100644 --- a/fs/buffer.c +++ b/fs/buffer.c | |||
@@ -2382,6 +2382,7 @@ int __block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf, | |||
2382 | ret = -EAGAIN; | 2382 | ret = -EAGAIN; |
2383 | goto out_unlock; | 2383 | goto out_unlock; |
2384 | } | 2384 | } |
2385 | wait_on_page_writeback(page); | ||
2385 | return 0; | 2386 | return 0; |
2386 | out_unlock: | 2387 | out_unlock: |
2387 | unlock_page(page); | 2388 | unlock_page(page); |
diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c index 8f1700623b41..21de1d6d5849 100644 --- a/fs/cifs/cifsacl.c +++ b/fs/cifs/cifsacl.c | |||
@@ -74,8 +74,9 @@ shrink_idmap_tree(struct rb_root *root, int nr_to_scan, int *nr_rem, | |||
74 | * Run idmap cache shrinker. | 74 | * Run idmap cache shrinker. |
75 | */ | 75 | */ |
76 | static int | 76 | static int |
77 | cifs_idmap_shrinker(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask) | 77 | cifs_idmap_shrinker(struct shrinker *shrink, struct shrink_control *sc) |
78 | { | 78 | { |
79 | int nr_to_scan = sc->nr_to_scan; | ||
79 | int nr_del = 0; | 80 | int nr_del = 0; |
80 | int nr_rem = 0; | 81 | int nr_rem = 0; |
81 | struct rb_root *root; | 82 | struct rb_root *root; |
diff --git a/fs/coda/dir.c b/fs/coda/dir.c index a46126fd5735..2b8dae4d121e 100644 --- a/fs/coda/dir.c +++ b/fs/coda/dir.c | |||
@@ -336,8 +336,6 @@ static int coda_rmdir(struct inode *dir, struct dentry *de) | |||
336 | int len = de->d_name.len; | 336 | int len = de->d_name.len; |
337 | int error; | 337 | int error; |
338 | 338 | ||
339 | dentry_unhash(de); | ||
340 | |||
341 | error = venus_rmdir(dir->i_sb, coda_i2f(dir), name, len); | 339 | error = venus_rmdir(dir->i_sb, coda_i2f(dir), name, len); |
342 | if (!error) { | 340 | if (!error) { |
343 | /* VFS may delete the child */ | 341 | /* VFS may delete the child */ |
@@ -361,9 +359,6 @@ static int coda_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
361 | int new_length = new_dentry->d_name.len; | 359 | int new_length = new_dentry->d_name.len; |
362 | int error; | 360 | int error; |
363 | 361 | ||
364 | if (new_dentry->d_inode && S_ISDIR(new_dentry->d_inode->i_mode)) | ||
365 | dentry_unhash(new_dentry); | ||
366 | |||
367 | error = venus_rename(old_dir->i_sb, coda_i2f(old_dir), | 362 | error = venus_rename(old_dir->i_sb, coda_i2f(old_dir), |
368 | coda_i2f(new_dir), old_length, new_length, | 363 | coda_i2f(new_dir), old_length, new_length, |
369 | (const char *) old_name, (const char *)new_name); | 364 | (const char *) old_name, (const char *)new_name); |
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c index 9d17d350abc5..9a37a9b6de3a 100644 --- a/fs/configfs/dir.c +++ b/fs/configfs/dir.c | |||
@@ -1359,8 +1359,6 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry) | |||
1359 | struct module *subsys_owner = NULL, *dead_item_owner = NULL; | 1359 | struct module *subsys_owner = NULL, *dead_item_owner = NULL; |
1360 | int ret; | 1360 | int ret; |
1361 | 1361 | ||
1362 | dentry_unhash(dentry); | ||
1363 | |||
1364 | if (dentry->d_parent == configfs_sb->s_root) | 1362 | if (dentry->d_parent == configfs_sb->s_root) |
1365 | return -EPERM; | 1363 | return -EPERM; |
1366 | 1364 | ||
diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c index b8d5c8091024..58609bde3b9f 100644 --- a/fs/ecryptfs/crypto.c +++ b/fs/ecryptfs/crypto.c | |||
@@ -1024,25 +1024,25 @@ out: | |||
1024 | } | 1024 | } |
1025 | 1025 | ||
1026 | /** | 1026 | /** |
1027 | * contains_ecryptfs_marker - check for the ecryptfs marker | 1027 | * ecryptfs_validate_marker - check for the ecryptfs marker |
1028 | * @data: The data block in which to check | 1028 | * @data: The data block in which to check |
1029 | * | 1029 | * |
1030 | * Returns one if marker found; zero if not found | 1030 | * Returns zero if marker found; -EINVAL if not found |
1031 | */ | 1031 | */ |
1032 | static int contains_ecryptfs_marker(char *data) | 1032 | static int ecryptfs_validate_marker(char *data) |
1033 | { | 1033 | { |
1034 | u32 m_1, m_2; | 1034 | u32 m_1, m_2; |
1035 | 1035 | ||
1036 | m_1 = get_unaligned_be32(data); | 1036 | m_1 = get_unaligned_be32(data); |
1037 | m_2 = get_unaligned_be32(data + 4); | 1037 | m_2 = get_unaligned_be32(data + 4); |
1038 | if ((m_1 ^ MAGIC_ECRYPTFS_MARKER) == m_2) | 1038 | if ((m_1 ^ MAGIC_ECRYPTFS_MARKER) == m_2) |
1039 | return 1; | 1039 | return 0; |
1040 | ecryptfs_printk(KERN_DEBUG, "m_1 = [0x%.8x]; m_2 = [0x%.8x]; " | 1040 | ecryptfs_printk(KERN_DEBUG, "m_1 = [0x%.8x]; m_2 = [0x%.8x]; " |
1041 | "MAGIC_ECRYPTFS_MARKER = [0x%.8x]\n", m_1, m_2, | 1041 | "MAGIC_ECRYPTFS_MARKER = [0x%.8x]\n", m_1, m_2, |
1042 | MAGIC_ECRYPTFS_MARKER); | 1042 | MAGIC_ECRYPTFS_MARKER); |
1043 | ecryptfs_printk(KERN_DEBUG, "(m_1 ^ MAGIC_ECRYPTFS_MARKER) = " | 1043 | ecryptfs_printk(KERN_DEBUG, "(m_1 ^ MAGIC_ECRYPTFS_MARKER) = " |
1044 | "[0x%.8x]\n", (m_1 ^ MAGIC_ECRYPTFS_MARKER)); | 1044 | "[0x%.8x]\n", (m_1 ^ MAGIC_ECRYPTFS_MARKER)); |
1045 | return 0; | 1045 | return -EINVAL; |
1046 | } | 1046 | } |
1047 | 1047 | ||
1048 | struct ecryptfs_flag_map_elem { | 1048 | struct ecryptfs_flag_map_elem { |
@@ -1201,27 +1201,19 @@ int ecryptfs_cipher_code_to_string(char *str, u8 cipher_code) | |||
1201 | return rc; | 1201 | return rc; |
1202 | } | 1202 | } |
1203 | 1203 | ||
1204 | int ecryptfs_read_and_validate_header_region(char *data, | 1204 | int ecryptfs_read_and_validate_header_region(struct inode *inode) |
1205 | struct inode *ecryptfs_inode) | ||
1206 | { | 1205 | { |
1207 | struct ecryptfs_crypt_stat *crypt_stat = | 1206 | u8 file_size[ECRYPTFS_SIZE_AND_MARKER_BYTES]; |
1208 | &(ecryptfs_inode_to_private(ecryptfs_inode)->crypt_stat); | 1207 | u8 *marker = file_size + ECRYPTFS_FILE_SIZE_BYTES; |
1209 | int rc; | 1208 | int rc; |
1210 | 1209 | ||
1211 | if (crypt_stat->extent_size == 0) | 1210 | rc = ecryptfs_read_lower(file_size, 0, ECRYPTFS_SIZE_AND_MARKER_BYTES, |
1212 | crypt_stat->extent_size = ECRYPTFS_DEFAULT_EXTENT_SIZE; | 1211 | inode); |
1213 | rc = ecryptfs_read_lower(data, 0, crypt_stat->extent_size, | 1212 | if (rc < ECRYPTFS_SIZE_AND_MARKER_BYTES) |
1214 | ecryptfs_inode); | 1213 | return rc >= 0 ? -EINVAL : rc; |
1215 | if (rc < 0) { | 1214 | rc = ecryptfs_validate_marker(marker); |
1216 | printk(KERN_ERR "%s: Error reading header region; rc = [%d]\n", | 1215 | if (!rc) |
1217 | __func__, rc); | 1216 | ecryptfs_i_size_init(file_size, inode); |
1218 | goto out; | ||
1219 | } | ||
1220 | if (!contains_ecryptfs_marker(data + ECRYPTFS_FILE_SIZE_BYTES)) { | ||
1221 | rc = -EINVAL; | ||
1222 | } else | ||
1223 | rc = 0; | ||
1224 | out: | ||
1225 | return rc; | 1217 | return rc; |
1226 | } | 1218 | } |
1227 | 1219 | ||
@@ -1242,8 +1234,7 @@ ecryptfs_write_header_metadata(char *virt, | |||
1242 | (*written) = 6; | 1234 | (*written) = 6; |
1243 | } | 1235 | } |
1244 | 1236 | ||
1245 | struct kmem_cache *ecryptfs_header_cache_1; | 1237 | struct kmem_cache *ecryptfs_header_cache; |
1246 | struct kmem_cache *ecryptfs_header_cache_2; | ||
1247 | 1238 | ||
1248 | /** | 1239 | /** |
1249 | * ecryptfs_write_headers_virt | 1240 | * ecryptfs_write_headers_virt |
@@ -1496,11 +1487,9 @@ static int ecryptfs_read_headers_virt(char *page_virt, | |||
1496 | crypt_stat->mount_crypt_stat = &ecryptfs_superblock_to_private( | 1487 | crypt_stat->mount_crypt_stat = &ecryptfs_superblock_to_private( |
1497 | ecryptfs_dentry->d_sb)->mount_crypt_stat; | 1488 | ecryptfs_dentry->d_sb)->mount_crypt_stat; |
1498 | offset = ECRYPTFS_FILE_SIZE_BYTES; | 1489 | offset = ECRYPTFS_FILE_SIZE_BYTES; |
1499 | rc = contains_ecryptfs_marker(page_virt + offset); | 1490 | rc = ecryptfs_validate_marker(page_virt + offset); |
1500 | if (rc == 0) { | 1491 | if (rc) |
1501 | rc = -EINVAL; | ||
1502 | goto out; | 1492 | goto out; |
1503 | } | ||
1504 | if (!(crypt_stat->flags & ECRYPTFS_I_SIZE_INITIALIZED)) | 1493 | if (!(crypt_stat->flags & ECRYPTFS_I_SIZE_INITIALIZED)) |
1505 | ecryptfs_i_size_init(page_virt, ecryptfs_dentry->d_inode); | 1494 | ecryptfs_i_size_init(page_virt, ecryptfs_dentry->d_inode); |
1506 | offset += MAGIC_ECRYPTFS_MARKER_SIZE_BYTES; | 1495 | offset += MAGIC_ECRYPTFS_MARKER_SIZE_BYTES; |
@@ -1567,20 +1556,21 @@ out: | |||
1567 | return rc; | 1556 | return rc; |
1568 | } | 1557 | } |
1569 | 1558 | ||
1570 | int ecryptfs_read_and_validate_xattr_region(char *page_virt, | 1559 | int ecryptfs_read_and_validate_xattr_region(struct dentry *dentry, |
1571 | struct dentry *ecryptfs_dentry) | 1560 | struct inode *inode) |
1572 | { | 1561 | { |
1562 | u8 file_size[ECRYPTFS_SIZE_AND_MARKER_BYTES]; | ||
1563 | u8 *marker = file_size + ECRYPTFS_FILE_SIZE_BYTES; | ||
1573 | int rc; | 1564 | int rc; |
1574 | 1565 | ||
1575 | rc = ecryptfs_read_xattr_region(page_virt, ecryptfs_dentry->d_inode); | 1566 | rc = ecryptfs_getxattr_lower(ecryptfs_dentry_to_lower(dentry), |
1576 | if (rc) | 1567 | ECRYPTFS_XATTR_NAME, file_size, |
1577 | goto out; | 1568 | ECRYPTFS_SIZE_AND_MARKER_BYTES); |
1578 | if (!contains_ecryptfs_marker(page_virt + ECRYPTFS_FILE_SIZE_BYTES)) { | 1569 | if (rc < ECRYPTFS_SIZE_AND_MARKER_BYTES) |
1579 | printk(KERN_WARNING "Valid data found in [%s] xattr, but " | 1570 | return rc >= 0 ? -EINVAL : rc; |
1580 | "the marker is invalid\n", ECRYPTFS_XATTR_NAME); | 1571 | rc = ecryptfs_validate_marker(marker); |
1581 | rc = -EINVAL; | 1572 | if (!rc) |
1582 | } | 1573 | ecryptfs_i_size_init(file_size, inode); |
1583 | out: | ||
1584 | return rc; | 1574 | return rc; |
1585 | } | 1575 | } |
1586 | 1576 | ||
@@ -1610,7 +1600,7 @@ int ecryptfs_read_metadata(struct dentry *ecryptfs_dentry) | |||
1610 | ecryptfs_copy_mount_wide_flags_to_inode_flags(crypt_stat, | 1600 | ecryptfs_copy_mount_wide_flags_to_inode_flags(crypt_stat, |
1611 | mount_crypt_stat); | 1601 | mount_crypt_stat); |
1612 | /* Read the first page from the underlying file */ | 1602 | /* Read the first page from the underlying file */ |
1613 | page_virt = kmem_cache_alloc(ecryptfs_header_cache_1, GFP_USER); | 1603 | page_virt = kmem_cache_alloc(ecryptfs_header_cache, GFP_USER); |
1614 | if (!page_virt) { | 1604 | if (!page_virt) { |
1615 | rc = -ENOMEM; | 1605 | rc = -ENOMEM; |
1616 | printk(KERN_ERR "%s: Unable to allocate page_virt\n", | 1606 | printk(KERN_ERR "%s: Unable to allocate page_virt\n", |
@@ -1655,7 +1645,7 @@ int ecryptfs_read_metadata(struct dentry *ecryptfs_dentry) | |||
1655 | out: | 1645 | out: |
1656 | if (page_virt) { | 1646 | if (page_virt) { |
1657 | memset(page_virt, 0, PAGE_CACHE_SIZE); | 1647 | memset(page_virt, 0, PAGE_CACHE_SIZE); |
1658 | kmem_cache_free(ecryptfs_header_cache_1, page_virt); | 1648 | kmem_cache_free(ecryptfs_header_cache, page_virt); |
1659 | } | 1649 | } |
1660 | return rc; | 1650 | return rc; |
1661 | } | 1651 | } |
diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h index e70282775e2c..43c7c43b06f5 100644 --- a/fs/ecryptfs/ecryptfs_kernel.h +++ b/fs/ecryptfs/ecryptfs_kernel.h | |||
@@ -200,6 +200,8 @@ ecryptfs_get_key_payload_data(struct key *key) | |||
200 | #define MAGIC_ECRYPTFS_MARKER 0x3c81b7f5 | 200 | #define MAGIC_ECRYPTFS_MARKER 0x3c81b7f5 |
201 | #define MAGIC_ECRYPTFS_MARKER_SIZE_BYTES 8 /* 4*2 */ | 201 | #define MAGIC_ECRYPTFS_MARKER_SIZE_BYTES 8 /* 4*2 */ |
202 | #define ECRYPTFS_FILE_SIZE_BYTES (sizeof(u64)) | 202 | #define ECRYPTFS_FILE_SIZE_BYTES (sizeof(u64)) |
203 | #define ECRYPTFS_SIZE_AND_MARKER_BYTES (ECRYPTFS_FILE_SIZE_BYTES \ | ||
204 | + MAGIC_ECRYPTFS_MARKER_SIZE_BYTES) | ||
203 | #define ECRYPTFS_DEFAULT_CIPHER "aes" | 205 | #define ECRYPTFS_DEFAULT_CIPHER "aes" |
204 | #define ECRYPTFS_DEFAULT_KEY_BYTES 16 | 206 | #define ECRYPTFS_DEFAULT_KEY_BYTES 16 |
205 | #define ECRYPTFS_DEFAULT_HASH "md5" | 207 | #define ECRYPTFS_DEFAULT_HASH "md5" |
@@ -603,8 +605,7 @@ extern struct kmem_cache *ecryptfs_file_info_cache; | |||
603 | extern struct kmem_cache *ecryptfs_dentry_info_cache; | 605 | extern struct kmem_cache *ecryptfs_dentry_info_cache; |
604 | extern struct kmem_cache *ecryptfs_inode_info_cache; | 606 | extern struct kmem_cache *ecryptfs_inode_info_cache; |
605 | extern struct kmem_cache *ecryptfs_sb_info_cache; | 607 | extern struct kmem_cache *ecryptfs_sb_info_cache; |
606 | extern struct kmem_cache *ecryptfs_header_cache_1; | 608 | extern struct kmem_cache *ecryptfs_header_cache; |
607 | extern struct kmem_cache *ecryptfs_header_cache_2; | ||
608 | extern struct kmem_cache *ecryptfs_xattr_cache; | 609 | extern struct kmem_cache *ecryptfs_xattr_cache; |
609 | extern struct kmem_cache *ecryptfs_key_record_cache; | 610 | extern struct kmem_cache *ecryptfs_key_record_cache; |
610 | extern struct kmem_cache *ecryptfs_key_sig_cache; | 611 | extern struct kmem_cache *ecryptfs_key_sig_cache; |
@@ -625,14 +626,9 @@ struct ecryptfs_open_req { | |||
625 | struct list_head kthread_ctl_list; | 626 | struct list_head kthread_ctl_list; |
626 | }; | 627 | }; |
627 | 628 | ||
628 | #define ECRYPTFS_INTERPOSE_FLAG_D_ADD 0x00000001 | 629 | struct inode *ecryptfs_get_inode(struct inode *lower_inode, |
629 | int ecryptfs_interpose(struct dentry *hidden_dentry, | 630 | struct super_block *sb); |
630 | struct dentry *this_dentry, struct super_block *sb, | ||
631 | u32 flags); | ||
632 | void ecryptfs_i_size_init(const char *page_virt, struct inode *inode); | 631 | void ecryptfs_i_size_init(const char *page_virt, struct inode *inode); |
633 | int ecryptfs_lookup_and_interpose_lower(struct dentry *ecryptfs_dentry, | ||
634 | struct dentry *lower_dentry, | ||
635 | struct inode *ecryptfs_dir_inode); | ||
636 | int ecryptfs_decode_and_decrypt_filename(char **decrypted_name, | 632 | int ecryptfs_decode_and_decrypt_filename(char **decrypted_name, |
637 | size_t *decrypted_name_size, | 633 | size_t *decrypted_name_size, |
638 | struct dentry *ecryptfs_dentry, | 634 | struct dentry *ecryptfs_dentry, |
@@ -664,10 +660,9 @@ int ecryptfs_new_file_context(struct dentry *ecryptfs_dentry); | |||
664 | void ecryptfs_write_crypt_stat_flags(char *page_virt, | 660 | void ecryptfs_write_crypt_stat_flags(char *page_virt, |
665 | struct ecryptfs_crypt_stat *crypt_stat, | 661 | struct ecryptfs_crypt_stat *crypt_stat, |
666 | size_t *written); | 662 | size_t *written); |
667 | int ecryptfs_read_and_validate_header_region(char *data, | 663 | int ecryptfs_read_and_validate_header_region(struct inode *inode); |
668 | struct inode *ecryptfs_inode); | 664 | int ecryptfs_read_and_validate_xattr_region(struct dentry *dentry, |
669 | int ecryptfs_read_and_validate_xattr_region(char *page_virt, | 665 | struct inode *inode); |
670 | struct dentry *ecryptfs_dentry); | ||
671 | u8 ecryptfs_code_for_cipher_string(char *cipher_name, size_t key_bytes); | 666 | u8 ecryptfs_code_for_cipher_string(char *cipher_name, size_t key_bytes); |
672 | int ecryptfs_cipher_code_to_string(char *str, u8 cipher_code); | 667 | int ecryptfs_cipher_code_to_string(char *str, u8 cipher_code); |
673 | void ecryptfs_set_default_sizes(struct ecryptfs_crypt_stat *crypt_stat); | 668 | void ecryptfs_set_default_sizes(struct ecryptfs_crypt_stat *crypt_stat); |
@@ -679,9 +674,6 @@ int | |||
679 | ecryptfs_parse_packet_set(struct ecryptfs_crypt_stat *crypt_stat, | 674 | ecryptfs_parse_packet_set(struct ecryptfs_crypt_stat *crypt_stat, |
680 | unsigned char *src, struct dentry *ecryptfs_dentry); | 675 | unsigned char *src, struct dentry *ecryptfs_dentry); |
681 | int ecryptfs_truncate(struct dentry *dentry, loff_t new_length); | 676 | int ecryptfs_truncate(struct dentry *dentry, loff_t new_length); |
682 | int ecryptfs_inode_test(struct inode *inode, void *candidate_lower_inode); | ||
683 | int ecryptfs_inode_set(struct inode *inode, void *lower_inode); | ||
684 | void ecryptfs_init_inode(struct inode *inode, struct inode *lower_inode); | ||
685 | ssize_t | 677 | ssize_t |
686 | ecryptfs_getxattr_lower(struct dentry *lower_dentry, const char *name, | 678 | ecryptfs_getxattr_lower(struct dentry *lower_dentry, const char *name, |
687 | void *value, size_t size); | 679 | void *value, size_t size); |
@@ -761,7 +753,7 @@ int ecryptfs_privileged_open(struct file **lower_file, | |||
761 | struct dentry *lower_dentry, | 753 | struct dentry *lower_dentry, |
762 | struct vfsmount *lower_mnt, | 754 | struct vfsmount *lower_mnt, |
763 | const struct cred *cred); | 755 | const struct cred *cred); |
764 | int ecryptfs_get_lower_file(struct dentry *ecryptfs_dentry); | 756 | int ecryptfs_get_lower_file(struct dentry *dentry, struct inode *inode); |
765 | void ecryptfs_put_lower_file(struct inode *inode); | 757 | void ecryptfs_put_lower_file(struct inode *inode); |
766 | int | 758 | int |
767 | ecryptfs_write_tag_70_packet(char *dest, size_t *remaining_bytes, | 759 | ecryptfs_write_tag_70_packet(char *dest, size_t *remaining_bytes, |
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c index 566e5472f78c..4ec9eb00a241 100644 --- a/fs/ecryptfs/file.c +++ b/fs/ecryptfs/file.c | |||
@@ -191,7 +191,7 @@ static int ecryptfs_open(struct inode *inode, struct file *file) | |||
191 | | ECRYPTFS_ENCRYPTED); | 191 | | ECRYPTFS_ENCRYPTED); |
192 | } | 192 | } |
193 | mutex_unlock(&crypt_stat->cs_mutex); | 193 | mutex_unlock(&crypt_stat->cs_mutex); |
194 | rc = ecryptfs_get_lower_file(ecryptfs_dentry); | 194 | rc = ecryptfs_get_lower_file(ecryptfs_dentry, inode); |
195 | if (rc) { | 195 | if (rc) { |
196 | printk(KERN_ERR "%s: Error attempting to initialize " | 196 | printk(KERN_ERR "%s: Error attempting to initialize " |
197 | "the lower file for the dentry with name " | 197 | "the lower file for the dentry with name " |
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index bc116b9ffcf2..7349ade17de6 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c | |||
@@ -51,6 +51,97 @@ static void unlock_dir(struct dentry *dir) | |||
51 | dput(dir); | 51 | dput(dir); |
52 | } | 52 | } |
53 | 53 | ||
54 | static int ecryptfs_inode_test(struct inode *inode, void *lower_inode) | ||
55 | { | ||
56 | if (ecryptfs_inode_to_lower(inode) == (struct inode *)lower_inode) | ||
57 | return 1; | ||
58 | return 0; | ||
59 | } | ||
60 | |||
61 | static int ecryptfs_inode_set(struct inode *inode, void *opaque) | ||
62 | { | ||
63 | struct inode *lower_inode = opaque; | ||
64 | |||
65 | ecryptfs_set_inode_lower(inode, lower_inode); | ||
66 | fsstack_copy_attr_all(inode, lower_inode); | ||
67 | /* i_size will be overwritten for encrypted regular files */ | ||
68 | fsstack_copy_inode_size(inode, lower_inode); | ||
69 | inode->i_ino = lower_inode->i_ino; | ||
70 | inode->i_version++; | ||
71 | inode->i_mapping->a_ops = &ecryptfs_aops; | ||
72 | |||
73 | if (S_ISLNK(inode->i_mode)) | ||
74 | inode->i_op = &ecryptfs_symlink_iops; | ||
75 | else if (S_ISDIR(inode->i_mode)) | ||
76 | inode->i_op = &ecryptfs_dir_iops; | ||
77 | else | ||
78 | inode->i_op = &ecryptfs_main_iops; | ||
79 | |||
80 | if (S_ISDIR(inode->i_mode)) | ||
81 | inode->i_fop = &ecryptfs_dir_fops; | ||
82 | else if (special_file(inode->i_mode)) | ||
83 | init_special_inode(inode, inode->i_mode, inode->i_rdev); | ||
84 | else | ||
85 | inode->i_fop = &ecryptfs_main_fops; | ||
86 | |||
87 | return 0; | ||
88 | } | ||
89 | |||
90 | static struct inode *__ecryptfs_get_inode(struct inode *lower_inode, | ||
91 | struct super_block *sb) | ||
92 | { | ||
93 | struct inode *inode; | ||
94 | |||
95 | if (lower_inode->i_sb != ecryptfs_superblock_to_lower(sb)) | ||
96 | return ERR_PTR(-EXDEV); | ||
97 | if (!igrab(lower_inode)) | ||
98 | return ERR_PTR(-ESTALE); | ||
99 | inode = iget5_locked(sb, (unsigned long)lower_inode, | ||
100 | ecryptfs_inode_test, ecryptfs_inode_set, | ||
101 | lower_inode); | ||
102 | if (!inode) { | ||
103 | iput(lower_inode); | ||
104 | return ERR_PTR(-EACCES); | ||
105 | } | ||
106 | if (!(inode->i_state & I_NEW)) | ||
107 | iput(lower_inode); | ||
108 | |||
109 | return inode; | ||
110 | } | ||
111 | |||
112 | struct inode *ecryptfs_get_inode(struct inode *lower_inode, | ||
113 | struct super_block *sb) | ||
114 | { | ||
115 | struct inode *inode = __ecryptfs_get_inode(lower_inode, sb); | ||
116 | |||
117 | if (!IS_ERR(inode) && (inode->i_state & I_NEW)) | ||
118 | unlock_new_inode(inode); | ||
119 | |||
120 | return inode; | ||
121 | } | ||
122 | |||
123 | /** | ||
124 | * ecryptfs_interpose | ||
125 | * @lower_dentry: Existing dentry in the lower filesystem | ||
126 | * @dentry: ecryptfs' dentry | ||
127 | * @sb: ecryptfs's super_block | ||
128 | * | ||
129 | * Interposes upper and lower dentries. | ||
130 | * | ||
131 | * Returns zero on success; non-zero otherwise | ||
132 | */ | ||
133 | static int ecryptfs_interpose(struct dentry *lower_dentry, | ||
134 | struct dentry *dentry, struct super_block *sb) | ||
135 | { | ||
136 | struct inode *inode = ecryptfs_get_inode(lower_dentry->d_inode, sb); | ||
137 | |||
138 | if (IS_ERR(inode)) | ||
139 | return PTR_ERR(inode); | ||
140 | d_instantiate(dentry, inode); | ||
141 | |||
142 | return 0; | ||
143 | } | ||
144 | |||
54 | /** | 145 | /** |
55 | * ecryptfs_create_underlying_file | 146 | * ecryptfs_create_underlying_file |
56 | * @lower_dir_inode: inode of the parent in the lower fs of the new file | 147 | * @lower_dir_inode: inode of the parent in the lower fs of the new file |
@@ -129,7 +220,7 @@ ecryptfs_do_create(struct inode *directory_inode, | |||
129 | goto out_lock; | 220 | goto out_lock; |
130 | } | 221 | } |
131 | rc = ecryptfs_interpose(lower_dentry, ecryptfs_dentry, | 222 | rc = ecryptfs_interpose(lower_dentry, ecryptfs_dentry, |
132 | directory_inode->i_sb, 0); | 223 | directory_inode->i_sb); |
133 | if (rc) { | 224 | if (rc) { |
134 | ecryptfs_printk(KERN_ERR, "Failure in ecryptfs_interpose\n"); | 225 | ecryptfs_printk(KERN_ERR, "Failure in ecryptfs_interpose\n"); |
135 | goto out_lock; | 226 | goto out_lock; |
@@ -168,7 +259,8 @@ static int ecryptfs_initialize_file(struct dentry *ecryptfs_dentry) | |||
168 | "context; rc = [%d]\n", rc); | 259 | "context; rc = [%d]\n", rc); |
169 | goto out; | 260 | goto out; |
170 | } | 261 | } |
171 | rc = ecryptfs_get_lower_file(ecryptfs_dentry); | 262 | rc = ecryptfs_get_lower_file(ecryptfs_dentry, |
263 | ecryptfs_dentry->d_inode); | ||
172 | if (rc) { | 264 | if (rc) { |
173 | printk(KERN_ERR "%s: Error attempting to initialize " | 265 | printk(KERN_ERR "%s: Error attempting to initialize " |
174 | "the lower file for the dentry with name " | 266 | "the lower file for the dentry with name " |
@@ -215,102 +307,90 @@ out: | |||
215 | return rc; | 307 | return rc; |
216 | } | 308 | } |
217 | 309 | ||
310 | static int ecryptfs_i_size_read(struct dentry *dentry, struct inode *inode) | ||
311 | { | ||
312 | struct ecryptfs_crypt_stat *crypt_stat; | ||
313 | int rc; | ||
314 | |||
315 | rc = ecryptfs_get_lower_file(dentry, inode); | ||
316 | if (rc) { | ||
317 | printk(KERN_ERR "%s: Error attempting to initialize " | ||
318 | "the lower file for the dentry with name " | ||
319 | "[%s]; rc = [%d]\n", __func__, | ||
320 | dentry->d_name.name, rc); | ||
321 | return rc; | ||
322 | } | ||
323 | |||
324 | crypt_stat = &ecryptfs_inode_to_private(inode)->crypt_stat; | ||
325 | /* TODO: lock for crypt_stat comparison */ | ||
326 | if (!(crypt_stat->flags & ECRYPTFS_POLICY_APPLIED)) | ||
327 | ecryptfs_set_default_sizes(crypt_stat); | ||
328 | |||
329 | rc = ecryptfs_read_and_validate_header_region(inode); | ||
330 | ecryptfs_put_lower_file(inode); | ||
331 | if (rc) { | ||
332 | rc = ecryptfs_read_and_validate_xattr_region(dentry, inode); | ||
333 | if (!rc) | ||
334 | crypt_stat->flags |= ECRYPTFS_METADATA_IN_XATTR; | ||
335 | } | ||
336 | |||
337 | /* Must return 0 to allow non-eCryptfs files to be looked up, too */ | ||
338 | return 0; | ||
339 | } | ||
340 | |||
218 | /** | 341 | /** |
219 | * ecryptfs_lookup_and_interpose_lower - Perform a lookup | 342 | * ecryptfs_lookup_interpose - Dentry interposition for a lookup |
220 | */ | 343 | */ |
221 | int ecryptfs_lookup_and_interpose_lower(struct dentry *ecryptfs_dentry, | 344 | static int ecryptfs_lookup_interpose(struct dentry *dentry, |
222 | struct dentry *lower_dentry, | 345 | struct dentry *lower_dentry, |
223 | struct inode *ecryptfs_dir_inode) | 346 | struct inode *dir_inode) |
224 | { | 347 | { |
225 | struct dentry *lower_dir_dentry; | 348 | struct inode *inode, *lower_inode = lower_dentry->d_inode; |
349 | struct ecryptfs_dentry_info *dentry_info; | ||
226 | struct vfsmount *lower_mnt; | 350 | struct vfsmount *lower_mnt; |
227 | struct inode *lower_inode; | 351 | int rc = 0; |
228 | struct ecryptfs_crypt_stat *crypt_stat; | 352 | |
229 | char *page_virt = NULL; | 353 | lower_mnt = mntget(ecryptfs_dentry_to_lower_mnt(dentry->d_parent)); |
230 | int put_lower = 0, rc = 0; | 354 | fsstack_copy_attr_atime(dir_inode, lower_dentry->d_parent->d_inode); |
231 | |||
232 | lower_dir_dentry = lower_dentry->d_parent; | ||
233 | lower_mnt = mntget(ecryptfs_dentry_to_lower_mnt( | ||
234 | ecryptfs_dentry->d_parent)); | ||
235 | lower_inode = lower_dentry->d_inode; | ||
236 | fsstack_copy_attr_atime(ecryptfs_dir_inode, lower_dir_dentry->d_inode); | ||
237 | BUG_ON(!lower_dentry->d_count); | 355 | BUG_ON(!lower_dentry->d_count); |
238 | ecryptfs_set_dentry_private(ecryptfs_dentry, | 356 | |
239 | kmem_cache_alloc(ecryptfs_dentry_info_cache, | 357 | dentry_info = kmem_cache_alloc(ecryptfs_dentry_info_cache, GFP_KERNEL); |
240 | GFP_KERNEL)); | 358 | ecryptfs_set_dentry_private(dentry, dentry_info); |
241 | if (!ecryptfs_dentry_to_private(ecryptfs_dentry)) { | 359 | if (!dentry_info) { |
242 | rc = -ENOMEM; | ||
243 | printk(KERN_ERR "%s: Out of memory whilst attempting " | 360 | printk(KERN_ERR "%s: Out of memory whilst attempting " |
244 | "to allocate ecryptfs_dentry_info struct\n", | 361 | "to allocate ecryptfs_dentry_info struct\n", |
245 | __func__); | 362 | __func__); |
246 | goto out_put; | 363 | dput(lower_dentry); |
364 | mntput(lower_mnt); | ||
365 | d_drop(dentry); | ||
366 | return -ENOMEM; | ||
247 | } | 367 | } |
248 | ecryptfs_set_dentry_lower(ecryptfs_dentry, lower_dentry); | 368 | ecryptfs_set_dentry_lower(dentry, lower_dentry); |
249 | ecryptfs_set_dentry_lower_mnt(ecryptfs_dentry, lower_mnt); | 369 | ecryptfs_set_dentry_lower_mnt(dentry, lower_mnt); |
370 | |||
250 | if (!lower_dentry->d_inode) { | 371 | if (!lower_dentry->d_inode) { |
251 | /* We want to add because we couldn't find in lower */ | 372 | /* We want to add because we couldn't find in lower */ |
252 | d_add(ecryptfs_dentry, NULL); | 373 | d_add(dentry, NULL); |
253 | goto out; | 374 | return 0; |
254 | } | ||
255 | rc = ecryptfs_interpose(lower_dentry, ecryptfs_dentry, | ||
256 | ecryptfs_dir_inode->i_sb, | ||
257 | ECRYPTFS_INTERPOSE_FLAG_D_ADD); | ||
258 | if (rc) { | ||
259 | printk(KERN_ERR "%s: Error interposing; rc = [%d]\n", | ||
260 | __func__, rc); | ||
261 | goto out; | ||
262 | } | ||
263 | if (S_ISDIR(lower_inode->i_mode)) | ||
264 | goto out; | ||
265 | if (S_ISLNK(lower_inode->i_mode)) | ||
266 | goto out; | ||
267 | if (special_file(lower_inode->i_mode)) | ||
268 | goto out; | ||
269 | /* Released in this function */ | ||
270 | page_virt = kmem_cache_zalloc(ecryptfs_header_cache_2, GFP_USER); | ||
271 | if (!page_virt) { | ||
272 | printk(KERN_ERR "%s: Cannot kmem_cache_zalloc() a page\n", | ||
273 | __func__); | ||
274 | rc = -ENOMEM; | ||
275 | goto out; | ||
276 | } | 375 | } |
277 | rc = ecryptfs_get_lower_file(ecryptfs_dentry); | 376 | inode = __ecryptfs_get_inode(lower_inode, dir_inode->i_sb); |
278 | if (rc) { | 377 | if (IS_ERR(inode)) { |
279 | printk(KERN_ERR "%s: Error attempting to initialize " | 378 | printk(KERN_ERR "%s: Error interposing; rc = [%ld]\n", |
280 | "the lower file for the dentry with name " | 379 | __func__, PTR_ERR(inode)); |
281 | "[%s]; rc = [%d]\n", __func__, | 380 | return PTR_ERR(inode); |
282 | ecryptfs_dentry->d_name.name, rc); | ||
283 | goto out_free_kmem; | ||
284 | } | 381 | } |
285 | put_lower = 1; | 382 | if (S_ISREG(inode->i_mode)) { |
286 | crypt_stat = &ecryptfs_inode_to_private( | 383 | rc = ecryptfs_i_size_read(dentry, inode); |
287 | ecryptfs_dentry->d_inode)->crypt_stat; | ||
288 | /* TODO: lock for crypt_stat comparison */ | ||
289 | if (!(crypt_stat->flags & ECRYPTFS_POLICY_APPLIED)) | ||
290 | ecryptfs_set_default_sizes(crypt_stat); | ||
291 | rc = ecryptfs_read_and_validate_header_region(page_virt, | ||
292 | ecryptfs_dentry->d_inode); | ||
293 | if (rc) { | ||
294 | memset(page_virt, 0, PAGE_CACHE_SIZE); | ||
295 | rc = ecryptfs_read_and_validate_xattr_region(page_virt, | ||
296 | ecryptfs_dentry); | ||
297 | if (rc) { | 384 | if (rc) { |
298 | rc = 0; | 385 | make_bad_inode(inode); |
299 | goto out_free_kmem; | 386 | return rc; |
300 | } | 387 | } |
301 | crypt_stat->flags |= ECRYPTFS_METADATA_IN_XATTR; | ||
302 | } | 388 | } |
303 | ecryptfs_i_size_init(page_virt, ecryptfs_dentry->d_inode); | 389 | |
304 | out_free_kmem: | 390 | if (inode->i_state & I_NEW) |
305 | kmem_cache_free(ecryptfs_header_cache_2, page_virt); | 391 | unlock_new_inode(inode); |
306 | goto out; | 392 | d_add(dentry, inode); |
307 | out_put: | 393 | |
308 | dput(lower_dentry); | ||
309 | mntput(lower_mnt); | ||
310 | d_drop(ecryptfs_dentry); | ||
311 | out: | ||
312 | if (put_lower) | ||
313 | ecryptfs_put_lower_file(ecryptfs_dentry->d_inode); | ||
314 | return rc; | 394 | return rc; |
315 | } | 395 | } |
316 | 396 | ||
@@ -353,12 +433,12 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode, | |||
353 | goto out_d_drop; | 433 | goto out_d_drop; |
354 | } | 434 | } |
355 | if (lower_dentry->d_inode) | 435 | if (lower_dentry->d_inode) |
356 | goto lookup_and_interpose; | 436 | goto interpose; |
357 | mount_crypt_stat = &ecryptfs_superblock_to_private( | 437 | mount_crypt_stat = &ecryptfs_superblock_to_private( |
358 | ecryptfs_dentry->d_sb)->mount_crypt_stat; | 438 | ecryptfs_dentry->d_sb)->mount_crypt_stat; |
359 | if (!(mount_crypt_stat | 439 | if (!(mount_crypt_stat |
360 | && (mount_crypt_stat->flags & ECRYPTFS_GLOBAL_ENCRYPT_FILENAMES))) | 440 | && (mount_crypt_stat->flags & ECRYPTFS_GLOBAL_ENCRYPT_FILENAMES))) |
361 | goto lookup_and_interpose; | 441 | goto interpose; |
362 | dput(lower_dentry); | 442 | dput(lower_dentry); |
363 | rc = ecryptfs_encrypt_and_encode_filename( | 443 | rc = ecryptfs_encrypt_and_encode_filename( |
364 | &encrypted_and_encoded_name, &encrypted_and_encoded_name_size, | 444 | &encrypted_and_encoded_name, &encrypted_and_encoded_name_size, |
@@ -381,9 +461,9 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode, | |||
381 | encrypted_and_encoded_name); | 461 | encrypted_and_encoded_name); |
382 | goto out_d_drop; | 462 | goto out_d_drop; |
383 | } | 463 | } |
384 | lookup_and_interpose: | 464 | interpose: |
385 | rc = ecryptfs_lookup_and_interpose_lower(ecryptfs_dentry, lower_dentry, | 465 | rc = ecryptfs_lookup_interpose(ecryptfs_dentry, lower_dentry, |
386 | ecryptfs_dir_inode); | 466 | ecryptfs_dir_inode); |
387 | goto out; | 467 | goto out; |
388 | out_d_drop: | 468 | out_d_drop: |
389 | d_drop(ecryptfs_dentry); | 469 | d_drop(ecryptfs_dentry); |
@@ -411,7 +491,7 @@ static int ecryptfs_link(struct dentry *old_dentry, struct inode *dir, | |||
411 | lower_new_dentry); | 491 | lower_new_dentry); |
412 | if (rc || !lower_new_dentry->d_inode) | 492 | if (rc || !lower_new_dentry->d_inode) |
413 | goto out_lock; | 493 | goto out_lock; |
414 | rc = ecryptfs_interpose(lower_new_dentry, new_dentry, dir->i_sb, 0); | 494 | rc = ecryptfs_interpose(lower_new_dentry, new_dentry, dir->i_sb); |
415 | if (rc) | 495 | if (rc) |
416 | goto out_lock; | 496 | goto out_lock; |
417 | fsstack_copy_attr_times(dir, lower_dir_dentry->d_inode); | 497 | fsstack_copy_attr_times(dir, lower_dir_dentry->d_inode); |
@@ -478,7 +558,7 @@ static int ecryptfs_symlink(struct inode *dir, struct dentry *dentry, | |||
478 | kfree(encoded_symname); | 558 | kfree(encoded_symname); |
479 | if (rc || !lower_dentry->d_inode) | 559 | if (rc || !lower_dentry->d_inode) |
480 | goto out_lock; | 560 | goto out_lock; |
481 | rc = ecryptfs_interpose(lower_dentry, dentry, dir->i_sb, 0); | 561 | rc = ecryptfs_interpose(lower_dentry, dentry, dir->i_sb); |
482 | if (rc) | 562 | if (rc) |
483 | goto out_lock; | 563 | goto out_lock; |
484 | fsstack_copy_attr_times(dir, lower_dir_dentry->d_inode); | 564 | fsstack_copy_attr_times(dir, lower_dir_dentry->d_inode); |
@@ -502,7 +582,7 @@ static int ecryptfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) | |||
502 | rc = vfs_mkdir(lower_dir_dentry->d_inode, lower_dentry, mode); | 582 | rc = vfs_mkdir(lower_dir_dentry->d_inode, lower_dentry, mode); |
503 | if (rc || !lower_dentry->d_inode) | 583 | if (rc || !lower_dentry->d_inode) |
504 | goto out; | 584 | goto out; |
505 | rc = ecryptfs_interpose(lower_dentry, dentry, dir->i_sb, 0); | 585 | rc = ecryptfs_interpose(lower_dentry, dentry, dir->i_sb); |
506 | if (rc) | 586 | if (rc) |
507 | goto out; | 587 | goto out; |
508 | fsstack_copy_attr_times(dir, lower_dir_dentry->d_inode); | 588 | fsstack_copy_attr_times(dir, lower_dir_dentry->d_inode); |
@@ -521,8 +601,6 @@ static int ecryptfs_rmdir(struct inode *dir, struct dentry *dentry) | |||
521 | struct dentry *lower_dir_dentry; | 601 | struct dentry *lower_dir_dentry; |
522 | int rc; | 602 | int rc; |
523 | 603 | ||
524 | dentry_unhash(dentry); | ||
525 | |||
526 | lower_dentry = ecryptfs_dentry_to_lower(dentry); | 604 | lower_dentry = ecryptfs_dentry_to_lower(dentry); |
527 | dget(dentry); | 605 | dget(dentry); |
528 | lower_dir_dentry = lock_parent(lower_dentry); | 606 | lower_dir_dentry = lock_parent(lower_dentry); |
@@ -552,7 +630,7 @@ ecryptfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev) | |||
552 | rc = vfs_mknod(lower_dir_dentry->d_inode, lower_dentry, mode, dev); | 630 | rc = vfs_mknod(lower_dir_dentry->d_inode, lower_dentry, mode, dev); |
553 | if (rc || !lower_dentry->d_inode) | 631 | if (rc || !lower_dentry->d_inode) |
554 | goto out; | 632 | goto out; |
555 | rc = ecryptfs_interpose(lower_dentry, dentry, dir->i_sb, 0); | 633 | rc = ecryptfs_interpose(lower_dentry, dentry, dir->i_sb); |
556 | if (rc) | 634 | if (rc) |
557 | goto out; | 635 | goto out; |
558 | fsstack_copy_attr_times(dir, lower_dir_dentry->d_inode); | 636 | fsstack_copy_attr_times(dir, lower_dir_dentry->d_inode); |
@@ -575,9 +653,6 @@ ecryptfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
575 | struct dentry *lower_new_dir_dentry; | 653 | struct dentry *lower_new_dir_dentry; |
576 | struct dentry *trap = NULL; | 654 | struct dentry *trap = NULL; |
577 | 655 | ||
578 | if (new_dentry->d_inode && S_ISDIR(new_dentry->d_inode->i_mode)) | ||
579 | dentry_unhash(new_dentry); | ||
580 | |||
581 | lower_old_dentry = ecryptfs_dentry_to_lower(old_dentry); | 656 | lower_old_dentry = ecryptfs_dentry_to_lower(old_dentry); |
582 | lower_new_dentry = ecryptfs_dentry_to_lower(new_dentry); | 657 | lower_new_dentry = ecryptfs_dentry_to_lower(new_dentry); |
583 | dget(lower_old_dentry); | 658 | dget(lower_old_dentry); |
@@ -755,7 +830,7 @@ static int truncate_upper(struct dentry *dentry, struct iattr *ia, | |||
755 | lower_ia->ia_valid &= ~ATTR_SIZE; | 830 | lower_ia->ia_valid &= ~ATTR_SIZE; |
756 | return 0; | 831 | return 0; |
757 | } | 832 | } |
758 | rc = ecryptfs_get_lower_file(dentry); | 833 | rc = ecryptfs_get_lower_file(dentry, inode); |
759 | if (rc) | 834 | if (rc) |
760 | return rc; | 835 | return rc; |
761 | crypt_stat = &ecryptfs_inode_to_private(dentry->d_inode)->crypt_stat; | 836 | crypt_stat = &ecryptfs_inode_to_private(dentry->d_inode)->crypt_stat; |
@@ -911,7 +986,7 @@ static int ecryptfs_setattr(struct dentry *dentry, struct iattr *ia) | |||
911 | 986 | ||
912 | mount_crypt_stat = &ecryptfs_superblock_to_private( | 987 | mount_crypt_stat = &ecryptfs_superblock_to_private( |
913 | dentry->d_sb)->mount_crypt_stat; | 988 | dentry->d_sb)->mount_crypt_stat; |
914 | rc = ecryptfs_get_lower_file(dentry); | 989 | rc = ecryptfs_get_lower_file(dentry, inode); |
915 | if (rc) { | 990 | if (rc) { |
916 | mutex_unlock(&crypt_stat->cs_mutex); | 991 | mutex_unlock(&crypt_stat->cs_mutex); |
917 | goto out; | 992 | goto out; |
@@ -1084,21 +1159,6 @@ out: | |||
1084 | return rc; | 1159 | return rc; |
1085 | } | 1160 | } |
1086 | 1161 | ||
1087 | int ecryptfs_inode_test(struct inode *inode, void *candidate_lower_inode) | ||
1088 | { | ||
1089 | if ((ecryptfs_inode_to_lower(inode) | ||
1090 | == (struct inode *)candidate_lower_inode)) | ||
1091 | return 1; | ||
1092 | else | ||
1093 | return 0; | ||
1094 | } | ||
1095 | |||
1096 | int ecryptfs_inode_set(struct inode *inode, void *lower_inode) | ||
1097 | { | ||
1098 | ecryptfs_init_inode(inode, (struct inode *)lower_inode); | ||
1099 | return 0; | ||
1100 | } | ||
1101 | |||
1102 | const struct inode_operations ecryptfs_symlink_iops = { | 1162 | const struct inode_operations ecryptfs_symlink_iops = { |
1103 | .readlink = ecryptfs_readlink, | 1163 | .readlink = ecryptfs_readlink, |
1104 | .follow_link = ecryptfs_follow_link, | 1164 | .follow_link = ecryptfs_follow_link, |
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c index 89b93389af8e..9f1bb747d77d 100644 --- a/fs/ecryptfs/main.c +++ b/fs/ecryptfs/main.c | |||
@@ -135,12 +135,12 @@ static int ecryptfs_init_lower_file(struct dentry *dentry, | |||
135 | return rc; | 135 | return rc; |
136 | } | 136 | } |
137 | 137 | ||
138 | int ecryptfs_get_lower_file(struct dentry *dentry) | 138 | int ecryptfs_get_lower_file(struct dentry *dentry, struct inode *inode) |
139 | { | 139 | { |
140 | struct ecryptfs_inode_info *inode_info = | 140 | struct ecryptfs_inode_info *inode_info; |
141 | ecryptfs_inode_to_private(dentry->d_inode); | ||
142 | int count, rc = 0; | 141 | int count, rc = 0; |
143 | 142 | ||
143 | inode_info = ecryptfs_inode_to_private(inode); | ||
144 | mutex_lock(&inode_info->lower_file_mutex); | 144 | mutex_lock(&inode_info->lower_file_mutex); |
145 | count = atomic_inc_return(&inode_info->lower_file_count); | 145 | count = atomic_inc_return(&inode_info->lower_file_count); |
146 | if (WARN_ON_ONCE(count < 1)) | 146 | if (WARN_ON_ONCE(count < 1)) |
@@ -168,75 +168,6 @@ void ecryptfs_put_lower_file(struct inode *inode) | |||
168 | } | 168 | } |
169 | } | 169 | } |
170 | 170 | ||
171 | static struct inode *ecryptfs_get_inode(struct inode *lower_inode, | ||
172 | struct super_block *sb) | ||
173 | { | ||
174 | struct inode *inode; | ||
175 | int rc = 0; | ||
176 | |||
177 | if (lower_inode->i_sb != ecryptfs_superblock_to_lower(sb)) { | ||
178 | rc = -EXDEV; | ||
179 | goto out; | ||
180 | } | ||
181 | if (!igrab(lower_inode)) { | ||
182 | rc = -ESTALE; | ||
183 | goto out; | ||
184 | } | ||
185 | inode = iget5_locked(sb, (unsigned long)lower_inode, | ||
186 | ecryptfs_inode_test, ecryptfs_inode_set, | ||
187 | lower_inode); | ||
188 | if (!inode) { | ||
189 | rc = -EACCES; | ||
190 | iput(lower_inode); | ||
191 | goto out; | ||
192 | } | ||
193 | if (inode->i_state & I_NEW) | ||
194 | unlock_new_inode(inode); | ||
195 | else | ||
196 | iput(lower_inode); | ||
197 | if (S_ISLNK(lower_inode->i_mode)) | ||
198 | inode->i_op = &ecryptfs_symlink_iops; | ||
199 | else if (S_ISDIR(lower_inode->i_mode)) | ||
200 | inode->i_op = &ecryptfs_dir_iops; | ||
201 | if (S_ISDIR(lower_inode->i_mode)) | ||
202 | inode->i_fop = &ecryptfs_dir_fops; | ||
203 | if (special_file(lower_inode->i_mode)) | ||
204 | init_special_inode(inode, lower_inode->i_mode, | ||
205 | lower_inode->i_rdev); | ||
206 | fsstack_copy_attr_all(inode, lower_inode); | ||
207 | /* This size will be overwritten for real files w/ headers and | ||
208 | * other metadata */ | ||
209 | fsstack_copy_inode_size(inode, lower_inode); | ||
210 | return inode; | ||
211 | out: | ||
212 | return ERR_PTR(rc); | ||
213 | } | ||
214 | |||
215 | /** | ||
216 | * ecryptfs_interpose | ||
217 | * @lower_dentry: Existing dentry in the lower filesystem | ||
218 | * @dentry: ecryptfs' dentry | ||
219 | * @sb: ecryptfs's super_block | ||
220 | * @flags: flags to govern behavior of interpose procedure | ||
221 | * | ||
222 | * Interposes upper and lower dentries. | ||
223 | * | ||
224 | * Returns zero on success; non-zero otherwise | ||
225 | */ | ||
226 | int ecryptfs_interpose(struct dentry *lower_dentry, struct dentry *dentry, | ||
227 | struct super_block *sb, u32 flags) | ||
228 | { | ||
229 | struct inode *lower_inode = lower_dentry->d_inode; | ||
230 | struct inode *inode = ecryptfs_get_inode(lower_inode, sb); | ||
231 | if (IS_ERR(inode)) | ||
232 | return PTR_ERR(inode); | ||
233 | if (flags & ECRYPTFS_INTERPOSE_FLAG_D_ADD) | ||
234 | d_add(dentry, inode); | ||
235 | else | ||
236 | d_instantiate(dentry, inode); | ||
237 | return 0; | ||
238 | } | ||
239 | |||
240 | enum { ecryptfs_opt_sig, ecryptfs_opt_ecryptfs_sig, | 171 | enum { ecryptfs_opt_sig, ecryptfs_opt_ecryptfs_sig, |
241 | ecryptfs_opt_cipher, ecryptfs_opt_ecryptfs_cipher, | 172 | ecryptfs_opt_cipher, ecryptfs_opt_ecryptfs_cipher, |
242 | ecryptfs_opt_ecryptfs_key_bytes, | 173 | ecryptfs_opt_ecryptfs_key_bytes, |
@@ -704,13 +635,8 @@ static struct ecryptfs_cache_info { | |||
704 | .size = sizeof(struct ecryptfs_sb_info), | 635 | .size = sizeof(struct ecryptfs_sb_info), |
705 | }, | 636 | }, |
706 | { | 637 | { |
707 | .cache = &ecryptfs_header_cache_1, | 638 | .cache = &ecryptfs_header_cache, |
708 | .name = "ecryptfs_headers_1", | 639 | .name = "ecryptfs_headers", |
709 | .size = PAGE_CACHE_SIZE, | ||
710 | }, | ||
711 | { | ||
712 | .cache = &ecryptfs_header_cache_2, | ||
713 | .name = "ecryptfs_headers_2", | ||
714 | .size = PAGE_CACHE_SIZE, | 640 | .size = PAGE_CACHE_SIZE, |
715 | }, | 641 | }, |
716 | { | 642 | { |
diff --git a/fs/ecryptfs/super.c b/fs/ecryptfs/super.c index 245b517bf1b6..dbd52d40df4c 100644 --- a/fs/ecryptfs/super.c +++ b/fs/ecryptfs/super.c | |||
@@ -93,22 +93,6 @@ static void ecryptfs_destroy_inode(struct inode *inode) | |||
93 | } | 93 | } |
94 | 94 | ||
95 | /** | 95 | /** |
96 | * ecryptfs_init_inode | ||
97 | * @inode: The ecryptfs inode | ||
98 | * | ||
99 | * Set up the ecryptfs inode. | ||
100 | */ | ||
101 | void ecryptfs_init_inode(struct inode *inode, struct inode *lower_inode) | ||
102 | { | ||
103 | ecryptfs_set_inode_lower(inode, lower_inode); | ||
104 | inode->i_ino = lower_inode->i_ino; | ||
105 | inode->i_version++; | ||
106 | inode->i_op = &ecryptfs_main_iops; | ||
107 | inode->i_fop = &ecryptfs_main_fops; | ||
108 | inode->i_mapping->a_ops = &ecryptfs_aops; | ||
109 | } | ||
110 | |||
111 | /** | ||
112 | * ecryptfs_statfs | 96 | * ecryptfs_statfs |
113 | * @sb: The ecryptfs super block | 97 | * @sb: The ecryptfs super block |
114 | * @buf: The struct kstatfs to fill in with stats | 98 | * @buf: The struct kstatfs to fill in with stats |
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index 68b2e43d7c35..3451d23c3bae 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c | |||
@@ -3392,7 +3392,7 @@ int ext3_mark_inode_dirty(handle_t *handle, struct inode *inode) | |||
3392 | * so would cause a commit on atime updates, which we don't bother doing. | 3392 | * so would cause a commit on atime updates, which we don't bother doing. |
3393 | * We handle synchronous inodes at the highest possible level. | 3393 | * We handle synchronous inodes at the highest possible level. |
3394 | */ | 3394 | */ |
3395 | void ext3_dirty_inode(struct inode *inode) | 3395 | void ext3_dirty_inode(struct inode *inode, int flags) |
3396 | { | 3396 | { |
3397 | handle_t *current_handle = ext3_journal_current_handle(); | 3397 | handle_t *current_handle = ext3_journal_current_handle(); |
3398 | handle_t *handle; | 3398 | handle_t *handle; |
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index a74b89c09f90..1921392cd708 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h | |||
@@ -1813,7 +1813,7 @@ extern int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry, | |||
1813 | extern void ext4_evict_inode(struct inode *); | 1813 | extern void ext4_evict_inode(struct inode *); |
1814 | extern void ext4_clear_inode(struct inode *); | 1814 | extern void ext4_clear_inode(struct inode *); |
1815 | extern int ext4_sync_inode(handle_t *, struct inode *); | 1815 | extern int ext4_sync_inode(handle_t *, struct inode *); |
1816 | extern void ext4_dirty_inode(struct inode *); | 1816 | extern void ext4_dirty_inode(struct inode *, int); |
1817 | extern int ext4_change_inode_journal_flag(struct inode *, int); | 1817 | extern int ext4_change_inode_journal_flag(struct inode *, int); |
1818 | extern int ext4_get_inode_loc(struct inode *, struct ext4_iloc *); | 1818 | extern int ext4_get_inode_loc(struct inode *, struct ext4_iloc *); |
1819 | extern int ext4_can_truncate(struct inode *inode); | 1819 | extern int ext4_can_truncate(struct inode *inode); |
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 50d0e9c64584..a5763e3505ba 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c | |||
@@ -5733,7 +5733,7 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode) | |||
5733 | * so would cause a commit on atime updates, which we don't bother doing. | 5733 | * so would cause a commit on atime updates, which we don't bother doing. |
5734 | * We handle synchronous inodes at the highest possible level. | 5734 | * We handle synchronous inodes at the highest possible level. |
5735 | */ | 5735 | */ |
5736 | void ext4_dirty_inode(struct inode *inode) | 5736 | void ext4_dirty_inode(struct inode *inode, int flags) |
5737 | { | 5737 | { |
5738 | handle_t *handle; | 5738 | handle_t *handle; |
5739 | 5739 | ||
diff --git a/fs/fat/namei_msdos.c b/fs/fat/namei_msdos.c index be15437c272e..3b222dafd15b 100644 --- a/fs/fat/namei_msdos.c +++ b/fs/fat/namei_msdos.c | |||
@@ -326,8 +326,6 @@ static int msdos_rmdir(struct inode *dir, struct dentry *dentry) | |||
326 | struct fat_slot_info sinfo; | 326 | struct fat_slot_info sinfo; |
327 | int err; | 327 | int err; |
328 | 328 | ||
329 | dentry_unhash(dentry); | ||
330 | |||
331 | lock_super(sb); | 329 | lock_super(sb); |
332 | /* | 330 | /* |
333 | * Check whether the directory is not in use, then check | 331 | * Check whether the directory is not in use, then check |
@@ -459,9 +457,6 @@ static int do_msdos_rename(struct inode *old_dir, unsigned char *old_name, | |||
459 | old_inode = old_dentry->d_inode; | 457 | old_inode = old_dentry->d_inode; |
460 | new_inode = new_dentry->d_inode; | 458 | new_inode = new_dentry->d_inode; |
461 | 459 | ||
462 | if (new_inode && S_ISDIR(new_inode->i_mode)) | ||
463 | dentry_unhash(new_dentry); | ||
464 | |||
465 | err = fat_scan(old_dir, old_name, &old_sinfo); | 460 | err = fat_scan(old_dir, old_name, &old_sinfo); |
466 | if (err) { | 461 | if (err) { |
467 | err = -EIO; | 462 | err = -EIO; |
diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c index c61a6789f36c..20b4ea53fdc4 100644 --- a/fs/fat/namei_vfat.c +++ b/fs/fat/namei_vfat.c | |||
@@ -824,8 +824,6 @@ static int vfat_rmdir(struct inode *dir, struct dentry *dentry) | |||
824 | struct fat_slot_info sinfo; | 824 | struct fat_slot_info sinfo; |
825 | int err; | 825 | int err; |
826 | 826 | ||
827 | dentry_unhash(dentry); | ||
828 | |||
829 | lock_super(sb); | 827 | lock_super(sb); |
830 | 828 | ||
831 | err = fat_dir_empty(inode); | 829 | err = fat_dir_empty(inode); |
@@ -933,9 +931,6 @@ static int vfat_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
933 | int err, is_dir, update_dotdot, corrupt = 0; | 931 | int err, is_dir, update_dotdot, corrupt = 0; |
934 | struct super_block *sb = old_dir->i_sb; | 932 | struct super_block *sb = old_dir->i_sb; |
935 | 933 | ||
936 | if (new_dentry->d_inode && S_ISDIR(new_dentry->d_inode->i_mode)) | ||
937 | dentry_unhash(new_dentry); | ||
938 | |||
939 | old_sinfo.bh = sinfo.bh = dotdot_bh = NULL; | 934 | old_sinfo.bh = sinfo.bh = dotdot_bh = NULL; |
940 | old_inode = old_dentry->d_inode; | 935 | old_inode = old_dentry->d_inode; |
941 | new_inode = new_dentry->d_inode; | 936 | new_inode = new_dentry->d_inode; |
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 34591ee804b5..0f015a0468de 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c | |||
@@ -1007,9 +1007,6 @@ static noinline void block_dump___mark_inode_dirty(struct inode *inode) | |||
1007 | * In short, make sure you hash any inodes _before_ you start marking | 1007 | * In short, make sure you hash any inodes _before_ you start marking |
1008 | * them dirty. | 1008 | * them dirty. |
1009 | * | 1009 | * |
1010 | * This function *must* be atomic for the I_DIRTY_PAGES case - | ||
1011 | * set_page_dirty() is called under spinlock in several places. | ||
1012 | * | ||
1013 | * Note that for blockdevs, inode->dirtied_when represents the dirtying time of | 1010 | * Note that for blockdevs, inode->dirtied_when represents the dirtying time of |
1014 | * the block-special inode (/dev/hda1) itself. And the ->dirtied_when field of | 1011 | * the block-special inode (/dev/hda1) itself. And the ->dirtied_when field of |
1015 | * the kernel-internal blockdev inode represents the dirtying time of the | 1012 | * the kernel-internal blockdev inode represents the dirtying time of the |
@@ -1028,7 +1025,7 @@ void __mark_inode_dirty(struct inode *inode, int flags) | |||
1028 | */ | 1025 | */ |
1029 | if (flags & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) { | 1026 | if (flags & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) { |
1030 | if (sb->s_op->dirty_inode) | 1027 | if (sb->s_op->dirty_inode) |
1031 | sb->s_op->dirty_inode(inode); | 1028 | sb->s_op->dirty_inode(inode, flags); |
1032 | } | 1029 | } |
1033 | 1030 | ||
1034 | /* | 1031 | /* |
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 0d0e3faddcfa..d50160714595 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c | |||
@@ -667,8 +667,6 @@ static int fuse_rmdir(struct inode *dir, struct dentry *entry) | |||
667 | if (IS_ERR(req)) | 667 | if (IS_ERR(req)) |
668 | return PTR_ERR(req); | 668 | return PTR_ERR(req); |
669 | 669 | ||
670 | dentry_unhash(entry); | ||
671 | |||
672 | req->in.h.opcode = FUSE_RMDIR; | 670 | req->in.h.opcode = FUSE_RMDIR; |
673 | req->in.h.nodeid = get_node_id(dir); | 671 | req->in.h.nodeid = get_node_id(dir); |
674 | req->in.numargs = 1; | 672 | req->in.numargs = 1; |
@@ -694,9 +692,6 @@ static int fuse_rename(struct inode *olddir, struct dentry *oldent, | |||
694 | struct fuse_conn *fc = get_fuse_conn(olddir); | 692 | struct fuse_conn *fc = get_fuse_conn(olddir); |
695 | struct fuse_req *req = fuse_get_req(fc); | 693 | struct fuse_req *req = fuse_get_req(fc); |
696 | 694 | ||
697 | if (newent->d_inode && S_ISDIR(newent->d_inode->i_mode)) | ||
698 | dentry_unhash(newent); | ||
699 | |||
700 | if (IS_ERR(req)) | 695 | if (IS_ERR(req)) |
701 | return PTR_ERR(req); | 696 | return PTR_ERR(req); |
702 | 697 | ||
diff --git a/fs/hfs/dir.c b/fs/hfs/dir.c index 1cb70cdba2c1..b4d70b13be92 100644 --- a/fs/hfs/dir.c +++ b/fs/hfs/dir.c | |||
@@ -253,9 +253,6 @@ static int hfs_remove(struct inode *dir, struct dentry *dentry) | |||
253 | struct inode *inode = dentry->d_inode; | 253 | struct inode *inode = dentry->d_inode; |
254 | int res; | 254 | int res; |
255 | 255 | ||
256 | if (S_ISDIR(inode->i_mode)) | ||
257 | dentry_unhash(dentry); | ||
258 | |||
259 | if (S_ISDIR(inode->i_mode) && inode->i_size != 2) | 256 | if (S_ISDIR(inode->i_mode) && inode->i_size != 2) |
260 | return -ENOTEMPTY; | 257 | return -ENOTEMPTY; |
261 | res = hfs_cat_delete(inode->i_ino, dir, &dentry->d_name); | 258 | res = hfs_cat_delete(inode->i_ino, dir, &dentry->d_name); |
@@ -286,9 +283,6 @@ static int hfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
286 | 283 | ||
287 | /* Unlink destination if it already exists */ | 284 | /* Unlink destination if it already exists */ |
288 | if (new_dentry->d_inode) { | 285 | if (new_dentry->d_inode) { |
289 | if (S_ISDIR(new_dentry->d_inode->i_mode)) | ||
290 | dentry_unhash(new_dentry); | ||
291 | |||
292 | res = hfs_remove(new_dir, new_dentry); | 286 | res = hfs_remove(new_dir, new_dentry); |
293 | if (res) | 287 | if (res) |
294 | return res; | 288 | return res; |
diff --git a/fs/hfsplus/dir.c b/fs/hfsplus/dir.c index b28835091dd0..4df5059c25da 100644 --- a/fs/hfsplus/dir.c +++ b/fs/hfsplus/dir.c | |||
@@ -370,8 +370,6 @@ static int hfsplus_rmdir(struct inode *dir, struct dentry *dentry) | |||
370 | struct inode *inode = dentry->d_inode; | 370 | struct inode *inode = dentry->d_inode; |
371 | int res; | 371 | int res; |
372 | 372 | ||
373 | dentry_unhash(dentry); | ||
374 | |||
375 | if (inode->i_size != 2) | 373 | if (inode->i_size != 2) |
376 | return -ENOTEMPTY; | 374 | return -ENOTEMPTY; |
377 | 375 | ||
@@ -469,12 +467,10 @@ static int hfsplus_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
469 | 467 | ||
470 | /* Unlink destination if it already exists */ | 468 | /* Unlink destination if it already exists */ |
471 | if (new_dentry->d_inode) { | 469 | if (new_dentry->d_inode) { |
472 | if (S_ISDIR(new_dentry->d_inode->i_mode)) { | 470 | if (S_ISDIR(new_dentry->d_inode->i_mode)) |
473 | dentry_unhash(new_dentry); | ||
474 | res = hfsplus_rmdir(new_dir, new_dentry); | 471 | res = hfsplus_rmdir(new_dir, new_dentry); |
475 | } else { | 472 | else |
476 | res = hfsplus_unlink(new_dir, new_dentry); | 473 | res = hfsplus_unlink(new_dir, new_dentry); |
477 | } | ||
478 | if (res) | 474 | if (res) |
479 | return res; | 475 | return res; |
480 | } | 476 | } |
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c index e6816b9e6903..2638c834ed28 100644 --- a/fs/hostfs/hostfs_kern.c +++ b/fs/hostfs/hostfs_kern.c | |||
@@ -683,8 +683,6 @@ int hostfs_rmdir(struct inode *ino, struct dentry *dentry) | |||
683 | char *file; | 683 | char *file; |
684 | int err; | 684 | int err; |
685 | 685 | ||
686 | dentry_unhash(dentry); | ||
687 | |||
688 | if ((file = dentry_name(dentry)) == NULL) | 686 | if ((file = dentry_name(dentry)) == NULL) |
689 | return -ENOMEM; | 687 | return -ENOMEM; |
690 | err = do_rmdir(file); | 688 | err = do_rmdir(file); |
@@ -738,9 +736,6 @@ int hostfs_rename(struct inode *from_ino, struct dentry *from, | |||
738 | char *from_name, *to_name; | 736 | char *from_name, *to_name; |
739 | int err; | 737 | int err; |
740 | 738 | ||
741 | if (to->d_inode && S_ISDIR(to->d_inode->i_mode)) | ||
742 | dentry_unhash(to); | ||
743 | |||
744 | if ((from_name = dentry_name(from)) == NULL) | 739 | if ((from_name = dentry_name(from)) == NULL) |
745 | return -ENOMEM; | 740 | return -ENOMEM; |
746 | if ((to_name = dentry_name(to)) == NULL) { | 741 | if ((to_name = dentry_name(to)) == NULL) { |
diff --git a/fs/hpfs/namei.c b/fs/hpfs/namei.c index ff0ce21c0867..acf95dab2aac 100644 --- a/fs/hpfs/namei.c +++ b/fs/hpfs/namei.c | |||
@@ -439,8 +439,6 @@ static int hpfs_rmdir(struct inode *dir, struct dentry *dentry) | |||
439 | int err; | 439 | int err; |
440 | int r; | 440 | int r; |
441 | 441 | ||
442 | dentry_unhash(dentry); | ||
443 | |||
444 | hpfs_adjust_length(name, &len); | 442 | hpfs_adjust_length(name, &len); |
445 | hpfs_lock(dir->i_sb); | 443 | hpfs_lock(dir->i_sb); |
446 | err = -ENOENT; | 444 | err = -ENOENT; |
@@ -535,9 +533,6 @@ static int hpfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
535 | struct fnode *fnode; | 533 | struct fnode *fnode; |
536 | int err; | 534 | int err; |
537 | 535 | ||
538 | if (new_inode && S_ISDIR(new_inode->i_mode)) | ||
539 | dentry_unhash(new_dentry); | ||
540 | |||
541 | if ((err = hpfs_chk_name(new_name, &new_len))) return err; | 536 | if ((err = hpfs_chk_name(new_name, &new_len))) return err; |
542 | err = 0; | 537 | err = 0; |
543 | hpfs_adjust_length(old_name, &old_len); | 538 | hpfs_adjust_length(old_name, &old_len); |
diff --git a/fs/inode.c b/fs/inode.c index 990d284877a1..0f7e88a7803f 100644 --- a/fs/inode.c +++ b/fs/inode.c | |||
@@ -1,9 +1,7 @@ | |||
1 | /* | 1 | /* |
2 | * linux/fs/inode.c | ||
3 | * | ||
4 | * (C) 1997 Linus Torvalds | 2 | * (C) 1997 Linus Torvalds |
3 | * (C) 1999 Andrea Arcangeli <andrea@suse.de> (dynamic inode allocation) | ||
5 | */ | 4 | */ |
6 | |||
7 | #include <linux/fs.h> | 5 | #include <linux/fs.h> |
8 | #include <linux/mm.h> | 6 | #include <linux/mm.h> |
9 | #include <linux/dcache.h> | 7 | #include <linux/dcache.h> |
@@ -27,10 +25,11 @@ | |||
27 | #include <linux/prefetch.h> | 25 | #include <linux/prefetch.h> |
28 | #include <linux/ima.h> | 26 | #include <linux/ima.h> |
29 | #include <linux/cred.h> | 27 | #include <linux/cred.h> |
28 | #include <linux/buffer_head.h> /* for inode_has_buffers */ | ||
30 | #include "internal.h" | 29 | #include "internal.h" |
31 | 30 | ||
32 | /* | 31 | /* |
33 | * inode locking rules. | 32 | * Inode locking rules: |
34 | * | 33 | * |
35 | * inode->i_lock protects: | 34 | * inode->i_lock protects: |
36 | * inode->i_state, inode->i_hash, __iget() | 35 | * inode->i_state, inode->i_hash, __iget() |
@@ -60,54 +59,11 @@ | |||
60 | * inode_hash_lock | 59 | * inode_hash_lock |
61 | */ | 60 | */ |
62 | 61 | ||
63 | /* | ||
64 | * This is needed for the following functions: | ||
65 | * - inode_has_buffers | ||
66 | * - invalidate_bdev | ||
67 | * | ||
68 | * FIXME: remove all knowledge of the buffer layer from this file | ||
69 | */ | ||
70 | #include <linux/buffer_head.h> | ||
71 | |||
72 | /* | ||
73 | * New inode.c implementation. | ||
74 | * | ||
75 | * This implementation has the basic premise of trying | ||
76 | * to be extremely low-overhead and SMP-safe, yet be | ||
77 | * simple enough to be "obviously correct". | ||
78 | * | ||
79 | * Famous last words. | ||
80 | */ | ||
81 | |||
82 | /* inode dynamic allocation 1999, Andrea Arcangeli <andrea@suse.de> */ | ||
83 | |||
84 | /* #define INODE_PARANOIA 1 */ | ||
85 | /* #define INODE_DEBUG 1 */ | ||
86 | |||
87 | /* | ||
88 | * Inode lookup is no longer as critical as it used to be: | ||
89 | * most of the lookups are going to be through the dcache. | ||
90 | */ | ||
91 | #define I_HASHBITS i_hash_shift | ||
92 | #define I_HASHMASK i_hash_mask | ||
93 | |||
94 | static unsigned int i_hash_mask __read_mostly; | 62 | static unsigned int i_hash_mask __read_mostly; |
95 | static unsigned int i_hash_shift __read_mostly; | 63 | static unsigned int i_hash_shift __read_mostly; |
96 | static struct hlist_head *inode_hashtable __read_mostly; | 64 | static struct hlist_head *inode_hashtable __read_mostly; |
97 | static __cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_hash_lock); | 65 | static __cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_hash_lock); |
98 | 66 | ||
99 | /* | ||
100 | * Each inode can be on two separate lists. One is | ||
101 | * the hash list of the inode, used for lookups. The | ||
102 | * other linked list is the "type" list: | ||
103 | * "in_use" - valid inode, i_count > 0, i_nlink > 0 | ||
104 | * "dirty" - as "in_use" but also dirty | ||
105 | * "unused" - valid inode, i_count = 0 | ||
106 | * | ||
107 | * A "dirty" list is maintained for each super block, | ||
108 | * allowing for low-overhead inode sync() operations. | ||
109 | */ | ||
110 | |||
111 | static LIST_HEAD(inode_lru); | 67 | static LIST_HEAD(inode_lru); |
112 | static DEFINE_SPINLOCK(inode_lru_lock); | 68 | static DEFINE_SPINLOCK(inode_lru_lock); |
113 | 69 | ||
@@ -424,8 +380,8 @@ static unsigned long hash(struct super_block *sb, unsigned long hashval) | |||
424 | 380 | ||
425 | tmp = (hashval * (unsigned long)sb) ^ (GOLDEN_RATIO_PRIME + hashval) / | 381 | tmp = (hashval * (unsigned long)sb) ^ (GOLDEN_RATIO_PRIME + hashval) / |
426 | L1_CACHE_BYTES; | 382 | L1_CACHE_BYTES; |
427 | tmp = tmp ^ ((tmp ^ GOLDEN_RATIO_PRIME) >> I_HASHBITS); | 383 | tmp = tmp ^ ((tmp ^ GOLDEN_RATIO_PRIME) >> i_hash_shift); |
428 | return tmp & I_HASHMASK; | 384 | return tmp & i_hash_mask; |
429 | } | 385 | } |
430 | 386 | ||
431 | /** | 387 | /** |
diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c index 9a1e86fc1362..4bca6a2e5c07 100644 --- a/fs/jffs2/dir.c +++ b/fs/jffs2/dir.c | |||
@@ -605,8 +605,6 @@ static int jffs2_rmdir (struct inode *dir_i, struct dentry *dentry) | |||
605 | int ret; | 605 | int ret; |
606 | uint32_t now = get_seconds(); | 606 | uint32_t now = get_seconds(); |
607 | 607 | ||
608 | dentry_unhash(dentry); | ||
609 | |||
610 | for (fd = f->dents ; fd; fd = fd->next) { | 608 | for (fd = f->dents ; fd; fd = fd->next) { |
611 | if (fd->ino) | 609 | if (fd->ino) |
612 | return -ENOTEMPTY; | 610 | return -ENOTEMPTY; |
@@ -782,9 +780,6 @@ static int jffs2_rename (struct inode *old_dir_i, struct dentry *old_dentry, | |||
782 | uint8_t type; | 780 | uint8_t type; |
783 | uint32_t now; | 781 | uint32_t now; |
784 | 782 | ||
785 | if (new_dentry->d_inode && S_ISDIR(new_dentry->d_inode->i_mode)) | ||
786 | dentry_unhash(new_dentry); | ||
787 | |||
788 | /* The VFS will check for us and prevent trying to rename a | 783 | /* The VFS will check for us and prevent trying to rename a |
789 | * file over a directory and vice versa, but if it's a directory, | 784 | * file over a directory and vice versa, but if it's a directory, |
790 | * the VFS can't check whether the victim is empty. The filesystem | 785 | * the VFS can't check whether the victim is empty. The filesystem |
diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c index e896e67767eb..46ad619b6124 100644 --- a/fs/jffs2/fs.c +++ b/fs/jffs2/fs.c | |||
@@ -357,7 +357,7 @@ error: | |||
357 | return ERR_PTR(ret); | 357 | return ERR_PTR(ret); |
358 | } | 358 | } |
359 | 359 | ||
360 | void jffs2_dirty_inode(struct inode *inode) | 360 | void jffs2_dirty_inode(struct inode *inode, int flags) |
361 | { | 361 | { |
362 | struct iattr iattr; | 362 | struct iattr iattr; |
363 | 363 | ||
diff --git a/fs/jffs2/os-linux.h b/fs/jffs2/os-linux.h index 00bae7cc2e48..65c6c43ca482 100644 --- a/fs/jffs2/os-linux.h +++ b/fs/jffs2/os-linux.h | |||
@@ -172,7 +172,7 @@ int jffs2_setattr (struct dentry *, struct iattr *); | |||
172 | int jffs2_do_setattr (struct inode *, struct iattr *); | 172 | int jffs2_do_setattr (struct inode *, struct iattr *); |
173 | struct inode *jffs2_iget(struct super_block *, unsigned long); | 173 | struct inode *jffs2_iget(struct super_block *, unsigned long); |
174 | void jffs2_evict_inode (struct inode *); | 174 | void jffs2_evict_inode (struct inode *); |
175 | void jffs2_dirty_inode(struct inode *inode); | 175 | void jffs2_dirty_inode(struct inode *inode, int flags); |
176 | struct inode *jffs2_new_inode (struct inode *dir_i, int mode, | 176 | struct inode *jffs2_new_inode (struct inode *dir_i, int mode, |
177 | struct jffs2_raw_inode *ri); | 177 | struct jffs2_raw_inode *ri); |
178 | int jffs2_statfs (struct dentry *, struct kstatfs *); | 178 | int jffs2_statfs (struct dentry *, struct kstatfs *); |
diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c index eddbb373209e..109655904bbc 100644 --- a/fs/jfs/inode.c +++ b/fs/jfs/inode.c | |||
@@ -173,7 +173,7 @@ void jfs_evict_inode(struct inode *inode) | |||
173 | dquot_drop(inode); | 173 | dquot_drop(inode); |
174 | } | 174 | } |
175 | 175 | ||
176 | void jfs_dirty_inode(struct inode *inode) | 176 | void jfs_dirty_inode(struct inode *inode, int flags) |
177 | { | 177 | { |
178 | static int noisy = 5; | 178 | static int noisy = 5; |
179 | 179 | ||
diff --git a/fs/jfs/jfs_inode.h b/fs/jfs/jfs_inode.h index 155e91eff07d..ec2fb8b945fc 100644 --- a/fs/jfs/jfs_inode.h +++ b/fs/jfs/jfs_inode.h | |||
@@ -28,7 +28,7 @@ extern struct inode *jfs_iget(struct super_block *, unsigned long); | |||
28 | extern int jfs_commit_inode(struct inode *, int); | 28 | extern int jfs_commit_inode(struct inode *, int); |
29 | extern int jfs_write_inode(struct inode *, struct writeback_control *); | 29 | extern int jfs_write_inode(struct inode *, struct writeback_control *); |
30 | extern void jfs_evict_inode(struct inode *); | 30 | extern void jfs_evict_inode(struct inode *); |
31 | extern void jfs_dirty_inode(struct inode *); | 31 | extern void jfs_dirty_inode(struct inode *, int); |
32 | extern void jfs_truncate(struct inode *); | 32 | extern void jfs_truncate(struct inode *); |
33 | extern void jfs_truncate_nolock(struct inode *, loff_t); | 33 | extern void jfs_truncate_nolock(struct inode *, loff_t); |
34 | extern void jfs_free_zero_link(struct inode *); | 34 | extern void jfs_free_zero_link(struct inode *); |
diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c index 865df16a6cf3..eaaf2b511e89 100644 --- a/fs/jfs/namei.c +++ b/fs/jfs/namei.c | |||
@@ -360,8 +360,6 @@ static int jfs_rmdir(struct inode *dip, struct dentry *dentry) | |||
360 | 360 | ||
361 | jfs_info("jfs_rmdir: dip:0x%p name:%s", dip, dentry->d_name.name); | 361 | jfs_info("jfs_rmdir: dip:0x%p name:%s", dip, dentry->d_name.name); |
362 | 362 | ||
363 | dentry_unhash(dentry); | ||
364 | |||
365 | /* Init inode for quota operations. */ | 363 | /* Init inode for quota operations. */ |
366 | dquot_initialize(dip); | 364 | dquot_initialize(dip); |
367 | dquot_initialize(ip); | 365 | dquot_initialize(ip); |
@@ -1097,9 +1095,6 @@ static int jfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
1097 | jfs_info("jfs_rename: %s %s", old_dentry->d_name.name, | 1095 | jfs_info("jfs_rename: %s %s", old_dentry->d_name.name, |
1098 | new_dentry->d_name.name); | 1096 | new_dentry->d_name.name); |
1099 | 1097 | ||
1100 | if (new_dentry->d_inode && S_ISDIR(new_dentry->d_inode->i_mode)) | ||
1101 | dentry_unhash(new_dentry); | ||
1102 | |||
1103 | dquot_initialize(old_dir); | 1098 | dquot_initialize(old_dir); |
1104 | dquot_initialize(new_dir); | 1099 | dquot_initialize(new_dir); |
1105 | 1100 | ||
diff --git a/fs/logfs/dir.c b/fs/logfs/dir.c index f34c9cde9e94..9ed89d1663f8 100644 --- a/fs/logfs/dir.c +++ b/fs/logfs/dir.c | |||
@@ -273,8 +273,6 @@ static int logfs_rmdir(struct inode *dir, struct dentry *dentry) | |||
273 | { | 273 | { |
274 | struct inode *inode = dentry->d_inode; | 274 | struct inode *inode = dentry->d_inode; |
275 | 275 | ||
276 | dentry_unhash(dentry); | ||
277 | |||
278 | if (!logfs_empty_dir(inode)) | 276 | if (!logfs_empty_dir(inode)) |
279 | return -ENOTEMPTY; | 277 | return -ENOTEMPTY; |
280 | 278 | ||
@@ -624,9 +622,6 @@ static int logfs_rename_cross(struct inode *old_dir, struct dentry *old_dentry, | |||
624 | loff_t pos; | 622 | loff_t pos; |
625 | int err; | 623 | int err; |
626 | 624 | ||
627 | if (new_dentry->d_inode && S_ISDIR(new_dentry->d_inode->i_mode)) | ||
628 | dentry_unhash(new_dentry); | ||
629 | |||
630 | /* 1. locate source dd */ | 625 | /* 1. locate source dd */ |
631 | err = logfs_get_dd(old_dir, old_dentry, &dd, &pos); | 626 | err = logfs_get_dd(old_dir, old_dentry, &dd, &pos); |
632 | if (err) | 627 | if (err) |
diff --git a/fs/minix/namei.c b/fs/minix/namei.c index f60aed8db9c4..6e6777f1b4b2 100644 --- a/fs/minix/namei.c +++ b/fs/minix/namei.c | |||
@@ -168,8 +168,6 @@ static int minix_rmdir(struct inode * dir, struct dentry *dentry) | |||
168 | struct inode * inode = dentry->d_inode; | 168 | struct inode * inode = dentry->d_inode; |
169 | int err = -ENOTEMPTY; | 169 | int err = -ENOTEMPTY; |
170 | 170 | ||
171 | dentry_unhash(dentry); | ||
172 | |||
173 | if (minix_empty_dir(inode)) { | 171 | if (minix_empty_dir(inode)) { |
174 | err = minix_unlink(dir, dentry); | 172 | err = minix_unlink(dir, dentry); |
175 | if (!err) { | 173 | if (!err) { |
@@ -192,9 +190,6 @@ static int minix_rename(struct inode * old_dir, struct dentry *old_dentry, | |||
192 | struct minix_dir_entry * old_de; | 190 | struct minix_dir_entry * old_de; |
193 | int err = -ENOENT; | 191 | int err = -ENOENT; |
194 | 192 | ||
195 | if (new_inode && S_ISDIR(new_inode->i_mode)) | ||
196 | dentry_unhash(new_dentry); | ||
197 | |||
198 | old_de = minix_find_entry(old_dentry, &old_page); | 193 | old_de = minix_find_entry(old_dentry, &old_page); |
199 | if (!old_de) | 194 | if (!old_de) |
200 | goto out; | 195 | goto out; |
diff --git a/fs/namei.c b/fs/namei.c index 2358b326b221..e2e4e8d032ee 100644 --- a/fs/namei.c +++ b/fs/namei.c | |||
@@ -919,12 +919,11 @@ static inline bool managed_dentry_might_block(struct dentry *dentry) | |||
919 | } | 919 | } |
920 | 920 | ||
921 | /* | 921 | /* |
922 | * Skip to top of mountpoint pile in rcuwalk mode. We abort the rcu-walk if we | 922 | * Try to skip to top of mountpoint pile in rcuwalk mode. Fail if |
923 | * meet a managed dentry and we're not walking to "..". True is returned to | 923 | * we meet a managed dentry that would need blocking. |
924 | * continue, false to abort. | ||
925 | */ | 924 | */ |
926 | static bool __follow_mount_rcu(struct nameidata *nd, struct path *path, | 925 | static bool __follow_mount_rcu(struct nameidata *nd, struct path *path, |
927 | struct inode **inode, bool reverse_transit) | 926 | struct inode **inode) |
928 | { | 927 | { |
929 | for (;;) { | 928 | for (;;) { |
930 | struct vfsmount *mounted; | 929 | struct vfsmount *mounted; |
@@ -933,8 +932,7 @@ static bool __follow_mount_rcu(struct nameidata *nd, struct path *path, | |||
933 | * that wants to block transit. | 932 | * that wants to block transit. |
934 | */ | 933 | */ |
935 | *inode = path->dentry->d_inode; | 934 | *inode = path->dentry->d_inode; |
936 | if (!reverse_transit && | 935 | if (unlikely(managed_dentry_might_block(path->dentry))) |
937 | unlikely(managed_dentry_might_block(path->dentry))) | ||
938 | return false; | 936 | return false; |
939 | 937 | ||
940 | if (!d_mountpoint(path->dentry)) | 938 | if (!d_mountpoint(path->dentry)) |
@@ -947,16 +945,24 @@ static bool __follow_mount_rcu(struct nameidata *nd, struct path *path, | |||
947 | path->dentry = mounted->mnt_root; | 945 | path->dentry = mounted->mnt_root; |
948 | nd->seq = read_seqcount_begin(&path->dentry->d_seq); | 946 | nd->seq = read_seqcount_begin(&path->dentry->d_seq); |
949 | } | 947 | } |
950 | |||
951 | if (unlikely(path->dentry->d_flags & DCACHE_NEED_AUTOMOUNT)) | ||
952 | return reverse_transit; | ||
953 | return true; | 948 | return true; |
954 | } | 949 | } |
955 | 950 | ||
956 | static int follow_dotdot_rcu(struct nameidata *nd) | 951 | static void follow_mount_rcu(struct nameidata *nd) |
957 | { | 952 | { |
958 | struct inode *inode = nd->inode; | 953 | while (d_mountpoint(nd->path.dentry)) { |
954 | struct vfsmount *mounted; | ||
955 | mounted = __lookup_mnt(nd->path.mnt, nd->path.dentry, 1); | ||
956 | if (!mounted) | ||
957 | break; | ||
958 | nd->path.mnt = mounted; | ||
959 | nd->path.dentry = mounted->mnt_root; | ||
960 | nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq); | ||
961 | } | ||
962 | } | ||
959 | 963 | ||
964 | static int follow_dotdot_rcu(struct nameidata *nd) | ||
965 | { | ||
960 | set_root_rcu(nd); | 966 | set_root_rcu(nd); |
961 | 967 | ||
962 | while (1) { | 968 | while (1) { |
@@ -972,7 +978,6 @@ static int follow_dotdot_rcu(struct nameidata *nd) | |||
972 | seq = read_seqcount_begin(&parent->d_seq); | 978 | seq = read_seqcount_begin(&parent->d_seq); |
973 | if (read_seqcount_retry(&old->d_seq, nd->seq)) | 979 | if (read_seqcount_retry(&old->d_seq, nd->seq)) |
974 | goto failed; | 980 | goto failed; |
975 | inode = parent->d_inode; | ||
976 | nd->path.dentry = parent; | 981 | nd->path.dentry = parent; |
977 | nd->seq = seq; | 982 | nd->seq = seq; |
978 | break; | 983 | break; |
@@ -980,10 +985,9 @@ static int follow_dotdot_rcu(struct nameidata *nd) | |||
980 | if (!follow_up_rcu(&nd->path)) | 985 | if (!follow_up_rcu(&nd->path)) |
981 | break; | 986 | break; |
982 | nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq); | 987 | nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq); |
983 | inode = nd->path.dentry->d_inode; | ||
984 | } | 988 | } |
985 | __follow_mount_rcu(nd, &nd->path, &inode, true); | 989 | follow_mount_rcu(nd); |
986 | nd->inode = inode; | 990 | nd->inode = nd->path.dentry->d_inode; |
987 | return 0; | 991 | return 0; |
988 | 992 | ||
989 | failed: | 993 | failed: |
@@ -1157,8 +1161,11 @@ static int do_lookup(struct nameidata *nd, struct qstr *name, | |||
1157 | } | 1161 | } |
1158 | path->mnt = mnt; | 1162 | path->mnt = mnt; |
1159 | path->dentry = dentry; | 1163 | path->dentry = dentry; |
1160 | if (likely(__follow_mount_rcu(nd, path, inode, false))) | 1164 | if (unlikely(!__follow_mount_rcu(nd, path, inode))) |
1161 | return 0; | 1165 | goto unlazy; |
1166 | if (unlikely(path->dentry->d_flags & DCACHE_NEED_AUTOMOUNT)) | ||
1167 | goto unlazy; | ||
1168 | return 0; | ||
1162 | unlazy: | 1169 | unlazy: |
1163 | if (unlazy_walk(nd, dentry)) | 1170 | if (unlazy_walk(nd, dentry)) |
1164 | return -ECHILD; | 1171 | return -ECHILD; |
@@ -2572,6 +2579,7 @@ int vfs_rmdir(struct inode *dir, struct dentry *dentry) | |||
2572 | if (error) | 2579 | if (error) |
2573 | goto out; | 2580 | goto out; |
2574 | 2581 | ||
2582 | shrink_dcache_parent(dentry); | ||
2575 | error = dir->i_op->rmdir(dir, dentry); | 2583 | error = dir->i_op->rmdir(dir, dentry); |
2576 | if (error) | 2584 | if (error) |
2577 | goto out; | 2585 | goto out; |
@@ -2986,6 +2994,8 @@ static int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry, | |||
2986 | if (d_mountpoint(old_dentry) || d_mountpoint(new_dentry)) | 2994 | if (d_mountpoint(old_dentry) || d_mountpoint(new_dentry)) |
2987 | goto out; | 2995 | goto out; |
2988 | 2996 | ||
2997 | if (target) | ||
2998 | shrink_dcache_parent(new_dentry); | ||
2989 | error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry); | 2999 | error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry); |
2990 | if (error) | 3000 | if (error) |
2991 | goto out; | 3001 | goto out; |
diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c index e3e646b06404..9c51f621e901 100644 --- a/fs/ncpfs/dir.c +++ b/fs/ncpfs/dir.c | |||
@@ -1033,8 +1033,11 @@ static int ncp_rmdir(struct inode *dir, struct dentry *dentry) | |||
1033 | DPRINTK("ncp_rmdir: removing %s/%s\n", | 1033 | DPRINTK("ncp_rmdir: removing %s/%s\n", |
1034 | dentry->d_parent->d_name.name, dentry->d_name.name); | 1034 | dentry->d_parent->d_name.name, dentry->d_name.name); |
1035 | 1035 | ||
1036 | /* | ||
1037 | * fail with EBUSY if there are still references to this | ||
1038 | * directory. | ||
1039 | */ | ||
1036 | dentry_unhash(dentry); | 1040 | dentry_unhash(dentry); |
1037 | |||
1038 | error = -EBUSY; | 1041 | error = -EBUSY; |
1039 | if (!d_unhashed(dentry)) | 1042 | if (!d_unhashed(dentry)) |
1040 | goto out; | 1043 | goto out; |
@@ -1141,8 +1144,16 @@ static int ncp_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
1141 | old_dentry->d_parent->d_name.name, old_dentry->d_name.name, | 1144 | old_dentry->d_parent->d_name.name, old_dentry->d_name.name, |
1142 | new_dentry->d_parent->d_name.name, new_dentry->d_name.name); | 1145 | new_dentry->d_parent->d_name.name, new_dentry->d_name.name); |
1143 | 1146 | ||
1144 | if (new_dentry->d_inode && S_ISDIR(new_dentry->d_inode->i_mode)) | 1147 | if (new_dentry->d_inode && S_ISDIR(new_dentry->d_inode->i_mode)) { |
1148 | /* | ||
1149 | * fail with EBUSY if there are still references to this | ||
1150 | * directory. | ||
1151 | */ | ||
1145 | dentry_unhash(new_dentry); | 1152 | dentry_unhash(new_dentry); |
1153 | error = -EBUSY; | ||
1154 | if (!d_unhashed(new_dentry)) | ||
1155 | goto out; | ||
1156 | } | ||
1146 | 1157 | ||
1147 | ncp_age_dentry(server, old_dentry); | 1158 | ncp_age_dentry(server, old_dentry); |
1148 | ncp_age_dentry(server, new_dentry); | 1159 | ncp_age_dentry(server, new_dentry); |
diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig index ba306658a6db..81515545ba75 100644 --- a/fs/nfs/Kconfig +++ b/fs/nfs/Kconfig | |||
@@ -87,6 +87,16 @@ config NFS_V4_1 | |||
87 | config PNFS_FILE_LAYOUT | 87 | config PNFS_FILE_LAYOUT |
88 | tristate | 88 | tristate |
89 | 89 | ||
90 | config PNFS_OBJLAYOUT | ||
91 | tristate "Provide support for the pNFS Objects Layout Driver for NFSv4.1 pNFS (EXPERIMENTAL)" | ||
92 | depends on NFS_FS && NFS_V4_1 && SCSI_OSD_ULD | ||
93 | help | ||
94 | Say M here if you want your pNFS client to support the Objects Layout Driver. | ||
95 | Requires the SCSI osd initiator library (SCSI_OSD_INITIATOR) and | ||
96 | upper level driver (SCSI_OSD_ULD). | ||
97 | |||
98 | If unsure, say N. | ||
99 | |||
90 | config ROOT_NFS | 100 | config ROOT_NFS |
91 | bool "Root file system on NFS" | 101 | bool "Root file system on NFS" |
92 | depends on NFS_FS=y && IP_PNP | 102 | depends on NFS_FS=y && IP_PNP |
diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile index 4776ff9e3814..6a34f7dd0e6f 100644 --- a/fs/nfs/Makefile +++ b/fs/nfs/Makefile | |||
@@ -15,9 +15,11 @@ nfs-$(CONFIG_NFS_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o \ | |||
15 | delegation.o idmap.o \ | 15 | delegation.o idmap.o \ |
16 | callback.o callback_xdr.o callback_proc.o \ | 16 | callback.o callback_xdr.o callback_proc.o \ |
17 | nfs4namespace.o | 17 | nfs4namespace.o |
18 | nfs-$(CONFIG_NFS_V4_1) += pnfs.o | 18 | nfs-$(CONFIG_NFS_V4_1) += pnfs.o pnfs_dev.o |
19 | nfs-$(CONFIG_SYSCTL) += sysctl.o | 19 | nfs-$(CONFIG_SYSCTL) += sysctl.o |
20 | nfs-$(CONFIG_NFS_FSCACHE) += fscache.o fscache-index.o | 20 | nfs-$(CONFIG_NFS_FSCACHE) += fscache.o fscache-index.o |
21 | 21 | ||
22 | obj-$(CONFIG_PNFS_FILE_LAYOUT) += nfs_layout_nfsv41_files.o | 22 | obj-$(CONFIG_PNFS_FILE_LAYOUT) += nfs_layout_nfsv41_files.o |
23 | nfs_layout_nfsv41_files-y := nfs4filelayout.o nfs4filelayoutdev.o | 23 | nfs_layout_nfsv41_files-y := nfs4filelayout.o nfs4filelayoutdev.o |
24 | |||
25 | obj-$(CONFIG_PNFS_OBJLAYOUT) += objlayout/ | ||
diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h index 46d93ce7311b..b257383bb565 100644 --- a/fs/nfs/callback.h +++ b/fs/nfs/callback.h | |||
@@ -167,6 +167,23 @@ extern unsigned nfs4_callback_layoutrecall( | |||
167 | 167 | ||
168 | extern void nfs4_check_drain_bc_complete(struct nfs4_session *ses); | 168 | extern void nfs4_check_drain_bc_complete(struct nfs4_session *ses); |
169 | extern void nfs4_cb_take_slot(struct nfs_client *clp); | 169 | extern void nfs4_cb_take_slot(struct nfs_client *clp); |
170 | |||
/*
 * struct cb_devicenotifyitem - one decoded CB_NOTIFY_DEVICEID entry
 * @cbd_notify_type: NOTIFY_DEVICEID4_CHANGE or NOTIFY_DEVICEID4_DELETE;
 *                   decode_devicenotify_args() rejects any other value
 * @cbd_layout_type: layout type from the wire; matched against
 *                   pnfs_curr_ld->id to pick the layout driver
 * @cbd_dev_id:      device id the notification refers to
 * @cbd_immediate:   "immediate" word of a change notification, 0 when the
 *                   decoder did not read one
 */
171 | struct cb_devicenotifyitem { | ||
172 | uint32_t cbd_notify_type; | ||
173 | uint32_t cbd_layout_type; | ||
174 | struct nfs4_deviceid cbd_dev_id; | ||
175 | uint32_t cbd_immediate; | ||
176 | }; | ||
177 | |||
/*
 * struct cb_devicenotifyargs - decoded arguments of CB_NOTIFY_DEVICEID
 * @ndevs: number of entries in @devs that were successfully decoded
 * @devs:  array allocated by decode_devicenotify_args(); it is released
 *         with kfree() at the end of nfs4_callback_devicenotify()
 */
178 | struct cb_devicenotifyargs { | ||
179 | int ndevs; | ||
180 | struct cb_devicenotifyitem *devs; | ||
181 | }; | ||
182 | |||
183 | extern __be32 nfs4_callback_devicenotify( | ||
184 | struct cb_devicenotifyargs *args, | ||
185 | void *dummy, struct cb_process_state *cps); | ||
186 | |||
170 | #endif /* CONFIG_NFS_V4_1 */ | 187 | #endif /* CONFIG_NFS_V4_1 */ |
171 | extern int check_gss_callback_principal(struct nfs_client *, struct svc_rqst *); | 188 | extern int check_gss_callback_principal(struct nfs_client *, struct svc_rqst *); |
172 | extern __be32 nfs4_callback_getattr(struct cb_getattrargs *args, | 189 | extern __be32 nfs4_callback_getattr(struct cb_getattrargs *args, |
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index 2f41dccea18e..d4d1954e9bb9 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c | |||
@@ -139,7 +139,7 @@ static u32 initiate_file_draining(struct nfs_client *clp, | |||
139 | spin_lock(&ino->i_lock); | 139 | spin_lock(&ino->i_lock); |
140 | if (test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags) || | 140 | if (test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags) || |
141 | mark_matching_lsegs_invalid(lo, &free_me_list, | 141 | mark_matching_lsegs_invalid(lo, &free_me_list, |
142 | args->cbl_range.iomode)) | 142 | &args->cbl_range)) |
143 | rv = NFS4ERR_DELAY; | 143 | rv = NFS4ERR_DELAY; |
144 | else | 144 | else |
145 | rv = NFS4ERR_NOMATCHING_LAYOUT; | 145 | rv = NFS4ERR_NOMATCHING_LAYOUT; |
@@ -184,7 +184,7 @@ static u32 initiate_bulk_draining(struct nfs_client *clp, | |||
184 | ino = lo->plh_inode; | 184 | ino = lo->plh_inode; |
185 | spin_lock(&ino->i_lock); | 185 | spin_lock(&ino->i_lock); |
186 | set_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags); | 186 | set_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags); |
187 | if (mark_matching_lsegs_invalid(lo, &free_me_list, range.iomode)) | 187 | if (mark_matching_lsegs_invalid(lo, &free_me_list, &range)) |
188 | rv = NFS4ERR_DELAY; | 188 | rv = NFS4ERR_DELAY; |
189 | list_del_init(&lo->plh_bulk_recall); | 189 | list_del_init(&lo->plh_bulk_recall); |
190 | spin_unlock(&ino->i_lock); | 190 | spin_unlock(&ino->i_lock); |
@@ -241,6 +241,53 @@ static void pnfs_recall_all_layouts(struct nfs_client *clp) | |||
241 | do_callback_layoutrecall(clp, &args); | 241 | do_callback_layoutrecall(clp, &args); |
242 | } | 242 | } |
243 | 243 | ||
244 | __be32 nfs4_callback_devicenotify(struct cb_devicenotifyargs *args, | ||
245 | void *dummy, struct cb_process_state *cps) | ||
246 | { | ||
247 | int i; | ||
248 | __be32 res = 0; | ||
249 | struct nfs_client *clp = cps->clp; | ||
250 | struct nfs_server *server = NULL; | ||
251 | |||
252 | dprintk("%s: -->\n", __func__); | ||
253 | |||
254 | if (!clp) { | ||
255 | res = cpu_to_be32(NFS4ERR_OP_NOT_IN_SESSION); | ||
256 | goto out; | ||
257 | } | ||
258 | |||
259 | for (i = 0; i < args->ndevs; i++) { | ||
260 | struct cb_devicenotifyitem *dev = &args->devs[i]; | ||
261 | |||
262 | if (!server || | ||
263 | server->pnfs_curr_ld->id != dev->cbd_layout_type) { | ||
264 | rcu_read_lock(); | ||
265 | list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) | ||
266 | if (server->pnfs_curr_ld && | ||
267 | server->pnfs_curr_ld->id == dev->cbd_layout_type) { | ||
268 | rcu_read_unlock(); | ||
269 | goto found; | ||
270 | } | ||
271 | rcu_read_unlock(); | ||
272 | dprintk("%s: layout type %u not found\n", | ||
273 | __func__, dev->cbd_layout_type); | ||
274 | continue; | ||
275 | } | ||
276 | |||
277 | found: | ||
278 | if (dev->cbd_notify_type == NOTIFY_DEVICEID4_CHANGE) | ||
279 | dprintk("%s: NOTIFY_DEVICEID4_CHANGE not supported, " | ||
280 | "deleting instead\n", __func__); | ||
281 | nfs4_delete_deviceid(server->pnfs_curr_ld, clp, &dev->cbd_dev_id); | ||
282 | } | ||
283 | |||
284 | out: | ||
285 | kfree(args->devs); | ||
286 | dprintk("%s: exit with status = %u\n", | ||
287 | __func__, be32_to_cpu(res)); | ||
288 | return res; | ||
289 | } | ||
290 | |||
244 | int nfs41_validate_delegation_stateid(struct nfs_delegation *delegation, const nfs4_stateid *stateid) | 291 | int nfs41_validate_delegation_stateid(struct nfs_delegation *delegation, const nfs4_stateid *stateid) |
245 | { | 292 | { |
246 | if (delegation == NULL) | 293 | if (delegation == NULL) |
diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c index 00ecf62ce7c1..c6c86a77e043 100644 --- a/fs/nfs/callback_xdr.c +++ b/fs/nfs/callback_xdr.c | |||
@@ -25,6 +25,7 @@ | |||
25 | 25 | ||
26 | #if defined(CONFIG_NFS_V4_1) | 26 | #if defined(CONFIG_NFS_V4_1) |
27 | #define CB_OP_LAYOUTRECALL_RES_MAXSZ (CB_OP_HDR_RES_MAXSZ) | 27 | #define CB_OP_LAYOUTRECALL_RES_MAXSZ (CB_OP_HDR_RES_MAXSZ) |
28 | #define CB_OP_DEVICENOTIFY_RES_MAXSZ (CB_OP_HDR_RES_MAXSZ) | ||
28 | #define CB_OP_SEQUENCE_RES_MAXSZ (CB_OP_HDR_RES_MAXSZ + \ | 29 | #define CB_OP_SEQUENCE_RES_MAXSZ (CB_OP_HDR_RES_MAXSZ + \ |
29 | 4 + 1 + 3) | 30 | 4 + 1 + 3) |
30 | #define CB_OP_RECALLANY_RES_MAXSZ (CB_OP_HDR_RES_MAXSZ) | 31 | #define CB_OP_RECALLANY_RES_MAXSZ (CB_OP_HDR_RES_MAXSZ) |
@@ -284,6 +285,93 @@ out: | |||
284 | return status; | 285 | return status; |
285 | } | 286 | } |
286 | 287 | ||
288 | static | ||
289 | __be32 decode_devicenotify_args(struct svc_rqst *rqstp, | ||
290 | struct xdr_stream *xdr, | ||
291 | struct cb_devicenotifyargs *args) | ||
292 | { | ||
293 | __be32 *p; | ||
294 | __be32 status = 0; | ||
295 | u32 tmp; | ||
296 | int n, i; | ||
297 | args->ndevs = 0; | ||
298 | |||
299 | /* Num of device notifications */ | ||
300 | p = read_buf(xdr, sizeof(uint32_t)); | ||
301 | if (unlikely(p == NULL)) { | ||
302 | status = htonl(NFS4ERR_BADXDR); | ||
303 | goto out; | ||
304 | } | ||
305 | n = ntohl(*p++); | ||
306 | if (n <= 0) | ||
307 | goto out; | ||
308 | |||
309 | args->devs = kmalloc(n * sizeof(*args->devs), GFP_KERNEL); | ||
310 | if (!args->devs) { | ||
311 | status = htonl(NFS4ERR_DELAY); | ||
312 | goto out; | ||
313 | } | ||
314 | |||
315 | /* Decode each dev notification */ | ||
316 | for (i = 0; i < n; i++) { | ||
317 | struct cb_devicenotifyitem *dev = &args->devs[i]; | ||
318 | |||
319 | p = read_buf(xdr, (4 * sizeof(uint32_t)) + NFS4_DEVICEID4_SIZE); | ||
320 | if (unlikely(p == NULL)) { | ||
321 | status = htonl(NFS4ERR_BADXDR); | ||
322 | goto err; | ||
323 | } | ||
324 | |||
325 | tmp = ntohl(*p++); /* bitmap size */ | ||
326 | if (tmp != 1) { | ||
327 | status = htonl(NFS4ERR_INVAL); | ||
328 | goto err; | ||
329 | } | ||
330 | dev->cbd_notify_type = ntohl(*p++); | ||
331 | if (dev->cbd_notify_type != NOTIFY_DEVICEID4_CHANGE && | ||
332 | dev->cbd_notify_type != NOTIFY_DEVICEID4_DELETE) { | ||
333 | status = htonl(NFS4ERR_INVAL); | ||
334 | goto err; | ||
335 | } | ||
336 | |||
337 | tmp = ntohl(*p++); /* opaque size */ | ||
338 | if (((dev->cbd_notify_type == NOTIFY_DEVICEID4_CHANGE) && | ||
339 | (tmp != NFS4_DEVICEID4_SIZE + 8)) || | ||
340 | ((dev->cbd_notify_type == NOTIFY_DEVICEID4_DELETE) && | ||
341 | (tmp != NFS4_DEVICEID4_SIZE + 4))) { | ||
342 | status = htonl(NFS4ERR_INVAL); | ||
343 | goto err; | ||
344 | } | ||
345 | dev->cbd_layout_type = ntohl(*p++); | ||
346 | memcpy(dev->cbd_dev_id.data, p, NFS4_DEVICEID4_SIZE); | ||
347 | p += XDR_QUADLEN(NFS4_DEVICEID4_SIZE); | ||
348 | |||
349 | if (dev->cbd_layout_type == NOTIFY_DEVICEID4_CHANGE) { | ||
350 | p = read_buf(xdr, sizeof(uint32_t)); | ||
351 | if (unlikely(p == NULL)) { | ||
352 | status = htonl(NFS4ERR_BADXDR); | ||
353 | goto err; | ||
354 | } | ||
355 | dev->cbd_immediate = ntohl(*p++); | ||
356 | } else { | ||
357 | dev->cbd_immediate = 0; | ||
358 | } | ||
359 | |||
360 | args->ndevs++; | ||
361 | |||
362 | dprintk("%s: type %d layout 0x%x immediate %d\n", | ||
363 | __func__, dev->cbd_notify_type, dev->cbd_layout_type, | ||
364 | dev->cbd_immediate); | ||
365 | } | ||
366 | out: | ||
367 | dprintk("%s: status %d ndevs %d\n", | ||
368 | __func__, ntohl(status), args->ndevs); | ||
369 | return status; | ||
370 | err: | ||
371 | kfree(args->devs); | ||
372 | goto out; | ||
373 | } | ||
374 | |||
287 | static __be32 decode_sessionid(struct xdr_stream *xdr, | 375 | static __be32 decode_sessionid(struct xdr_stream *xdr, |
288 | struct nfs4_sessionid *sid) | 376 | struct nfs4_sessionid *sid) |
289 | { | 377 | { |
@@ -639,10 +727,10 @@ preprocess_nfs41_op(int nop, unsigned int op_nr, struct callback_op **op) | |||
639 | case OP_CB_RECALL_ANY: | 727 | case OP_CB_RECALL_ANY: |
640 | case OP_CB_RECALL_SLOT: | 728 | case OP_CB_RECALL_SLOT: |
641 | case OP_CB_LAYOUTRECALL: | 729 | case OP_CB_LAYOUTRECALL: |
730 | case OP_CB_NOTIFY_DEVICEID: | ||
642 | *op = &callback_ops[op_nr]; | 731 | *op = &callback_ops[op_nr]; |
643 | break; | 732 | break; |
644 | 733 | ||
645 | case OP_CB_NOTIFY_DEVICEID: | ||
646 | case OP_CB_NOTIFY: | 734 | case OP_CB_NOTIFY: |
647 | case OP_CB_PUSH_DELEG: | 735 | case OP_CB_PUSH_DELEG: |
648 | case OP_CB_RECALLABLE_OBJ_AVAIL: | 736 | case OP_CB_RECALLABLE_OBJ_AVAIL: |
@@ -849,6 +937,12 @@ static struct callback_op callback_ops[] = { | |||
849 | (callback_decode_arg_t)decode_layoutrecall_args, | 937 | (callback_decode_arg_t)decode_layoutrecall_args, |
850 | .res_maxsize = CB_OP_LAYOUTRECALL_RES_MAXSZ, | 938 | .res_maxsize = CB_OP_LAYOUTRECALL_RES_MAXSZ, |
851 | }, | 939 | }, |
940 | [OP_CB_NOTIFY_DEVICEID] = { | ||
941 | .process_op = (callback_process_op_t)nfs4_callback_devicenotify, | ||
942 | .decode_args = | ||
943 | (callback_decode_arg_t)decode_devicenotify_args, | ||
944 | .res_maxsize = CB_OP_DEVICENOTIFY_RES_MAXSZ, | ||
945 | }, | ||
852 | [OP_CB_SEQUENCE] = { | 946 | [OP_CB_SEQUENCE] = { |
853 | .process_op = (callback_process_op_t)nfs4_callback_sequence, | 947 | .process_op = (callback_process_op_t)nfs4_callback_sequence, |
854 | .decode_args = (callback_decode_arg_t)decode_cb_sequence_args, | 948 | .decode_args = (callback_decode_arg_t)decode_cb_sequence_args, |
diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 139be9647d80..b3dc2b88b65b 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c | |||
@@ -290,6 +290,8 @@ static void nfs_free_client(struct nfs_client *clp) | |||
290 | if (clp->cl_machine_cred != NULL) | 290 | if (clp->cl_machine_cred != NULL) |
291 | put_rpccred(clp->cl_machine_cred); | 291 | put_rpccred(clp->cl_machine_cred); |
292 | 292 | ||
293 | nfs4_deviceid_purge_client(clp); | ||
294 | |||
293 | kfree(clp->cl_hostname); | 295 | kfree(clp->cl_hostname); |
294 | kfree(clp); | 296 | kfree(clp); |
295 | 297 | ||
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index bbbc6bf5cb2e..dd25c2aec375 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c | |||
@@ -21,25 +21,13 @@ | |||
21 | #include "delegation.h" | 21 | #include "delegation.h" |
22 | #include "internal.h" | 22 | #include "internal.h" |
23 | 23 | ||
24 | static void nfs_do_free_delegation(struct nfs_delegation *delegation) | ||
25 | { | ||
26 | kfree(delegation); | ||
27 | } | ||
28 | |||
29 | static void nfs_free_delegation_callback(struct rcu_head *head) | ||
30 | { | ||
31 | struct nfs_delegation *delegation = container_of(head, struct nfs_delegation, rcu); | ||
32 | |||
33 | nfs_do_free_delegation(delegation); | ||
34 | } | ||
35 | |||
36 | static void nfs_free_delegation(struct nfs_delegation *delegation) | 24 | static void nfs_free_delegation(struct nfs_delegation *delegation) |
37 | { | 25 | { |
38 | if (delegation->cred) { | 26 | if (delegation->cred) { |
39 | put_rpccred(delegation->cred); | 27 | put_rpccred(delegation->cred); |
40 | delegation->cred = NULL; | 28 | delegation->cred = NULL; |
41 | } | 29 | } |
42 | call_rcu(&delegation->rcu, nfs_free_delegation_callback); | 30 | kfree_rcu(delegation, rcu); |
43 | } | 31 | } |
44 | 32 | ||
45 | /** | 33 | /** |
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 424e47773a84..ededdbd0db38 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c | |||
@@ -512,12 +512,7 @@ int nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *en | |||
512 | struct page **xdr_pages, struct page *page, unsigned int buflen) | 512 | struct page **xdr_pages, struct page *page, unsigned int buflen) |
513 | { | 513 | { |
514 | struct xdr_stream stream; | 514 | struct xdr_stream stream; |
515 | struct xdr_buf buf = { | 515 | struct xdr_buf buf; |
516 | .pages = xdr_pages, | ||
517 | .page_len = buflen, | ||
518 | .buflen = buflen, | ||
519 | .len = buflen, | ||
520 | }; | ||
521 | struct page *scratch; | 516 | struct page *scratch; |
522 | struct nfs_cache_array *array; | 517 | struct nfs_cache_array *array; |
523 | unsigned int count = 0; | 518 | unsigned int count = 0; |
@@ -527,7 +522,7 @@ int nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *en | |||
527 | if (scratch == NULL) | 522 | if (scratch == NULL) |
528 | return -ENOMEM; | 523 | return -ENOMEM; |
529 | 524 | ||
530 | xdr_init_decode(&stream, &buf, NULL); | 525 | xdr_init_decode_pages(&stream, &buf, xdr_pages, buflen); |
531 | xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE); | 526 | xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE); |
532 | 527 | ||
533 | do { | 528 | do { |
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 57bb31ad7a5e..144f2a3c7185 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c | |||
@@ -1298,8 +1298,12 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) | |||
1298 | i_size_write(inode, new_isize); | 1298 | i_size_write(inode, new_isize); |
1299 | invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA; | 1299 | invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA; |
1300 | } | 1300 | } |
1301 | dprintk("NFS: isize change on server for file %s/%ld\n", | 1301 | dprintk("NFS: isize change on server for file %s/%ld " |
1302 | inode->i_sb->s_id, inode->i_ino); | 1302 | "(%Ld to %Ld)\n", |
1303 | inode->i_sb->s_id, | ||
1304 | inode->i_ino, | ||
1305 | (long long)cur_isize, | ||
1306 | (long long)new_isize); | ||
1303 | } | 1307 | } |
1304 | } else | 1308 | } else |
1305 | invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR | 1309 | invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR |
@@ -1424,9 +1428,10 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) | |||
1424 | */ | 1428 | */ |
1425 | void nfs4_evict_inode(struct inode *inode) | 1429 | void nfs4_evict_inode(struct inode *inode) |
1426 | { | 1430 | { |
1427 | pnfs_destroy_layout(NFS_I(inode)); | ||
1428 | truncate_inode_pages(&inode->i_data, 0); | 1431 | truncate_inode_pages(&inode->i_data, 0); |
1429 | end_writeback(inode); | 1432 | end_writeback(inode); |
1433 | pnfs_return_layout(inode); | ||
1434 | pnfs_destroy_layout(NFS_I(inode)); | ||
1430 | /* If we are holding a delegation, return it! */ | 1435 | /* If we are holding a delegation, return it! */ |
1431 | nfs_inode_return_delegation_noreclaim(inode); | 1436 | nfs_inode_return_delegation_noreclaim(inode); |
1432 | /* First call standard NFS clear_inode() code */ | 1437 | /* First call standard NFS clear_inode() code */ |
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 2df6ca7b5898..b9056cbe68d6 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h | |||
@@ -310,6 +310,7 @@ extern int nfs_migrate_page(struct address_space *, | |||
310 | #endif | 310 | #endif |
311 | 311 | ||
312 | /* nfs4proc.c */ | 312 | /* nfs4proc.c */ |
313 | extern void __nfs4_read_done_cb(struct nfs_read_data *); | ||
313 | extern void nfs4_reset_read(struct rpc_task *task, struct nfs_read_data *data); | 314 | extern void nfs4_reset_read(struct rpc_task *task, struct nfs_read_data *data); |
314 | extern int nfs4_init_client(struct nfs_client *clp, | 315 | extern int nfs4_init_client(struct nfs_client *clp, |
315 | const struct rpc_timeout *timeparms, | 316 | const struct rpc_timeout *timeparms, |
diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index be79dc9f386d..426908809c97 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c | |||
@@ -421,6 +421,7 @@ filelayout_check_layout(struct pnfs_layout_hdr *lo, | |||
421 | struct nfs4_deviceid *id, | 421 | struct nfs4_deviceid *id, |
422 | gfp_t gfp_flags) | 422 | gfp_t gfp_flags) |
423 | { | 423 | { |
424 | struct nfs4_deviceid_node *d; | ||
424 | struct nfs4_file_layout_dsaddr *dsaddr; | 425 | struct nfs4_file_layout_dsaddr *dsaddr; |
425 | int status = -EINVAL; | 426 | int status = -EINVAL; |
426 | struct nfs_server *nfss = NFS_SERVER(lo->plh_inode); | 427 | struct nfs_server *nfss = NFS_SERVER(lo->plh_inode); |
@@ -428,7 +429,7 @@ filelayout_check_layout(struct pnfs_layout_hdr *lo, | |||
428 | dprintk("--> %s\n", __func__); | 429 | dprintk("--> %s\n", __func__); |
429 | 430 | ||
430 | if (fl->pattern_offset > lgr->range.offset) { | 431 | if (fl->pattern_offset > lgr->range.offset) { |
431 | dprintk("%s pattern_offset %lld to large\n", | 432 | dprintk("%s pattern_offset %lld too large\n", |
432 | __func__, fl->pattern_offset); | 433 | __func__, fl->pattern_offset); |
433 | goto out; | 434 | goto out; |
434 | } | 435 | } |
@@ -440,12 +441,14 @@ filelayout_check_layout(struct pnfs_layout_hdr *lo, | |||
440 | } | 441 | } |
441 | 442 | ||
442 | /* find and reference the deviceid */ | 443 | /* find and reference the deviceid */ |
443 | dsaddr = nfs4_fl_find_get_deviceid(id); | 444 | d = nfs4_find_get_deviceid(NFS_SERVER(lo->plh_inode)->pnfs_curr_ld, |
444 | if (dsaddr == NULL) { | 445 | NFS_SERVER(lo->plh_inode)->nfs_client, id); |
446 | if (d == NULL) { | ||
445 | dsaddr = get_device_info(lo->plh_inode, id, gfp_flags); | 447 | dsaddr = get_device_info(lo->plh_inode, id, gfp_flags); |
446 | if (dsaddr == NULL) | 448 | if (dsaddr == NULL) |
447 | goto out; | 449 | goto out; |
448 | } | 450 | } else |
451 | dsaddr = container_of(d, struct nfs4_file_layout_dsaddr, id_node); | ||
449 | fl->dsaddr = dsaddr; | 452 | fl->dsaddr = dsaddr; |
450 | 453 | ||
451 | if (fl->first_stripe_index < 0 || | 454 | if (fl->first_stripe_index < 0 || |
@@ -507,12 +510,7 @@ filelayout_decode_layout(struct pnfs_layout_hdr *flo, | |||
507 | gfp_t gfp_flags) | 510 | gfp_t gfp_flags) |
508 | { | 511 | { |
509 | struct xdr_stream stream; | 512 | struct xdr_stream stream; |
510 | struct xdr_buf buf = { | 513 | struct xdr_buf buf; |
511 | .pages = lgr->layoutp->pages, | ||
512 | .page_len = lgr->layoutp->len, | ||
513 | .buflen = lgr->layoutp->len, | ||
514 | .len = lgr->layoutp->len, | ||
515 | }; | ||
516 | struct page *scratch; | 514 | struct page *scratch; |
517 | __be32 *p; | 515 | __be32 *p; |
518 | uint32_t nfl_util; | 516 | uint32_t nfl_util; |
@@ -524,7 +522,7 @@ filelayout_decode_layout(struct pnfs_layout_hdr *flo, | |||
524 | if (!scratch) | 522 | if (!scratch) |
525 | return -ENOMEM; | 523 | return -ENOMEM; |
526 | 524 | ||
527 | xdr_init_decode(&stream, &buf, NULL); | 525 | xdr_init_decode_pages(&stream, &buf, lgr->layoutp->pages, lgr->layoutp->len); |
528 | xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE); | 526 | xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE); |
529 | 527 | ||
530 | /* 20 = ufl_util (4), first_stripe_index (4), pattern_offset (8), | 528 | /* 20 = ufl_util (4), first_stripe_index (4), pattern_offset (8), |
@@ -535,7 +533,7 @@ filelayout_decode_layout(struct pnfs_layout_hdr *flo, | |||
535 | 533 | ||
536 | memcpy(id, p, sizeof(*id)); | 534 | memcpy(id, p, sizeof(*id)); |
537 | p += XDR_QUADLEN(NFS4_DEVICEID4_SIZE); | 535 | p += XDR_QUADLEN(NFS4_DEVICEID4_SIZE); |
538 | print_deviceid(id); | 536 | nfs4_print_deviceid(id); |
539 | 537 | ||
540 | nfl_util = be32_to_cpup(p++); | 538 | nfl_util = be32_to_cpup(p++); |
541 | if (nfl_util & NFL4_UFLG_COMMIT_THRU_MDS) | 539 | if (nfl_util & NFL4_UFLG_COMMIT_THRU_MDS) |
@@ -653,16 +651,19 @@ filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid, | |||
653 | /* | 651 | /* |
654 | * filelayout_pg_test(). Called by nfs_can_coalesce_requests() | 652 | * filelayout_pg_test(). Called by nfs_can_coalesce_requests() |
655 | * | 653 | * |
656 | * return 1 : coalesce page | 654 | * return true : coalesce page |
657 | * return 0 : don't coalesce page | 655 | * return false : don't coalesce page |
658 | */ | 656 | */ |
659 | int | 657 | bool |
660 | filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, | 658 | filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, |
661 | struct nfs_page *req) | 659 | struct nfs_page *req) |
662 | { | 660 | { |
663 | u64 p_stripe, r_stripe; | 661 | u64 p_stripe, r_stripe; |
664 | u32 stripe_unit; | 662 | u32 stripe_unit; |
665 | 663 | ||
664 | if (!pnfs_generic_pg_test(pgio, prev, req)) | ||
665 | return 0; | ||
666 | |||
666 | if (!pgio->pg_lseg) | 667 | if (!pgio->pg_lseg) |
667 | return 1; | 668 | return 1; |
668 | p_stripe = (u64)prev->wb_index << PAGE_CACHE_SHIFT; | 669 | p_stripe = (u64)prev->wb_index << PAGE_CACHE_SHIFT; |
@@ -860,6 +861,12 @@ filelayout_commit_pagelist(struct inode *inode, struct list_head *mds_pages, | |||
860 | return -ENOMEM; | 861 | return -ENOMEM; |
861 | } | 862 | } |
862 | 863 | ||
864 | static void | ||
865 | filelayout_free_deveiceid_node(struct nfs4_deviceid_node *d) | ||
866 | { | ||
867 | nfs4_fl_free_deviceid(container_of(d, struct nfs4_file_layout_dsaddr, id_node)); | ||
868 | } | ||
869 | |||
863 | static struct pnfs_layoutdriver_type filelayout_type = { | 870 | static struct pnfs_layoutdriver_type filelayout_type = { |
864 | .id = LAYOUT_NFSV4_1_FILES, | 871 | .id = LAYOUT_NFSV4_1_FILES, |
865 | .name = "LAYOUT_NFSV4_1_FILES", | 872 | .name = "LAYOUT_NFSV4_1_FILES", |
@@ -872,6 +879,7 @@ static struct pnfs_layoutdriver_type filelayout_type = { | |||
872 | .commit_pagelist = filelayout_commit_pagelist, | 879 | .commit_pagelist = filelayout_commit_pagelist, |
873 | .read_pagelist = filelayout_read_pagelist, | 880 | .read_pagelist = filelayout_read_pagelist, |
874 | .write_pagelist = filelayout_write_pagelist, | 881 | .write_pagelist = filelayout_write_pagelist, |
882 | .free_deviceid_node = filelayout_free_deveiceid_node, | ||
875 | }; | 883 | }; |
876 | 884 | ||
877 | static int __init nfs4filelayout_init(void) | 885 | static int __init nfs4filelayout_init(void) |
diff --git a/fs/nfs/nfs4filelayout.h b/fs/nfs/nfs4filelayout.h index 2b461d77b43a..cebe01e3795e 100644 --- a/fs/nfs/nfs4filelayout.h +++ b/fs/nfs/nfs4filelayout.h | |||
@@ -59,9 +59,7 @@ struct nfs4_pnfs_ds { | |||
59 | #define NFS4_DEVICE_ID_NEG_ENTRY 0x00000001 | 59 | #define NFS4_DEVICE_ID_NEG_ENTRY 0x00000001 |
60 | 60 | ||
61 | struct nfs4_file_layout_dsaddr { | 61 | struct nfs4_file_layout_dsaddr { |
62 | struct hlist_node node; | 62 | struct nfs4_deviceid_node id_node; |
63 | struct nfs4_deviceid deviceid; | ||
64 | atomic_t ref; | ||
65 | unsigned long flags; | 63 | unsigned long flags; |
66 | u32 stripe_count; | 64 | u32 stripe_count; |
67 | u8 *stripe_indices; | 65 | u8 *stripe_indices; |
@@ -95,14 +93,12 @@ extern struct nfs_fh * | |||
95 | nfs4_fl_select_ds_fh(struct pnfs_layout_segment *lseg, u32 j); | 93 | nfs4_fl_select_ds_fh(struct pnfs_layout_segment *lseg, u32 j); |
96 | 94 | ||
97 | extern void print_ds(struct nfs4_pnfs_ds *ds); | 95 | extern void print_ds(struct nfs4_pnfs_ds *ds); |
98 | extern void print_deviceid(struct nfs4_deviceid *dev_id); | ||
99 | u32 nfs4_fl_calc_j_index(struct pnfs_layout_segment *lseg, loff_t offset); | 96 | u32 nfs4_fl_calc_j_index(struct pnfs_layout_segment *lseg, loff_t offset); |
100 | u32 nfs4_fl_calc_ds_index(struct pnfs_layout_segment *lseg, u32 j); | 97 | u32 nfs4_fl_calc_ds_index(struct pnfs_layout_segment *lseg, u32 j); |
101 | struct nfs4_pnfs_ds *nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, | 98 | struct nfs4_pnfs_ds *nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, |
102 | u32 ds_idx); | 99 | u32 ds_idx); |
103 | extern struct nfs4_file_layout_dsaddr * | ||
104 | nfs4_fl_find_get_deviceid(struct nfs4_deviceid *dev_id); | ||
105 | extern void nfs4_fl_put_deviceid(struct nfs4_file_layout_dsaddr *dsaddr); | 100 | extern void nfs4_fl_put_deviceid(struct nfs4_file_layout_dsaddr *dsaddr); |
101 | extern void nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr); | ||
106 | struct nfs4_file_layout_dsaddr * | 102 | struct nfs4_file_layout_dsaddr * |
107 | get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id, gfp_t gfp_flags); | 103 | get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id, gfp_t gfp_flags); |
108 | 104 | ||
diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/nfs4filelayoutdev.c index db07c7af1395..3b7bf1377264 100644 --- a/fs/nfs/nfs4filelayoutdev.c +++ b/fs/nfs/nfs4filelayoutdev.c | |||
@@ -37,30 +37,6 @@ | |||
37 | #define NFSDBG_FACILITY NFSDBG_PNFS_LD | 37 | #define NFSDBG_FACILITY NFSDBG_PNFS_LD |
38 | 38 | ||
39 | /* | 39 | /* |
40 | * Device ID RCU cache. A device ID is unique per client ID and layout type. | ||
41 | */ | ||
42 | #define NFS4_FL_DEVICE_ID_HASH_BITS 5 | ||
43 | #define NFS4_FL_DEVICE_ID_HASH_SIZE (1 << NFS4_FL_DEVICE_ID_HASH_BITS) | ||
44 | #define NFS4_FL_DEVICE_ID_HASH_MASK (NFS4_FL_DEVICE_ID_HASH_SIZE - 1) | ||
45 | |||
46 | static inline u32 | ||
47 | nfs4_fl_deviceid_hash(struct nfs4_deviceid *id) | ||
48 | { | ||
49 | unsigned char *cptr = (unsigned char *)id->data; | ||
50 | unsigned int nbytes = NFS4_DEVICEID4_SIZE; | ||
51 | u32 x = 0; | ||
52 | |||
53 | while (nbytes--) { | ||
54 | x *= 37; | ||
55 | x += *cptr++; | ||
56 | } | ||
57 | return x & NFS4_FL_DEVICE_ID_HASH_MASK; | ||
58 | } | ||
59 | |||
60 | static struct hlist_head filelayout_deviceid_cache[NFS4_FL_DEVICE_ID_HASH_SIZE]; | ||
61 | static DEFINE_SPINLOCK(filelayout_deviceid_lock); | ||
62 | |||
63 | /* | ||
64 | * Data server cache | 40 | * Data server cache |
65 | * | 41 | * |
66 | * Data servers can be mapped to different device ids. | 42 | * Data servers can be mapped to different device ids. |
@@ -89,27 +65,6 @@ print_ds(struct nfs4_pnfs_ds *ds) | |||
89 | ds->ds_clp ? ds->ds_clp->cl_exchange_flags : 0); | 65 | ds->ds_clp ? ds->ds_clp->cl_exchange_flags : 0); |
90 | } | 66 | } |
91 | 67 | ||
92 | void | ||
93 | print_ds_list(struct nfs4_file_layout_dsaddr *dsaddr) | ||
94 | { | ||
95 | int i; | ||
96 | |||
97 | ifdebug(FACILITY) { | ||
98 | printk("%s dsaddr->ds_num %d\n", __func__, | ||
99 | dsaddr->ds_num); | ||
100 | for (i = 0; i < dsaddr->ds_num; i++) | ||
101 | print_ds(dsaddr->ds_list[i]); | ||
102 | } | ||
103 | } | ||
104 | |||
105 | void print_deviceid(struct nfs4_deviceid *id) | ||
106 | { | ||
107 | u32 *p = (u32 *)id; | ||
108 | |||
109 | dprintk("%s: device id= [%x%x%x%x]\n", __func__, | ||
110 | p[0], p[1], p[2], p[3]); | ||
111 | } | ||
112 | |||
113 | /* nfs4_ds_cache_lock is held */ | 68 | /* nfs4_ds_cache_lock is held */ |
114 | static struct nfs4_pnfs_ds * | 69 | static struct nfs4_pnfs_ds * |
115 | _data_server_lookup_locked(u32 ip_addr, u32 port) | 70 | _data_server_lookup_locked(u32 ip_addr, u32 port) |
@@ -201,13 +156,13 @@ destroy_ds(struct nfs4_pnfs_ds *ds) | |||
201 | kfree(ds); | 156 | kfree(ds); |
202 | } | 157 | } |
203 | 158 | ||
204 | static void | 159 | void |
205 | nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr) | 160 | nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr) |
206 | { | 161 | { |
207 | struct nfs4_pnfs_ds *ds; | 162 | struct nfs4_pnfs_ds *ds; |
208 | int i; | 163 | int i; |
209 | 164 | ||
210 | print_deviceid(&dsaddr->deviceid); | 165 | nfs4_print_deviceid(&dsaddr->id_node.deviceid); |
211 | 166 | ||
212 | for (i = 0; i < dsaddr->ds_num; i++) { | 167 | for (i = 0; i < dsaddr->ds_num; i++) { |
213 | ds = dsaddr->ds_list[i]; | 168 | ds = dsaddr->ds_list[i]; |
@@ -353,12 +308,7 @@ decode_device(struct inode *ino, struct pnfs_device *pdev, gfp_t gfp_flags) | |||
353 | u8 max_stripe_index; | 308 | u8 max_stripe_index; |
354 | struct nfs4_file_layout_dsaddr *dsaddr = NULL; | 309 | struct nfs4_file_layout_dsaddr *dsaddr = NULL; |
355 | struct xdr_stream stream; | 310 | struct xdr_stream stream; |
356 | struct xdr_buf buf = { | 311 | struct xdr_buf buf; |
357 | .pages = pdev->pages, | ||
358 | .page_len = pdev->pglen, | ||
359 | .buflen = pdev->pglen, | ||
360 | .len = pdev->pglen, | ||
361 | }; | ||
362 | struct page *scratch; | 312 | struct page *scratch; |
363 | 313 | ||
364 | /* set up xdr stream */ | 314 | /* set up xdr stream */ |
@@ -366,7 +316,7 @@ decode_device(struct inode *ino, struct pnfs_device *pdev, gfp_t gfp_flags) | |||
366 | if (!scratch) | 316 | if (!scratch) |
367 | goto out_err; | 317 | goto out_err; |
368 | 318 | ||
369 | xdr_init_decode(&stream, &buf, NULL); | 319 | xdr_init_decode_pages(&stream, &buf, pdev->pages, pdev->pglen); |
370 | xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE); | 320 | xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE); |
371 | 321 | ||
372 | /* Get the stripe count (number of stripe index) */ | 322 | /* Get the stripe count (number of stripe index) */ |
@@ -431,8 +381,10 @@ decode_device(struct inode *ino, struct pnfs_device *pdev, gfp_t gfp_flags) | |||
431 | dsaddr->stripe_indices = stripe_indices; | 381 | dsaddr->stripe_indices = stripe_indices; |
432 | stripe_indices = NULL; | 382 | stripe_indices = NULL; |
433 | dsaddr->ds_num = num; | 383 | dsaddr->ds_num = num; |
434 | 384 | nfs4_init_deviceid_node(&dsaddr->id_node, | |
435 | memcpy(&dsaddr->deviceid, &pdev->dev_id, sizeof(pdev->dev_id)); | 385 | NFS_SERVER(ino)->pnfs_curr_ld, |
386 | NFS_SERVER(ino)->nfs_client, | ||
387 | &pdev->dev_id); | ||
436 | 388 | ||
437 | for (i = 0; i < dsaddr->ds_num; i++) { | 389 | for (i = 0; i < dsaddr->ds_num; i++) { |
438 | int j; | 390 | int j; |
@@ -505,8 +457,8 @@ out_err: | |||
505 | static struct nfs4_file_layout_dsaddr * | 457 | static struct nfs4_file_layout_dsaddr * |
506 | decode_and_add_device(struct inode *inode, struct pnfs_device *dev, gfp_t gfp_flags) | 458 | decode_and_add_device(struct inode *inode, struct pnfs_device *dev, gfp_t gfp_flags) |
507 | { | 459 | { |
508 | struct nfs4_file_layout_dsaddr *d, *new; | 460 | struct nfs4_deviceid_node *d; |
509 | long hash; | 461 | struct nfs4_file_layout_dsaddr *n, *new; |
510 | 462 | ||
511 | new = decode_device(inode, dev, gfp_flags); | 463 | new = decode_device(inode, dev, gfp_flags); |
512 | if (!new) { | 464 | if (!new) { |
@@ -515,20 +467,13 @@ decode_and_add_device(struct inode *inode, struct pnfs_device *dev, gfp_t gfp_fl | |||
515 | return NULL; | 467 | return NULL; |
516 | } | 468 | } |
517 | 469 | ||
518 | spin_lock(&filelayout_deviceid_lock); | 470 | d = nfs4_insert_deviceid_node(&new->id_node); |
519 | d = nfs4_fl_find_get_deviceid(&new->deviceid); | 471 | n = container_of(d, struct nfs4_file_layout_dsaddr, id_node); |
520 | if (d) { | 472 | if (n != new) { |
521 | spin_unlock(&filelayout_deviceid_lock); | ||
522 | nfs4_fl_free_deviceid(new); | 473 | nfs4_fl_free_deviceid(new); |
523 | return d; | 474 | return n; |
524 | } | 475 | } |
525 | 476 | ||
526 | INIT_HLIST_NODE(&new->node); | ||
527 | atomic_set(&new->ref, 1); | ||
528 | hash = nfs4_fl_deviceid_hash(&new->deviceid); | ||
529 | hlist_add_head_rcu(&new->node, &filelayout_deviceid_cache[hash]); | ||
530 | spin_unlock(&filelayout_deviceid_lock); | ||
531 | |||
532 | return new; | 477 | return new; |
533 | } | 478 | } |
534 | 479 | ||
@@ -600,35 +545,7 @@ out_free: | |||
600 | void | 545 | void |
601 | nfs4_fl_put_deviceid(struct nfs4_file_layout_dsaddr *dsaddr) | 546 | nfs4_fl_put_deviceid(struct nfs4_file_layout_dsaddr *dsaddr) |
602 | { | 547 | { |
603 | if (atomic_dec_and_lock(&dsaddr->ref, &filelayout_deviceid_lock)) { | 548 | nfs4_put_deviceid_node(&dsaddr->id_node); |
604 | hlist_del_rcu(&dsaddr->node); | ||
605 | spin_unlock(&filelayout_deviceid_lock); | ||
606 | |||
607 | synchronize_rcu(); | ||
608 | nfs4_fl_free_deviceid(dsaddr); | ||
609 | } | ||
610 | } | ||
611 | |||
612 | struct nfs4_file_layout_dsaddr * | ||
613 | nfs4_fl_find_get_deviceid(struct nfs4_deviceid *id) | ||
614 | { | ||
615 | struct nfs4_file_layout_dsaddr *d; | ||
616 | struct hlist_node *n; | ||
617 | long hash = nfs4_fl_deviceid_hash(id); | ||
618 | |||
619 | |||
620 | rcu_read_lock(); | ||
621 | hlist_for_each_entry_rcu(d, n, &filelayout_deviceid_cache[hash], node) { | ||
622 | if (!memcmp(&d->deviceid, id, sizeof(*id))) { | ||
623 | if (!atomic_inc_not_zero(&d->ref)) | ||
624 | goto fail; | ||
625 | rcu_read_unlock(); | ||
626 | return d; | ||
627 | } | ||
628 | } | ||
629 | fail: | ||
630 | rcu_read_unlock(); | ||
631 | return NULL; | ||
632 | } | 549 | } |
633 | 550 | ||
634 | /* | 551 | /* |
@@ -676,15 +593,15 @@ static void | |||
676 | filelayout_mark_devid_negative(struct nfs4_file_layout_dsaddr *dsaddr, | 593 | filelayout_mark_devid_negative(struct nfs4_file_layout_dsaddr *dsaddr, |
677 | int err, u32 ds_addr) | 594 | int err, u32 ds_addr) |
678 | { | 595 | { |
679 | u32 *p = (u32 *)&dsaddr->deviceid; | 596 | u32 *p = (u32 *)&dsaddr->id_node.deviceid; |
680 | 597 | ||
681 | printk(KERN_ERR "NFS: data server %x connection error %d." | 598 | printk(KERN_ERR "NFS: data server %x connection error %d." |
682 | " Deviceid [%x%x%x%x] marked out of use.\n", | 599 | " Deviceid [%x%x%x%x] marked out of use.\n", |
683 | ds_addr, err, p[0], p[1], p[2], p[3]); | 600 | ds_addr, err, p[0], p[1], p[2], p[3]); |
684 | 601 | ||
685 | spin_lock(&filelayout_deviceid_lock); | 602 | spin_lock(&nfs4_ds_cache_lock); |
686 | dsaddr->flags |= NFS4_DEVICE_ID_NEG_ENTRY; | 603 | dsaddr->flags |= NFS4_DEVICE_ID_NEG_ENTRY; |
687 | spin_unlock(&filelayout_deviceid_lock); | 604 | spin_unlock(&nfs4_ds_cache_lock); |
688 | } | 605 | } |
689 | 606 | ||
690 | struct nfs4_pnfs_ds * | 607 | struct nfs4_pnfs_ds * |
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index cf1b339c3937..d2c4b59c896d 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c | |||
@@ -267,9 +267,11 @@ static int nfs4_handle_exception(struct nfs_server *server, int errorcode, struc | |||
267 | break; | 267 | break; |
268 | nfs4_schedule_stateid_recovery(server, state); | 268 | nfs4_schedule_stateid_recovery(server, state); |
269 | goto wait_on_recovery; | 269 | goto wait_on_recovery; |
270 | case -NFS4ERR_EXPIRED: | ||
271 | if (state != NULL) | ||
272 | nfs4_schedule_stateid_recovery(server, state); | ||
270 | case -NFS4ERR_STALE_STATEID: | 273 | case -NFS4ERR_STALE_STATEID: |
271 | case -NFS4ERR_STALE_CLIENTID: | 274 | case -NFS4ERR_STALE_CLIENTID: |
272 | case -NFS4ERR_EXPIRED: | ||
273 | nfs4_schedule_lease_recovery(clp); | 275 | nfs4_schedule_lease_recovery(clp); |
274 | goto wait_on_recovery; | 276 | goto wait_on_recovery; |
275 | #if defined(CONFIG_NFS_V4_1) | 277 | #if defined(CONFIG_NFS_V4_1) |
@@ -2361,6 +2363,9 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr, | |||
2361 | struct nfs4_state *state = NULL; | 2363 | struct nfs4_state *state = NULL; |
2362 | int status; | 2364 | int status; |
2363 | 2365 | ||
2366 | if (pnfs_ld_layoutret_on_setattr(inode)) | ||
2367 | pnfs_return_layout(inode); | ||
2368 | |||
2364 | nfs_fattr_init(fattr); | 2369 | nfs_fattr_init(fattr); |
2365 | 2370 | ||
2366 | /* Search for an existing open(O_WRITE) file */ | 2371 | /* Search for an existing open(O_WRITE) file */ |
@@ -3175,6 +3180,11 @@ static int nfs4_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle, | |||
3175 | return err; | 3180 | return err; |
3176 | } | 3181 | } |
3177 | 3182 | ||
3183 | void __nfs4_read_done_cb(struct nfs_read_data *data) | ||
3184 | { | ||
3185 | nfs_invalidate_atime(data->inode); | ||
3186 | } | ||
3187 | |||
3178 | static int nfs4_read_done_cb(struct rpc_task *task, struct nfs_read_data *data) | 3188 | static int nfs4_read_done_cb(struct rpc_task *task, struct nfs_read_data *data) |
3179 | { | 3189 | { |
3180 | struct nfs_server *server = NFS_SERVER(data->inode); | 3190 | struct nfs_server *server = NFS_SERVER(data->inode); |
@@ -3184,7 +3194,7 @@ static int nfs4_read_done_cb(struct rpc_task *task, struct nfs_read_data *data) | |||
3184 | return -EAGAIN; | 3194 | return -EAGAIN; |
3185 | } | 3195 | } |
3186 | 3196 | ||
3187 | nfs_invalidate_atime(data->inode); | 3197 | __nfs4_read_done_cb(data); |
3188 | if (task->tk_status > 0) | 3198 | if (task->tk_status > 0) |
3189 | renew_lease(server, data->timestamp); | 3199 | renew_lease(server, data->timestamp); |
3190 | return 0; | 3200 | return 0; |
@@ -3198,7 +3208,8 @@ static int nfs4_read_done(struct rpc_task *task, struct nfs_read_data *data) | |||
3198 | if (!nfs4_sequence_done(task, &data->res.seq_res)) | 3208 | if (!nfs4_sequence_done(task, &data->res.seq_res)) |
3199 | return -EAGAIN; | 3209 | return -EAGAIN; |
3200 | 3210 | ||
3201 | return data->read_done_cb(task, data); | 3211 | return data->read_done_cb ? data->read_done_cb(task, data) : |
3212 | nfs4_read_done_cb(task, data); | ||
3202 | } | 3213 | } |
3203 | 3214 | ||
3204 | static void nfs4_proc_read_setup(struct nfs_read_data *data, struct rpc_message *msg) | 3215 | static void nfs4_proc_read_setup(struct nfs_read_data *data, struct rpc_message *msg) |
@@ -3243,7 +3254,8 @@ static int nfs4_write_done(struct rpc_task *task, struct nfs_write_data *data) | |||
3243 | { | 3254 | { |
3244 | if (!nfs4_sequence_done(task, &data->res.seq_res)) | 3255 | if (!nfs4_sequence_done(task, &data->res.seq_res)) |
3245 | return -EAGAIN; | 3256 | return -EAGAIN; |
3246 | return data->write_done_cb(task, data); | 3257 | return data->write_done_cb ? data->write_done_cb(task, data) : |
3258 | nfs4_write_done_cb(task, data); | ||
3247 | } | 3259 | } |
3248 | 3260 | ||
3249 | /* Reset the the nfs_write_data to send the write to the MDS. */ | 3261 | /* Reset the the nfs_write_data to send the write to the MDS. */ |
@@ -3670,9 +3682,11 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server, | |||
3670 | break; | 3682 | break; |
3671 | nfs4_schedule_stateid_recovery(server, state); | 3683 | nfs4_schedule_stateid_recovery(server, state); |
3672 | goto wait_on_recovery; | 3684 | goto wait_on_recovery; |
3685 | case -NFS4ERR_EXPIRED: | ||
3686 | if (state != NULL) | ||
3687 | nfs4_schedule_stateid_recovery(server, state); | ||
3673 | case -NFS4ERR_STALE_STATEID: | 3688 | case -NFS4ERR_STALE_STATEID: |
3674 | case -NFS4ERR_STALE_CLIENTID: | 3689 | case -NFS4ERR_STALE_CLIENTID: |
3675 | case -NFS4ERR_EXPIRED: | ||
3676 | nfs4_schedule_lease_recovery(clp); | 3690 | nfs4_schedule_lease_recovery(clp); |
3677 | goto wait_on_recovery; | 3691 | goto wait_on_recovery; |
3678 | #if defined(CONFIG_NFS_V4_1) | 3692 | #if defined(CONFIG_NFS_V4_1) |
@@ -4543,6 +4557,7 @@ int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl) | |||
4543 | case -ESTALE: | 4557 | case -ESTALE: |
4544 | goto out; | 4558 | goto out; |
4545 | case -NFS4ERR_EXPIRED: | 4559 | case -NFS4ERR_EXPIRED: |
4560 | nfs4_schedule_stateid_recovery(server, state); | ||
4546 | case -NFS4ERR_STALE_CLIENTID: | 4561 | case -NFS4ERR_STALE_CLIENTID: |
4547 | case -NFS4ERR_STALE_STATEID: | 4562 | case -NFS4ERR_STALE_STATEID: |
4548 | nfs4_schedule_lease_recovery(server->nfs_client); | 4563 | nfs4_schedule_lease_recovery(server->nfs_client); |
@@ -5666,6 +5681,88 @@ int nfs4_proc_layoutget(struct nfs4_layoutget *lgp) | |||
5666 | return status; | 5681 | return status; |
5667 | } | 5682 | } |
5668 | 5683 | ||
5684 | static void | ||
5685 | nfs4_layoutreturn_prepare(struct rpc_task *task, void *calldata) | ||
5686 | { | ||
5687 | struct nfs4_layoutreturn *lrp = calldata; | ||
5688 | |||
5689 | dprintk("--> %s\n", __func__); | ||
5690 | if (nfs41_setup_sequence(lrp->clp->cl_session, &lrp->args.seq_args, | ||
5691 | &lrp->res.seq_res, 0, task)) | ||
5692 | return; | ||
5693 | rpc_call_start(task); | ||
5694 | } | ||
5695 | |||
5696 | static void nfs4_layoutreturn_done(struct rpc_task *task, void *calldata) | ||
5697 | { | ||
5698 | struct nfs4_layoutreturn *lrp = calldata; | ||
5699 | struct nfs_server *server; | ||
5700 | |||
5701 | dprintk("--> %s\n", __func__); | ||
5702 | |||
5703 | if (!nfs4_sequence_done(task, &lrp->res.seq_res)) | ||
5704 | return; | ||
5705 | |||
5706 | server = NFS_SERVER(lrp->args.inode); | ||
5707 | if (nfs4_async_handle_error(task, server, NULL) == -EAGAIN) { | ||
5708 | nfs_restart_rpc(task, lrp->clp); | ||
5709 | return; | ||
5710 | } | ||
5711 | if (task->tk_status == 0) { | ||
5712 | struct pnfs_layout_hdr *lo = NFS_I(lrp->args.inode)->layout; | ||
5713 | |||
5714 | if (lrp->res.lrs_present) { | ||
5715 | spin_lock(&lo->plh_inode->i_lock); | ||
5716 | pnfs_set_layout_stateid(lo, &lrp->res.stateid, true); | ||
5717 | spin_unlock(&lo->plh_inode->i_lock); | ||
5718 | } else | ||
5719 | BUG_ON(!list_empty(&lo->plh_segs)); | ||
5720 | } | ||
5721 | dprintk("<-- %s\n", __func__); | ||
5722 | } | ||
5723 | |||
5724 | static void nfs4_layoutreturn_release(void *calldata) | ||
5725 | { | ||
5726 | struct nfs4_layoutreturn *lrp = calldata; | ||
5727 | |||
5728 | dprintk("--> %s\n", __func__); | ||
5729 | put_layout_hdr(NFS_I(lrp->args.inode)->layout); | ||
5730 | kfree(calldata); | ||
5731 | dprintk("<-- %s\n", __func__); | ||
5732 | } | ||
5733 | |||
5734 | static const struct rpc_call_ops nfs4_layoutreturn_call_ops = { | ||
5735 | .rpc_call_prepare = nfs4_layoutreturn_prepare, | ||
5736 | .rpc_call_done = nfs4_layoutreturn_done, | ||
5737 | .rpc_release = nfs4_layoutreturn_release, | ||
5738 | }; | ||
5739 | |||
5740 | int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp) | ||
5741 | { | ||
5742 | struct rpc_task *task; | ||
5743 | struct rpc_message msg = { | ||
5744 | .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LAYOUTRETURN], | ||
5745 | .rpc_argp = &lrp->args, | ||
5746 | .rpc_resp = &lrp->res, | ||
5747 | }; | ||
5748 | struct rpc_task_setup task_setup_data = { | ||
5749 | .rpc_client = lrp->clp->cl_rpcclient, | ||
5750 | .rpc_message = &msg, | ||
5751 | .callback_ops = &nfs4_layoutreturn_call_ops, | ||
5752 | .callback_data = lrp, | ||
5753 | }; | ||
5754 | int status; | ||
5755 | |||
5756 | dprintk("--> %s\n", __func__); | ||
5757 | task = rpc_run_task(&task_setup_data); | ||
5758 | if (IS_ERR(task)) | ||
5759 | return PTR_ERR(task); | ||
5760 | status = task->tk_status; | ||
5761 | dprintk("<-- %s status=%d\n", __func__, status); | ||
5762 | rpc_put_task(task); | ||
5763 | return status; | ||
5764 | } | ||
5765 | |||
5669 | static int | 5766 | static int |
5670 | _nfs4_proc_getdeviceinfo(struct nfs_server *server, struct pnfs_device *pdev) | 5767 | _nfs4_proc_getdeviceinfo(struct nfs_server *server, struct pnfs_device *pdev) |
5671 | { | 5768 | { |
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 036f5adc9e1f..e97dd219f84f 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c | |||
@@ -1466,7 +1466,10 @@ static int nfs4_reclaim_lease(struct nfs_client *clp) | |||
1466 | #ifdef CONFIG_NFS_V4_1 | 1466 | #ifdef CONFIG_NFS_V4_1 |
1467 | void nfs4_schedule_session_recovery(struct nfs4_session *session) | 1467 | void nfs4_schedule_session_recovery(struct nfs4_session *session) |
1468 | { | 1468 | { |
1469 | nfs4_schedule_lease_recovery(session->clp); | 1469 | struct nfs_client *clp = session->clp; |
1470 | |||
1471 | set_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state); | ||
1472 | nfs4_schedule_lease_recovery(clp); | ||
1470 | } | 1473 | } |
1471 | EXPORT_SYMBOL_GPL(nfs4_schedule_session_recovery); | 1474 | EXPORT_SYMBOL_GPL(nfs4_schedule_session_recovery); |
1472 | 1475 | ||
@@ -1549,6 +1552,7 @@ static int nfs4_reset_session(struct nfs_client *clp) | |||
1549 | status = nfs4_recovery_handle_error(clp, status); | 1552 | status = nfs4_recovery_handle_error(clp, status); |
1550 | goto out; | 1553 | goto out; |
1551 | } | 1554 | } |
1555 | clear_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state); | ||
1552 | /* create_session negotiated new slot table */ | 1556 | /* create_session negotiated new slot table */ |
1553 | clear_bit(NFS4CLNT_RECALL_SLOT, &clp->cl_state); | 1557 | clear_bit(NFS4CLNT_RECALL_SLOT, &clp->cl_state); |
1554 | 1558 | ||
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index c3ccd2c46834..d869a5e5464b 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c | |||
@@ -338,7 +338,11 @@ static int nfs4_stat_to_errno(int); | |||
338 | 1 /* layoutupdate4 layout type */ + \ | 338 | 1 /* layoutupdate4 layout type */ + \ |
339 | 1 /* NULL filelayout layoutupdate4 payload */) | 339 | 1 /* NULL filelayout layoutupdate4 payload */) |
340 | #define decode_layoutcommit_maxsz (op_decode_hdr_maxsz + 3) | 340 | #define decode_layoutcommit_maxsz (op_decode_hdr_maxsz + 3) |
341 | 341 | #define encode_layoutreturn_maxsz (8 + op_encode_hdr_maxsz + \ | |
342 | encode_stateid_maxsz + \ | ||
343 | 1 /* FIXME: opaque lrf_body always empty at the moment */) | ||
344 | #define decode_layoutreturn_maxsz (op_decode_hdr_maxsz + \ | ||
345 | 1 + decode_stateid_maxsz) | ||
342 | #else /* CONFIG_NFS_V4_1 */ | 346 | #else /* CONFIG_NFS_V4_1 */ |
343 | #define encode_sequence_maxsz 0 | 347 | #define encode_sequence_maxsz 0 |
344 | #define decode_sequence_maxsz 0 | 348 | #define decode_sequence_maxsz 0 |
@@ -760,7 +764,14 @@ static int nfs4_stat_to_errno(int); | |||
760 | decode_putfh_maxsz + \ | 764 | decode_putfh_maxsz + \ |
761 | decode_layoutcommit_maxsz + \ | 765 | decode_layoutcommit_maxsz + \ |
762 | decode_getattr_maxsz) | 766 | decode_getattr_maxsz) |
763 | 767 | #define NFS4_enc_layoutreturn_sz (compound_encode_hdr_maxsz + \ | |
768 | encode_sequence_maxsz + \ | ||
769 | encode_putfh_maxsz + \ | ||
770 | encode_layoutreturn_maxsz) | ||
771 | #define NFS4_dec_layoutreturn_sz (compound_decode_hdr_maxsz + \ | ||
772 | decode_sequence_maxsz + \ | ||
773 | decode_putfh_maxsz + \ | ||
774 | decode_layoutreturn_maxsz) | ||
764 | 775 | ||
765 | const u32 nfs41_maxwrite_overhead = ((RPC_MAX_HEADER_WITH_AUTH + | 776 | const u32 nfs41_maxwrite_overhead = ((RPC_MAX_HEADER_WITH_AUTH + |
766 | compound_encode_hdr_maxsz + | 777 | compound_encode_hdr_maxsz + |
@@ -1864,6 +1875,7 @@ encode_layoutget(struct xdr_stream *xdr, | |||
1864 | 1875 | ||
1865 | static int | 1876 | static int |
1866 | encode_layoutcommit(struct xdr_stream *xdr, | 1877 | encode_layoutcommit(struct xdr_stream *xdr, |
1878 | struct inode *inode, | ||
1867 | const struct nfs4_layoutcommit_args *args, | 1879 | const struct nfs4_layoutcommit_args *args, |
1868 | struct compound_hdr *hdr) | 1880 | struct compound_hdr *hdr) |
1869 | { | 1881 | { |
@@ -1872,7 +1884,7 @@ encode_layoutcommit(struct xdr_stream *xdr, | |||
1872 | dprintk("%s: lbw: %llu type: %d\n", __func__, args->lastbytewritten, | 1884 | dprintk("%s: lbw: %llu type: %d\n", __func__, args->lastbytewritten, |
1873 | NFS_SERVER(args->inode)->pnfs_curr_ld->id); | 1885 | NFS_SERVER(args->inode)->pnfs_curr_ld->id); |
1874 | 1886 | ||
1875 | p = reserve_space(xdr, 48 + NFS4_STATEID_SIZE); | 1887 | p = reserve_space(xdr, 44 + NFS4_STATEID_SIZE); |
1876 | *p++ = cpu_to_be32(OP_LAYOUTCOMMIT); | 1888 | *p++ = cpu_to_be32(OP_LAYOUTCOMMIT); |
1877 | /* Only whole file layouts */ | 1889 | /* Only whole file layouts */ |
1878 | p = xdr_encode_hyper(p, 0); /* offset */ | 1890 | p = xdr_encode_hyper(p, 0); /* offset */ |
@@ -1883,12 +1895,49 @@ encode_layoutcommit(struct xdr_stream *xdr, | |||
1883 | p = xdr_encode_hyper(p, args->lastbytewritten); | 1895 | p = xdr_encode_hyper(p, args->lastbytewritten); |
1884 | *p++ = cpu_to_be32(0); /* Never send time_modify_changed */ | 1896 | *p++ = cpu_to_be32(0); /* Never send time_modify_changed */ |
1885 | *p++ = cpu_to_be32(NFS_SERVER(args->inode)->pnfs_curr_ld->id);/* type */ | 1897 | *p++ = cpu_to_be32(NFS_SERVER(args->inode)->pnfs_curr_ld->id);/* type */ |
1886 | *p++ = cpu_to_be32(0); /* no file layout payload */ | 1898 | |
1899 | if (NFS_SERVER(inode)->pnfs_curr_ld->encode_layoutcommit) | ||
1900 | NFS_SERVER(inode)->pnfs_curr_ld->encode_layoutcommit( | ||
1901 | NFS_I(inode)->layout, xdr, args); | ||
1902 | else { | ||
1903 | p = reserve_space(xdr, 4); | ||
1904 | *p = cpu_to_be32(0); /* no layout-type payload */ | ||
1905 | } | ||
1887 | 1906 | ||
1888 | hdr->nops++; | 1907 | hdr->nops++; |
1889 | hdr->replen += decode_layoutcommit_maxsz; | 1908 | hdr->replen += decode_layoutcommit_maxsz; |
1890 | return 0; | 1909 | return 0; |
1891 | } | 1910 | } |
1911 | |||
1912 | static void | ||
1913 | encode_layoutreturn(struct xdr_stream *xdr, | ||
1914 | const struct nfs4_layoutreturn_args *args, | ||
1915 | struct compound_hdr *hdr) | ||
1916 | { | ||
1917 | __be32 *p; | ||
1918 | |||
1919 | p = reserve_space(xdr, 20); | ||
1920 | *p++ = cpu_to_be32(OP_LAYOUTRETURN); | ||
1921 | *p++ = cpu_to_be32(0); /* reclaim. always 0 for now */ | ||
1922 | *p++ = cpu_to_be32(args->layout_type); | ||
1923 | *p++ = cpu_to_be32(IOMODE_ANY); | ||
1924 | *p = cpu_to_be32(RETURN_FILE); | ||
1925 | p = reserve_space(xdr, 16 + NFS4_STATEID_SIZE); | ||
1926 | p = xdr_encode_hyper(p, 0); | ||
1927 | p = xdr_encode_hyper(p, NFS4_MAX_UINT64); | ||
1928 | spin_lock(&args->inode->i_lock); | ||
1929 | xdr_encode_opaque_fixed(p, &args->stateid.data, NFS4_STATEID_SIZE); | ||
1930 | spin_unlock(&args->inode->i_lock); | ||
1931 | if (NFS_SERVER(args->inode)->pnfs_curr_ld->encode_layoutreturn) { | ||
1932 | NFS_SERVER(args->inode)->pnfs_curr_ld->encode_layoutreturn( | ||
1933 | NFS_I(args->inode)->layout, xdr, args); | ||
1934 | } else { | ||
1935 | p = reserve_space(xdr, 4); | ||
1936 | *p = cpu_to_be32(0); | ||
1937 | } | ||
1938 | hdr->nops++; | ||
1939 | hdr->replen += decode_layoutreturn_maxsz; | ||
1940 | } | ||
1892 | #endif /* CONFIG_NFS_V4_1 */ | 1941 | #endif /* CONFIG_NFS_V4_1 */ |
1893 | 1942 | ||
1894 | /* | 1943 | /* |
@@ -2706,10 +2755,12 @@ static void nfs4_xdr_enc_layoutget(struct rpc_rqst *req, | |||
2706 | /* | 2755 | /* |
2707 | * Encode LAYOUTCOMMIT request | 2756 | * Encode LAYOUTCOMMIT request |
2708 | */ | 2757 | */ |
2709 | static int nfs4_xdr_enc_layoutcommit(struct rpc_rqst *req, | 2758 | static void nfs4_xdr_enc_layoutcommit(struct rpc_rqst *req, |
2710 | struct xdr_stream *xdr, | 2759 | struct xdr_stream *xdr, |
2711 | struct nfs4_layoutcommit_args *args) | 2760 | struct nfs4_layoutcommit_args *args) |
2712 | { | 2761 | { |
2762 | struct nfs4_layoutcommit_data *data = | ||
2763 | container_of(args, struct nfs4_layoutcommit_data, args); | ||
2713 | struct compound_hdr hdr = { | 2764 | struct compound_hdr hdr = { |
2714 | .minorversion = nfs4_xdr_minorversion(&args->seq_args), | 2765 | .minorversion = nfs4_xdr_minorversion(&args->seq_args), |
2715 | }; | 2766 | }; |
@@ -2717,10 +2768,27 @@ static int nfs4_xdr_enc_layoutcommit(struct rpc_rqst *req, | |||
2717 | encode_compound_hdr(xdr, req, &hdr); | 2768 | encode_compound_hdr(xdr, req, &hdr); |
2718 | encode_sequence(xdr, &args->seq_args, &hdr); | 2769 | encode_sequence(xdr, &args->seq_args, &hdr); |
2719 | encode_putfh(xdr, NFS_FH(args->inode), &hdr); | 2770 | encode_putfh(xdr, NFS_FH(args->inode), &hdr); |
2720 | encode_layoutcommit(xdr, args, &hdr); | 2771 | encode_layoutcommit(xdr, data->args.inode, args, &hdr); |
2721 | encode_getfattr(xdr, args->bitmask, &hdr); | 2772 | encode_getfattr(xdr, args->bitmask, &hdr); |
2722 | encode_nops(&hdr); | 2773 | encode_nops(&hdr); |
2723 | return 0; | 2774 | } |
2775 | |||
2776 | /* | ||
2777 | * Encode LAYOUTRETURN request | ||
2778 | */ | ||
2779 | static void nfs4_xdr_enc_layoutreturn(struct rpc_rqst *req, | ||
2780 | struct xdr_stream *xdr, | ||
2781 | struct nfs4_layoutreturn_args *args) | ||
2782 | { | ||
2783 | struct compound_hdr hdr = { | ||
2784 | .minorversion = nfs4_xdr_minorversion(&args->seq_args), | ||
2785 | }; | ||
2786 | |||
2787 | encode_compound_hdr(xdr, req, &hdr); | ||
2788 | encode_sequence(xdr, &args->seq_args, &hdr); | ||
2789 | encode_putfh(xdr, NFS_FH(args->inode), &hdr); | ||
2790 | encode_layoutreturn(xdr, args, &hdr); | ||
2791 | encode_nops(&hdr); | ||
2724 | } | 2792 | } |
2725 | #endif /* CONFIG_NFS_V4_1 */ | 2793 | #endif /* CONFIG_NFS_V4_1 */ |
2726 | 2794 | ||
@@ -5203,6 +5271,27 @@ out_overflow: | |||
5203 | return -EIO; | 5271 | return -EIO; |
5204 | } | 5272 | } |
5205 | 5273 | ||
5274 | static int decode_layoutreturn(struct xdr_stream *xdr, | ||
5275 | struct nfs4_layoutreturn_res *res) | ||
5276 | { | ||
5277 | __be32 *p; | ||
5278 | int status; | ||
5279 | |||
5280 | status = decode_op_hdr(xdr, OP_LAYOUTRETURN); | ||
5281 | if (status) | ||
5282 | return status; | ||
5283 | p = xdr_inline_decode(xdr, 4); | ||
5284 | if (unlikely(!p)) | ||
5285 | goto out_overflow; | ||
5286 | res->lrs_present = be32_to_cpup(p); | ||
5287 | if (res->lrs_present) | ||
5288 | status = decode_stateid(xdr, &res->stateid); | ||
5289 | return status; | ||
5290 | out_overflow: | ||
5291 | print_overflow_msg(__func__, xdr); | ||
5292 | return -EIO; | ||
5293 | } | ||
5294 | |||
5206 | static int decode_layoutcommit(struct xdr_stream *xdr, | 5295 | static int decode_layoutcommit(struct xdr_stream *xdr, |
5207 | struct rpc_rqst *req, | 5296 | struct rpc_rqst *req, |
5208 | struct nfs4_layoutcommit_res *res) | 5297 | struct nfs4_layoutcommit_res *res) |
@@ -6320,6 +6409,30 @@ out: | |||
6320 | } | 6409 | } |
6321 | 6410 | ||
6322 | /* | 6411 | /* |
6412 | * Decode LAYOUTRETURN response | ||
6413 | */ | ||
6414 | static int nfs4_xdr_dec_layoutreturn(struct rpc_rqst *rqstp, | ||
6415 | struct xdr_stream *xdr, | ||
6416 | struct nfs4_layoutreturn_res *res) | ||
6417 | { | ||
6418 | struct compound_hdr hdr; | ||
6419 | int status; | ||
6420 | |||
6421 | status = decode_compound_hdr(xdr, &hdr); | ||
6422 | if (status) | ||
6423 | goto out; | ||
6424 | status = decode_sequence(xdr, &res->seq_res, rqstp); | ||
6425 | if (status) | ||
6426 | goto out; | ||
6427 | status = decode_putfh(xdr); | ||
6428 | if (status) | ||
6429 | goto out; | ||
6430 | status = decode_layoutreturn(xdr, res); | ||
6431 | out: | ||
6432 | return status; | ||
6433 | } | ||
6434 | |||
6435 | /* | ||
6323 | * Decode LAYOUTCOMMIT response | 6436 | * Decode LAYOUTCOMMIT response |
6324 | */ | 6437 | */ |
6325 | static int nfs4_xdr_dec_layoutcommit(struct rpc_rqst *rqstp, | 6438 | static int nfs4_xdr_dec_layoutcommit(struct rpc_rqst *rqstp, |
@@ -6547,6 +6660,7 @@ struct rpc_procinfo nfs4_procedures[] = { | |||
6547 | PROC(GETDEVICEINFO, enc_getdeviceinfo, dec_getdeviceinfo), | 6660 | PROC(GETDEVICEINFO, enc_getdeviceinfo, dec_getdeviceinfo), |
6548 | PROC(LAYOUTGET, enc_layoutget, dec_layoutget), | 6661 | PROC(LAYOUTGET, enc_layoutget, dec_layoutget), |
6549 | PROC(LAYOUTCOMMIT, enc_layoutcommit, dec_layoutcommit), | 6662 | PROC(LAYOUTCOMMIT, enc_layoutcommit, dec_layoutcommit), |
6663 | PROC(LAYOUTRETURN, enc_layoutreturn, dec_layoutreturn), | ||
6550 | #endif /* CONFIG_NFS_V4_1 */ | 6664 | #endif /* CONFIG_NFS_V4_1 */ |
6551 | }; | 6665 | }; |
6552 | 6666 | ||
diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c index c541093a5bf2..c4744e1d513c 100644 --- a/fs/nfs/nfsroot.c +++ b/fs/nfs/nfsroot.c | |||
@@ -87,7 +87,7 @@ | |||
87 | #define NFS_ROOT "/tftpboot/%s" | 87 | #define NFS_ROOT "/tftpboot/%s" |
88 | 88 | ||
89 | /* Default NFSROOT mount options. */ | 89 | /* Default NFSROOT mount options. */ |
90 | #define NFS_DEF_OPTIONS "udp" | 90 | #define NFS_DEF_OPTIONS "vers=2,udp,rsize=4096,wsize=4096" |
91 | 91 | ||
92 | /* Parameters passed from the kernel command line */ | 92 | /* Parameters passed from the kernel command line */ |
93 | static char nfs_root_parms[256] __initdata = ""; | 93 | static char nfs_root_parms[256] __initdata = ""; |
diff --git a/fs/nfs/objlayout/Kbuild b/fs/nfs/objlayout/Kbuild new file mode 100644 index 000000000000..ed30ea072bb8 --- /dev/null +++ b/fs/nfs/objlayout/Kbuild | |||
@@ -0,0 +1,5 @@ | |||
1 | # | ||
2 | # Makefile for the pNFS Objects Layout Driver kernel module | ||
3 | # | ||
4 | objlayoutdriver-y := objio_osd.o pnfs_osd_xdr_cli.o objlayout.o | ||
5 | obj-$(CONFIG_PNFS_OBJLAYOUT) += objlayoutdriver.o | ||
diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c new file mode 100644 index 000000000000..9cf208df1f25 --- /dev/null +++ b/fs/nfs/objlayout/objio_osd.c | |||
@@ -0,0 +1,1057 @@ | |||
1 | /* | ||
2 | * pNFS Objects layout implementation over open-osd initiator library | ||
3 | * | ||
4 | * Copyright (C) 2009 Panasas Inc. [year of first publication] | ||
5 | * All rights reserved. | ||
6 | * | ||
7 | * Benny Halevy <bhalevy@panasas.com> | ||
8 | * Boaz Harrosh <bharrosh@panasas.com> | ||
9 | * | ||
10 | * This program is free software; you can redistribute it and/or modify | ||
11 | * it under the terms of the GNU General Public License version 2 | ||
12 | * See the file COPYING included with this distribution for more details. | ||
13 | * | ||
14 | * Redistribution and use in source and binary forms, with or without | ||
15 | * modification, are permitted provided that the following conditions | ||
16 | * are met: | ||
17 | * | ||
18 | * 1. Redistributions of source code must retain the above copyright | ||
19 | * notice, this list of conditions and the following disclaimer. | ||
20 | * 2. Redistributions in binary form must reproduce the above copyright | ||
21 | * notice, this list of conditions and the following disclaimer in the | ||
22 | * documentation and/or other materials provided with the distribution. | ||
23 | * 3. Neither the name of the Panasas company nor the names of its | ||
24 | * contributors may be used to endorse or promote products derived | ||
25 | * from this software without specific prior written permission. | ||
26 | * | ||
27 | * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED | ||
28 | * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF | ||
29 | * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | ||
30 | * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | ||
31 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | ||
32 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | ||
33 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR | ||
34 | * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF | ||
35 | * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING | ||
36 | * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | ||
37 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
38 | */ | ||
39 | |||
40 | #include <linux/module.h> | ||
41 | #include <scsi/osd_initiator.h> | ||
42 | |||
43 | #include "objlayout.h" | ||
44 | |||
45 | #define NFSDBG_FACILITY NFSDBG_PNFS_LD | ||
46 | |||
/*
 * Cast an expression to unsigned long long (used for %llx printing).
 * The argument is parenthesized so the cast applies to the whole
 * expression, not just its first operand.
 */
#define _LLU(x) ((unsigned long long)(x))
48 | |||
49 | enum { BIO_MAX_PAGES_KMALLOC = | ||
50 | (PAGE_SIZE - sizeof(struct bio)) / sizeof(struct bio_vec), | ||
51 | }; | ||
52 | |||
53 | struct objio_dev_ent { | ||
54 | struct nfs4_deviceid_node id_node; | ||
55 | struct osd_dev *od; | ||
56 | }; | ||
57 | |||
58 | static void | ||
59 | objio_free_deviceid_node(struct nfs4_deviceid_node *d) | ||
60 | { | ||
61 | struct objio_dev_ent *de = container_of(d, struct objio_dev_ent, id_node); | ||
62 | |||
63 | dprintk("%s: free od=%p\n", __func__, de->od); | ||
64 | osduld_put_device(de->od); | ||
65 | kfree(de); | ||
66 | } | ||
67 | |||
68 | static struct objio_dev_ent *_dev_list_find(const struct nfs_server *nfss, | ||
69 | const struct nfs4_deviceid *d_id) | ||
70 | { | ||
71 | struct nfs4_deviceid_node *d; | ||
72 | struct objio_dev_ent *de; | ||
73 | |||
74 | d = nfs4_find_get_deviceid(nfss->pnfs_curr_ld, nfss->nfs_client, d_id); | ||
75 | if (!d) | ||
76 | return NULL; | ||
77 | |||
78 | de = container_of(d, struct objio_dev_ent, id_node); | ||
79 | return de; | ||
80 | } | ||
81 | |||
82 | static struct objio_dev_ent * | ||
83 | _dev_list_add(const struct nfs_server *nfss, | ||
84 | const struct nfs4_deviceid *d_id, struct osd_dev *od, | ||
85 | gfp_t gfp_flags) | ||
86 | { | ||
87 | struct nfs4_deviceid_node *d; | ||
88 | struct objio_dev_ent *de = kzalloc(sizeof(*de), gfp_flags); | ||
89 | struct objio_dev_ent *n; | ||
90 | |||
91 | if (!de) { | ||
92 | dprintk("%s: -ENOMEM od=%p\n", __func__, od); | ||
93 | return NULL; | ||
94 | } | ||
95 | |||
96 | dprintk("%s: Adding od=%p\n", __func__, od); | ||
97 | nfs4_init_deviceid_node(&de->id_node, | ||
98 | nfss->pnfs_curr_ld, | ||
99 | nfss->nfs_client, | ||
100 | d_id); | ||
101 | de->od = od; | ||
102 | |||
103 | d = nfs4_insert_deviceid_node(&de->id_node); | ||
104 | n = container_of(d, struct objio_dev_ent, id_node); | ||
105 | if (n != de) { | ||
106 | dprintk("%s: Race with other n->od=%p\n", __func__, n->od); | ||
107 | objio_free_deviceid_node(&de->id_node); | ||
108 | de = n; | ||
109 | } | ||
110 | |||
111 | atomic_inc(&de->id_node.ref); | ||
112 | return de; | ||
113 | } | ||
114 | |||
115 | struct caps_buffers { | ||
116 | u8 caps_key[OSD_CRYPTO_KEYID_SIZE]; | ||
117 | u8 creds[OSD_CAP_LEN]; | ||
118 | }; | ||
119 | |||
120 | struct objio_segment { | ||
121 | struct pnfs_layout_segment lseg; | ||
122 | |||
123 | struct pnfs_osd_object_cred *comps; | ||
124 | |||
125 | unsigned mirrors_p1; | ||
126 | unsigned stripe_unit; | ||
127 | unsigned group_width; /* Data stripe_units without integrity comps */ | ||
128 | u64 group_depth; | ||
129 | unsigned group_count; | ||
130 | |||
131 | unsigned max_io_size; | ||
132 | |||
133 | unsigned comps_index; | ||
134 | unsigned num_comps; | ||
135 | /* variable length */ | ||
136 | struct objio_dev_ent *ods[]; | ||
137 | }; | ||
138 | |||
139 | static inline struct objio_segment * | ||
140 | OBJIO_LSEG(struct pnfs_layout_segment *lseg) | ||
141 | { | ||
142 | return container_of(lseg, struct objio_segment, lseg); | ||
143 | } | ||
144 | |||
145 | struct objio_state; | ||
146 | typedef ssize_t (*objio_done_fn)(struct objio_state *ios); | ||
147 | |||
148 | struct objio_state { | ||
149 | /* Generic layer */ | ||
150 | struct objlayout_io_state ol_state; | ||
151 | |||
152 | struct objio_segment *layout; | ||
153 | |||
154 | struct kref kref; | ||
155 | objio_done_fn done; | ||
156 | void *private; | ||
157 | |||
158 | unsigned long length; | ||
159 | unsigned numdevs; /* Actually used devs in this IO */ | ||
160 | /* A per-device variable array of size numdevs */ | ||
161 | struct _objio_per_comp { | ||
162 | struct bio *bio; | ||
163 | struct osd_request *or; | ||
164 | unsigned long length; | ||
165 | u64 offset; | ||
166 | unsigned dev; | ||
167 | } per_dev[]; | ||
168 | }; | ||
169 | |||
170 | /* Send and wait for a get_device_info of devices in the layout, | ||
171 | then look them up with the osd_initiator library */ | ||
172 | static struct objio_dev_ent *_device_lookup(struct pnfs_layout_hdr *pnfslay, | ||
173 | struct objio_segment *objio_seg, unsigned comp, | ||
174 | gfp_t gfp_flags) | ||
175 | { | ||
176 | struct pnfs_osd_deviceaddr *deviceaddr; | ||
177 | struct nfs4_deviceid *d_id; | ||
178 | struct objio_dev_ent *ode; | ||
179 | struct osd_dev *od; | ||
180 | struct osd_dev_info odi; | ||
181 | int err; | ||
182 | |||
183 | d_id = &objio_seg->comps[comp].oc_object_id.oid_device_id; | ||
184 | |||
185 | ode = _dev_list_find(NFS_SERVER(pnfslay->plh_inode), d_id); | ||
186 | if (ode) | ||
187 | return ode; | ||
188 | |||
189 | err = objlayout_get_deviceinfo(pnfslay, d_id, &deviceaddr, gfp_flags); | ||
190 | if (unlikely(err)) { | ||
191 | dprintk("%s: objlayout_get_deviceinfo dev(%llx:%llx) =>%d\n", | ||
192 | __func__, _DEVID_LO(d_id), _DEVID_HI(d_id), err); | ||
193 | return ERR_PTR(err); | ||
194 | } | ||
195 | |||
196 | odi.systemid_len = deviceaddr->oda_systemid.len; | ||
197 | if (odi.systemid_len > sizeof(odi.systemid)) { | ||
198 | err = -EINVAL; | ||
199 | goto out; | ||
200 | } else if (odi.systemid_len) | ||
201 | memcpy(odi.systemid, deviceaddr->oda_systemid.data, | ||
202 | odi.systemid_len); | ||
203 | odi.osdname_len = deviceaddr->oda_osdname.len; | ||
204 | odi.osdname = (u8 *)deviceaddr->oda_osdname.data; | ||
205 | |||
206 | if (!odi.osdname_len && !odi.systemid_len) { | ||
207 | dprintk("%s: !odi.osdname_len && !odi.systemid_len\n", | ||
208 | __func__); | ||
209 | err = -ENODEV; | ||
210 | goto out; | ||
211 | } | ||
212 | |||
213 | od = osduld_info_lookup(&odi); | ||
214 | if (unlikely(IS_ERR(od))) { | ||
215 | err = PTR_ERR(od); | ||
216 | dprintk("%s: osduld_info_lookup => %d\n", __func__, err); | ||
217 | goto out; | ||
218 | } | ||
219 | |||
220 | ode = _dev_list_add(NFS_SERVER(pnfslay->plh_inode), d_id, od, | ||
221 | gfp_flags); | ||
222 | |||
223 | out: | ||
224 | dprintk("%s: return=%d\n", __func__, err); | ||
225 | objlayout_put_deviceinfo(deviceaddr); | ||
226 | return err ? ERR_PTR(err) : ode; | ||
227 | } | ||
228 | |||
229 | static int objio_devices_lookup(struct pnfs_layout_hdr *pnfslay, | ||
230 | struct objio_segment *objio_seg, | ||
231 | gfp_t gfp_flags) | ||
232 | { | ||
233 | unsigned i; | ||
234 | int err; | ||
235 | |||
236 | /* lookup all devices */ | ||
237 | for (i = 0; i < objio_seg->num_comps; i++) { | ||
238 | struct objio_dev_ent *ode; | ||
239 | |||
240 | ode = _device_lookup(pnfslay, objio_seg, i, gfp_flags); | ||
241 | if (unlikely(IS_ERR(ode))) { | ||
242 | err = PTR_ERR(ode); | ||
243 | goto out; | ||
244 | } | ||
245 | objio_seg->ods[i] = ode; | ||
246 | } | ||
247 | err = 0; | ||
248 | |||
249 | out: | ||
250 | dprintk("%s: return=%d\n", __func__, err); | ||
251 | return err; | ||
252 | } | ||
253 | |||
254 | static int _verify_data_map(struct pnfs_osd_layout *layout) | ||
255 | { | ||
256 | struct pnfs_osd_data_map *data_map = &layout->olo_map; | ||
257 | u64 stripe_length; | ||
258 | u32 group_width; | ||
259 | |||
260 | /* FIXME: Only raid0 for now. if not go through MDS */ | ||
261 | if (data_map->odm_raid_algorithm != PNFS_OSD_RAID_0) { | ||
262 | printk(KERN_ERR "Only RAID_0 for now\n"); | ||
263 | return -ENOTSUPP; | ||
264 | } | ||
265 | if (0 != (data_map->odm_num_comps % (data_map->odm_mirror_cnt + 1))) { | ||
266 | printk(KERN_ERR "Data Map wrong, num_comps=%u mirrors=%u\n", | ||
267 | data_map->odm_num_comps, data_map->odm_mirror_cnt); | ||
268 | return -EINVAL; | ||
269 | } | ||
270 | |||
271 | if (data_map->odm_group_width) | ||
272 | group_width = data_map->odm_group_width; | ||
273 | else | ||
274 | group_width = data_map->odm_num_comps / | ||
275 | (data_map->odm_mirror_cnt + 1); | ||
276 | |||
277 | stripe_length = (u64)data_map->odm_stripe_unit * group_width; | ||
278 | if (stripe_length >= (1ULL << 32)) { | ||
279 | printk(KERN_ERR "Total Stripe length(0x%llx)" | ||
280 | " >= 32bit is not supported\n", _LLU(stripe_length)); | ||
281 | return -ENOTSUPP; | ||
282 | } | ||
283 | |||
284 | if (0 != (data_map->odm_stripe_unit & ~PAGE_MASK)) { | ||
285 | printk(KERN_ERR "Stripe Unit(0x%llx)" | ||
286 | " must be Multples of PAGE_SIZE(0x%lx)\n", | ||
287 | _LLU(data_map->odm_stripe_unit), PAGE_SIZE); | ||
288 | return -ENOTSUPP; | ||
289 | } | ||
290 | |||
291 | return 0; | ||
292 | } | ||
293 | |||
294 | static void copy_single_comp(struct pnfs_osd_object_cred *cur_comp, | ||
295 | struct pnfs_osd_object_cred *src_comp, | ||
296 | struct caps_buffers *caps_p) | ||
297 | { | ||
298 | WARN_ON(src_comp->oc_cap_key.cred_len > sizeof(caps_p->caps_key)); | ||
299 | WARN_ON(src_comp->oc_cap.cred_len > sizeof(caps_p->creds)); | ||
300 | |||
301 | *cur_comp = *src_comp; | ||
302 | |||
303 | memcpy(caps_p->caps_key, src_comp->oc_cap_key.cred, | ||
304 | sizeof(caps_p->caps_key)); | ||
305 | cur_comp->oc_cap_key.cred = caps_p->caps_key; | ||
306 | |||
307 | memcpy(caps_p->creds, src_comp->oc_cap.cred, | ||
308 | sizeof(caps_p->creds)); | ||
309 | cur_comp->oc_cap.cred = caps_p->creds; | ||
310 | } | ||
311 | |||
312 | int objio_alloc_lseg(struct pnfs_layout_segment **outp, | ||
313 | struct pnfs_layout_hdr *pnfslay, | ||
314 | struct pnfs_layout_range *range, | ||
315 | struct xdr_stream *xdr, | ||
316 | gfp_t gfp_flags) | ||
317 | { | ||
318 | struct objio_segment *objio_seg; | ||
319 | struct pnfs_osd_xdr_decode_layout_iter iter; | ||
320 | struct pnfs_osd_layout layout; | ||
321 | struct pnfs_osd_object_cred *cur_comp, src_comp; | ||
322 | struct caps_buffers *caps_p; | ||
323 | int err; | ||
324 | |||
325 | err = pnfs_osd_xdr_decode_layout_map(&layout, &iter, xdr); | ||
326 | if (unlikely(err)) | ||
327 | return err; | ||
328 | |||
329 | err = _verify_data_map(&layout); | ||
330 | if (unlikely(err)) | ||
331 | return err; | ||
332 | |||
333 | objio_seg = kzalloc(sizeof(*objio_seg) + | ||
334 | sizeof(objio_seg->ods[0]) * layout.olo_num_comps + | ||
335 | sizeof(*objio_seg->comps) * layout.olo_num_comps + | ||
336 | sizeof(struct caps_buffers) * layout.olo_num_comps, | ||
337 | gfp_flags); | ||
338 | if (!objio_seg) | ||
339 | return -ENOMEM; | ||
340 | |||
341 | objio_seg->comps = (void *)(objio_seg->ods + layout.olo_num_comps); | ||
342 | cur_comp = objio_seg->comps; | ||
343 | caps_p = (void *)(cur_comp + layout.olo_num_comps); | ||
344 | while (pnfs_osd_xdr_decode_layout_comp(&src_comp, &iter, xdr, &err)) | ||
345 | copy_single_comp(cur_comp++, &src_comp, caps_p++); | ||
346 | if (unlikely(err)) | ||
347 | goto err; | ||
348 | |||
349 | objio_seg->num_comps = layout.olo_num_comps; | ||
350 | objio_seg->comps_index = layout.olo_comps_index; | ||
351 | err = objio_devices_lookup(pnfslay, objio_seg, gfp_flags); | ||
352 | if (err) | ||
353 | goto err; | ||
354 | |||
355 | objio_seg->mirrors_p1 = layout.olo_map.odm_mirror_cnt + 1; | ||
356 | objio_seg->stripe_unit = layout.olo_map.odm_stripe_unit; | ||
357 | if (layout.olo_map.odm_group_width) { | ||
358 | objio_seg->group_width = layout.olo_map.odm_group_width; | ||
359 | objio_seg->group_depth = layout.olo_map.odm_group_depth; | ||
360 | objio_seg->group_count = layout.olo_map.odm_num_comps / | ||
361 | objio_seg->mirrors_p1 / | ||
362 | objio_seg->group_width; | ||
363 | } else { | ||
364 | objio_seg->group_width = layout.olo_map.odm_num_comps / | ||
365 | objio_seg->mirrors_p1; | ||
366 | objio_seg->group_depth = -1; | ||
367 | objio_seg->group_count = 1; | ||
368 | } | ||
369 | |||
370 | /* Cache this calculation it will hit for every page */ | ||
371 | objio_seg->max_io_size = (BIO_MAX_PAGES_KMALLOC * PAGE_SIZE - | ||
372 | objio_seg->stripe_unit) * | ||
373 | objio_seg->group_width; | ||
374 | |||
375 | *outp = &objio_seg->lseg; | ||
376 | return 0; | ||
377 | |||
378 | err: | ||
379 | kfree(objio_seg); | ||
380 | dprintk("%s: Error: return %d\n", __func__, err); | ||
381 | *outp = NULL; | ||
382 | return err; | ||
383 | } | ||
384 | |||
385 | void objio_free_lseg(struct pnfs_layout_segment *lseg) | ||
386 | { | ||
387 | int i; | ||
388 | struct objio_segment *objio_seg = OBJIO_LSEG(lseg); | ||
389 | |||
390 | for (i = 0; i < objio_seg->num_comps; i++) { | ||
391 | if (!objio_seg->ods[i]) | ||
392 | break; | ||
393 | nfs4_put_deviceid_node(&objio_seg->ods[i]->id_node); | ||
394 | } | ||
395 | kfree(objio_seg); | ||
396 | } | ||
397 | |||
398 | int objio_alloc_io_state(struct pnfs_layout_segment *lseg, | ||
399 | struct objlayout_io_state **outp, | ||
400 | gfp_t gfp_flags) | ||
401 | { | ||
402 | struct objio_segment *objio_seg = OBJIO_LSEG(lseg); | ||
403 | struct objio_state *ios; | ||
404 | const unsigned first_size = sizeof(*ios) + | ||
405 | objio_seg->num_comps * sizeof(ios->per_dev[0]); | ||
406 | const unsigned sec_size = objio_seg->num_comps * | ||
407 | sizeof(ios->ol_state.ioerrs[0]); | ||
408 | |||
409 | ios = kzalloc(first_size + sec_size, gfp_flags); | ||
410 | if (unlikely(!ios)) | ||
411 | return -ENOMEM; | ||
412 | |||
413 | ios->layout = objio_seg; | ||
414 | ios->ol_state.ioerrs = ((void *)ios) + first_size; | ||
415 | ios->ol_state.num_comps = objio_seg->num_comps; | ||
416 | |||
417 | *outp = &ios->ol_state; | ||
418 | return 0; | ||
419 | } | ||
420 | |||
421 | void objio_free_io_state(struct objlayout_io_state *ol_state) | ||
422 | { | ||
423 | struct objio_state *ios = container_of(ol_state, struct objio_state, | ||
424 | ol_state); | ||
425 | |||
426 | kfree(ios); | ||
427 | } | ||
428 | |||
429 | enum pnfs_osd_errno osd_pri_2_pnfs_err(enum osd_err_priority oep) | ||
430 | { | ||
431 | switch (oep) { | ||
432 | case OSD_ERR_PRI_NO_ERROR: | ||
433 | return (enum pnfs_osd_errno)0; | ||
434 | |||
435 | case OSD_ERR_PRI_CLEAR_PAGES: | ||
436 | BUG_ON(1); | ||
437 | return 0; | ||
438 | |||
439 | case OSD_ERR_PRI_RESOURCE: | ||
440 | return PNFS_OSD_ERR_RESOURCE; | ||
441 | case OSD_ERR_PRI_BAD_CRED: | ||
442 | return PNFS_OSD_ERR_BAD_CRED; | ||
443 | case OSD_ERR_PRI_NO_ACCESS: | ||
444 | return PNFS_OSD_ERR_NO_ACCESS; | ||
445 | case OSD_ERR_PRI_UNREACHABLE: | ||
446 | return PNFS_OSD_ERR_UNREACHABLE; | ||
447 | case OSD_ERR_PRI_NOT_FOUND: | ||
448 | return PNFS_OSD_ERR_NOT_FOUND; | ||
449 | case OSD_ERR_PRI_NO_SPACE: | ||
450 | return PNFS_OSD_ERR_NO_SPACE; | ||
451 | default: | ||
452 | WARN_ON(1); | ||
453 | /* fallthrough */ | ||
454 | case OSD_ERR_PRI_EIO: | ||
455 | return PNFS_OSD_ERR_EIO; | ||
456 | } | ||
457 | } | ||
458 | |||
459 | static void _clear_bio(struct bio *bio) | ||
460 | { | ||
461 | struct bio_vec *bv; | ||
462 | unsigned i; | ||
463 | |||
464 | __bio_for_each_segment(bv, bio, i, 0) { | ||
465 | unsigned this_count = bv->bv_len; | ||
466 | |||
467 | if (likely(PAGE_SIZE == this_count)) | ||
468 | clear_highpage(bv->bv_page); | ||
469 | else | ||
470 | zero_user(bv->bv_page, bv->bv_offset, this_count); | ||
471 | } | ||
472 | } | ||
473 | |||
474 | static int _io_check(struct objio_state *ios, bool is_write) | ||
475 | { | ||
476 | enum osd_err_priority oep = OSD_ERR_PRI_NO_ERROR; | ||
477 | int lin_ret = 0; | ||
478 | int i; | ||
479 | |||
480 | for (i = 0; i < ios->numdevs; i++) { | ||
481 | struct osd_sense_info osi; | ||
482 | struct osd_request *or = ios->per_dev[i].or; | ||
483 | unsigned dev; | ||
484 | int ret; | ||
485 | |||
486 | if (!or) | ||
487 | continue; | ||
488 | |||
489 | ret = osd_req_decode_sense(or, &osi); | ||
490 | if (likely(!ret)) | ||
491 | continue; | ||
492 | |||
493 | if (OSD_ERR_PRI_CLEAR_PAGES == osi.osd_err_pri) { | ||
494 | /* start read offset passed endof file */ | ||
495 | BUG_ON(is_write); | ||
496 | _clear_bio(ios->per_dev[i].bio); | ||
497 | dprintk("%s: start read offset passed end of file " | ||
498 | "offset=0x%llx, length=0x%lx\n", __func__, | ||
499 | _LLU(ios->per_dev[i].offset), | ||
500 | ios->per_dev[i].length); | ||
501 | |||
502 | continue; /* we recovered */ | ||
503 | } | ||
504 | dev = ios->per_dev[i].dev; | ||
505 | objlayout_io_set_result(&ios->ol_state, dev, | ||
506 | &ios->layout->comps[dev].oc_object_id, | ||
507 | osd_pri_2_pnfs_err(osi.osd_err_pri), | ||
508 | ios->per_dev[i].offset, | ||
509 | ios->per_dev[i].length, | ||
510 | is_write); | ||
511 | |||
512 | if (osi.osd_err_pri >= oep) { | ||
513 | oep = osi.osd_err_pri; | ||
514 | lin_ret = ret; | ||
515 | } | ||
516 | } | ||
517 | |||
518 | return lin_ret; | ||
519 | } | ||
520 | |||
521 | /* | ||
522 | * Common IO state helpers. | ||
523 | */ | ||
524 | static void _io_free(struct objio_state *ios) | ||
525 | { | ||
526 | unsigned i; | ||
527 | |||
528 | for (i = 0; i < ios->numdevs; i++) { | ||
529 | struct _objio_per_comp *per_dev = &ios->per_dev[i]; | ||
530 | |||
531 | if (per_dev->or) { | ||
532 | osd_end_request(per_dev->or); | ||
533 | per_dev->or = NULL; | ||
534 | } | ||
535 | |||
536 | if (per_dev->bio) { | ||
537 | bio_put(per_dev->bio); | ||
538 | per_dev->bio = NULL; | ||
539 | } | ||
540 | } | ||
541 | } | ||
542 | |||
543 | struct osd_dev *_io_od(struct objio_state *ios, unsigned dev) | ||
544 | { | ||
545 | unsigned min_dev = ios->layout->comps_index; | ||
546 | unsigned max_dev = min_dev + ios->layout->num_comps; | ||
547 | |||
548 | BUG_ON(dev < min_dev || max_dev <= dev); | ||
549 | return ios->layout->ods[dev - min_dev]->od; | ||
550 | } | ||
551 | |||
552 | struct _striping_info { | ||
553 | u64 obj_offset; | ||
554 | u64 group_length; | ||
555 | unsigned dev; | ||
556 | unsigned unit_off; | ||
557 | }; | ||
558 | |||
559 | static void _calc_stripe_info(struct objio_state *ios, u64 file_offset, | ||
560 | struct _striping_info *si) | ||
561 | { | ||
562 | u32 stripe_unit = ios->layout->stripe_unit; | ||
563 | u32 group_width = ios->layout->group_width; | ||
564 | u64 group_depth = ios->layout->group_depth; | ||
565 | u32 U = stripe_unit * group_width; | ||
566 | |||
567 | u64 T = U * group_depth; | ||
568 | u64 S = T * ios->layout->group_count; | ||
569 | u64 M = div64_u64(file_offset, S); | ||
570 | |||
571 | /* | ||
572 | G = (L - (M * S)) / T | ||
573 | H = (L - (M * S)) % T | ||
574 | */ | ||
575 | u64 LmodU = file_offset - M * S; | ||
576 | u32 G = div64_u64(LmodU, T); | ||
577 | u64 H = LmodU - G * T; | ||
578 | |||
579 | u32 N = div_u64(H, U); | ||
580 | |||
581 | div_u64_rem(file_offset, stripe_unit, &si->unit_off); | ||
582 | si->obj_offset = si->unit_off + (N * stripe_unit) + | ||
583 | (M * group_depth * stripe_unit); | ||
584 | |||
585 | /* "H - (N * U)" is just "H % U" so it's bound to u32 */ | ||
586 | si->dev = (u32)(H - (N * U)) / stripe_unit + G * group_width; | ||
587 | si->dev *= ios->layout->mirrors_p1; | ||
588 | |||
589 | si->group_length = T - H; | ||
590 | } | ||
591 | |||
592 | static int _add_stripe_unit(struct objio_state *ios, unsigned *cur_pg, | ||
593 | unsigned pgbase, struct _objio_per_comp *per_dev, int cur_len, | ||
594 | gfp_t gfp_flags) | ||
595 | { | ||
596 | unsigned pg = *cur_pg; | ||
597 | struct request_queue *q = | ||
598 | osd_request_queue(_io_od(ios, per_dev->dev)); | ||
599 | |||
600 | per_dev->length += cur_len; | ||
601 | |||
602 | if (per_dev->bio == NULL) { | ||
603 | unsigned stripes = ios->layout->num_comps / | ||
604 | ios->layout->mirrors_p1; | ||
605 | unsigned pages_in_stripe = stripes * | ||
606 | (ios->layout->stripe_unit / PAGE_SIZE); | ||
607 | unsigned bio_size = (ios->ol_state.nr_pages + pages_in_stripe) / | ||
608 | stripes; | ||
609 | |||
610 | if (BIO_MAX_PAGES_KMALLOC < bio_size) | ||
611 | bio_size = BIO_MAX_PAGES_KMALLOC; | ||
612 | |||
613 | per_dev->bio = bio_kmalloc(gfp_flags, bio_size); | ||
614 | if (unlikely(!per_dev->bio)) { | ||
615 | dprintk("Faild to allocate BIO size=%u\n", bio_size); | ||
616 | return -ENOMEM; | ||
617 | } | ||
618 | } | ||
619 | |||
620 | while (cur_len > 0) { | ||
621 | unsigned pglen = min_t(unsigned, PAGE_SIZE - pgbase, cur_len); | ||
622 | unsigned added_len; | ||
623 | |||
624 | BUG_ON(ios->ol_state.nr_pages <= pg); | ||
625 | cur_len -= pglen; | ||
626 | |||
627 | added_len = bio_add_pc_page(q, per_dev->bio, | ||
628 | ios->ol_state.pages[pg], pglen, pgbase); | ||
629 | if (unlikely(pglen != added_len)) | ||
630 | return -ENOMEM; | ||
631 | pgbase = 0; | ||
632 | ++pg; | ||
633 | } | ||
634 | BUG_ON(cur_len); | ||
635 | |||
636 | *cur_pg = pg; | ||
637 | return 0; | ||
638 | } | ||
639 | |||
640 | static int _prepare_one_group(struct objio_state *ios, u64 length, | ||
641 | struct _striping_info *si, unsigned *last_pg, | ||
642 | gfp_t gfp_flags) | ||
643 | { | ||
644 | unsigned stripe_unit = ios->layout->stripe_unit; | ||
645 | unsigned mirrors_p1 = ios->layout->mirrors_p1; | ||
646 | unsigned devs_in_group = ios->layout->group_width * mirrors_p1; | ||
647 | unsigned dev = si->dev; | ||
648 | unsigned first_dev = dev - (dev % devs_in_group); | ||
649 | unsigned max_comp = ios->numdevs ? ios->numdevs - mirrors_p1 : 0; | ||
650 | unsigned cur_pg = *last_pg; | ||
651 | int ret = 0; | ||
652 | |||
653 | while (length) { | ||
654 | struct _objio_per_comp *per_dev = &ios->per_dev[dev]; | ||
655 | unsigned cur_len, page_off = 0; | ||
656 | |||
657 | if (!per_dev->length) { | ||
658 | per_dev->dev = dev; | ||
659 | if (dev < si->dev) { | ||
660 | per_dev->offset = si->obj_offset + stripe_unit - | ||
661 | si->unit_off; | ||
662 | cur_len = stripe_unit; | ||
663 | } else if (dev == si->dev) { | ||
664 | per_dev->offset = si->obj_offset; | ||
665 | cur_len = stripe_unit - si->unit_off; | ||
666 | page_off = si->unit_off & ~PAGE_MASK; | ||
667 | BUG_ON(page_off && | ||
668 | (page_off != ios->ol_state.pgbase)); | ||
669 | } else { /* dev > si->dev */ | ||
670 | per_dev->offset = si->obj_offset - si->unit_off; | ||
671 | cur_len = stripe_unit; | ||
672 | } | ||
673 | |||
674 | if (max_comp < dev) | ||
675 | max_comp = dev; | ||
676 | } else { | ||
677 | cur_len = stripe_unit; | ||
678 | } | ||
679 | if (cur_len >= length) | ||
680 | cur_len = length; | ||
681 | |||
682 | ret = _add_stripe_unit(ios, &cur_pg, page_off , per_dev, | ||
683 | cur_len, gfp_flags); | ||
684 | if (unlikely(ret)) | ||
685 | goto out; | ||
686 | |||
687 | dev += mirrors_p1; | ||
688 | dev = (dev % devs_in_group) + first_dev; | ||
689 | |||
690 | length -= cur_len; | ||
691 | ios->length += cur_len; | ||
692 | } | ||
693 | out: | ||
694 | ios->numdevs = max_comp + mirrors_p1; | ||
695 | *last_pg = cur_pg; | ||
696 | return ret; | ||
697 | } | ||
698 | |||
699 | static int _io_rw_pagelist(struct objio_state *ios, gfp_t gfp_flags) | ||
700 | { | ||
701 | u64 length = ios->ol_state.count; | ||
702 | u64 offset = ios->ol_state.offset; | ||
703 | struct _striping_info si; | ||
704 | unsigned last_pg = 0; | ||
705 | int ret = 0; | ||
706 | |||
707 | while (length) { | ||
708 | _calc_stripe_info(ios, offset, &si); | ||
709 | |||
710 | if (length < si.group_length) | ||
711 | si.group_length = length; | ||
712 | |||
713 | ret = _prepare_one_group(ios, si.group_length, &si, &last_pg, gfp_flags); | ||
714 | if (unlikely(ret)) | ||
715 | goto out; | ||
716 | |||
717 | offset += si.group_length; | ||
718 | length -= si.group_length; | ||
719 | } | ||
720 | |||
721 | out: | ||
722 | if (!ios->length) | ||
723 | return ret; | ||
724 | |||
725 | return 0; | ||
726 | } | ||
727 | |||
728 | static ssize_t _sync_done(struct objio_state *ios) | ||
729 | { | ||
730 | struct completion *waiting = ios->private; | ||
731 | |||
732 | complete(waiting); | ||
733 | return 0; | ||
734 | } | ||
735 | |||
736 | static void _last_io(struct kref *kref) | ||
737 | { | ||
738 | struct objio_state *ios = container_of(kref, struct objio_state, kref); | ||
739 | |||
740 | ios->done(ios); | ||
741 | } | ||
742 | |||
743 | static void _done_io(struct osd_request *or, void *p) | ||
744 | { | ||
745 | struct objio_state *ios = p; | ||
746 | |||
747 | kref_put(&ios->kref, _last_io); | ||
748 | } | ||
749 | |||
750 | static ssize_t _io_exec(struct objio_state *ios) | ||
751 | { | ||
752 | DECLARE_COMPLETION_ONSTACK(wait); | ||
753 | ssize_t status = 0; /* sync status */ | ||
754 | unsigned i; | ||
755 | objio_done_fn saved_done_fn = ios->done; | ||
756 | bool sync = ios->ol_state.sync; | ||
757 | |||
758 | if (sync) { | ||
759 | ios->done = _sync_done; | ||
760 | ios->private = &wait; | ||
761 | } | ||
762 | |||
763 | kref_init(&ios->kref); | ||
764 | |||
765 | for (i = 0; i < ios->numdevs; i++) { | ||
766 | struct osd_request *or = ios->per_dev[i].or; | ||
767 | |||
768 | if (!or) | ||
769 | continue; | ||
770 | |||
771 | kref_get(&ios->kref); | ||
772 | osd_execute_request_async(or, _done_io, ios); | ||
773 | } | ||
774 | |||
775 | kref_put(&ios->kref, _last_io); | ||
776 | |||
777 | if (sync) { | ||
778 | wait_for_completion(&wait); | ||
779 | status = saved_done_fn(ios); | ||
780 | } | ||
781 | |||
782 | return status; | ||
783 | } | ||
784 | |||
785 | /* | ||
786 | * read | ||
787 | */ | ||
788 | static ssize_t _read_done(struct objio_state *ios) | ||
789 | { | ||
790 | ssize_t status; | ||
791 | int ret = _io_check(ios, false); | ||
792 | |||
793 | _io_free(ios); | ||
794 | |||
795 | if (likely(!ret)) | ||
796 | status = ios->length; | ||
797 | else | ||
798 | status = ret; | ||
799 | |||
800 | objlayout_read_done(&ios->ol_state, status, ios->ol_state.sync); | ||
801 | return status; | ||
802 | } | ||
803 | |||
804 | static int _read_mirrors(struct objio_state *ios, unsigned cur_comp) | ||
805 | { | ||
806 | struct osd_request *or = NULL; | ||
807 | struct _objio_per_comp *per_dev = &ios->per_dev[cur_comp]; | ||
808 | unsigned dev = per_dev->dev; | ||
809 | struct pnfs_osd_object_cred *cred = | ||
810 | &ios->layout->comps[dev]; | ||
811 | struct osd_obj_id obj = { | ||
812 | .partition = cred->oc_object_id.oid_partition_id, | ||
813 | .id = cred->oc_object_id.oid_object_id, | ||
814 | }; | ||
815 | int ret; | ||
816 | |||
817 | or = osd_start_request(_io_od(ios, dev), GFP_KERNEL); | ||
818 | if (unlikely(!or)) { | ||
819 | ret = -ENOMEM; | ||
820 | goto err; | ||
821 | } | ||
822 | per_dev->or = or; | ||
823 | |||
824 | osd_req_read(or, &obj, per_dev->offset, per_dev->bio, per_dev->length); | ||
825 | |||
826 | ret = osd_finalize_request(or, 0, cred->oc_cap.cred, NULL); | ||
827 | if (ret) { | ||
828 | dprintk("%s: Faild to osd_finalize_request() => %d\n", | ||
829 | __func__, ret); | ||
830 | goto err; | ||
831 | } | ||
832 | |||
833 | dprintk("%s:[%d] dev=%d obj=0x%llx start=0x%llx length=0x%lx\n", | ||
834 | __func__, cur_comp, dev, obj.id, _LLU(per_dev->offset), | ||
835 | per_dev->length); | ||
836 | |||
837 | err: | ||
838 | return ret; | ||
839 | } | ||
840 | |||
841 | static ssize_t _read_exec(struct objio_state *ios) | ||
842 | { | ||
843 | unsigned i; | ||
844 | int ret; | ||
845 | |||
846 | for (i = 0; i < ios->numdevs; i += ios->layout->mirrors_p1) { | ||
847 | if (!ios->per_dev[i].length) | ||
848 | continue; | ||
849 | ret = _read_mirrors(ios, i); | ||
850 | if (unlikely(ret)) | ||
851 | goto err; | ||
852 | } | ||
853 | |||
854 | ios->done = _read_done; | ||
855 | return _io_exec(ios); /* In sync mode exec returns the io status */ | ||
856 | |||
857 | err: | ||
858 | _io_free(ios); | ||
859 | return ret; | ||
860 | } | ||
861 | |||
862 | ssize_t objio_read_pagelist(struct objlayout_io_state *ol_state) | ||
863 | { | ||
864 | struct objio_state *ios = container_of(ol_state, struct objio_state, | ||
865 | ol_state); | ||
866 | int ret; | ||
867 | |||
868 | ret = _io_rw_pagelist(ios, GFP_KERNEL); | ||
869 | if (unlikely(ret)) | ||
870 | return ret; | ||
871 | |||
872 | return _read_exec(ios); | ||
873 | } | ||
874 | |||
875 | /* | ||
876 | * write | ||
877 | */ | ||
878 | static ssize_t _write_done(struct objio_state *ios) | ||
879 | { | ||
880 | ssize_t status; | ||
881 | int ret = _io_check(ios, true); | ||
882 | |||
883 | _io_free(ios); | ||
884 | |||
885 | if (likely(!ret)) { | ||
886 | /* FIXME: should be based on the OSD's persistence model | ||
887 | * See OSD2r05 Section 4.13 Data persistence model */ | ||
888 | ios->ol_state.committed = NFS_FILE_SYNC; | ||
889 | status = ios->length; | ||
890 | } else { | ||
891 | status = ret; | ||
892 | } | ||
893 | |||
894 | objlayout_write_done(&ios->ol_state, status, ios->ol_state.sync); | ||
895 | return status; | ||
896 | } | ||
897 | |||
898 | static int _write_mirrors(struct objio_state *ios, unsigned cur_comp) | ||
899 | { | ||
900 | struct _objio_per_comp *master_dev = &ios->per_dev[cur_comp]; | ||
901 | unsigned dev = ios->per_dev[cur_comp].dev; | ||
902 | unsigned last_comp = cur_comp + ios->layout->mirrors_p1; | ||
903 | int ret; | ||
904 | |||
905 | for (; cur_comp < last_comp; ++cur_comp, ++dev) { | ||
906 | struct osd_request *or = NULL; | ||
907 | struct pnfs_osd_object_cred *cred = | ||
908 | &ios->layout->comps[dev]; | ||
909 | struct osd_obj_id obj = { | ||
910 | .partition = cred->oc_object_id.oid_partition_id, | ||
911 | .id = cred->oc_object_id.oid_object_id, | ||
912 | }; | ||
913 | struct _objio_per_comp *per_dev = &ios->per_dev[cur_comp]; | ||
914 | struct bio *bio; | ||
915 | |||
916 | or = osd_start_request(_io_od(ios, dev), GFP_NOFS); | ||
917 | if (unlikely(!or)) { | ||
918 | ret = -ENOMEM; | ||
919 | goto err; | ||
920 | } | ||
921 | per_dev->or = or; | ||
922 | |||
923 | if (per_dev != master_dev) { | ||
924 | bio = bio_kmalloc(GFP_NOFS, | ||
925 | master_dev->bio->bi_max_vecs); | ||
926 | if (unlikely(!bio)) { | ||
927 | dprintk("Faild to allocate BIO size=%u\n", | ||
928 | master_dev->bio->bi_max_vecs); | ||
929 | ret = -ENOMEM; | ||
930 | goto err; | ||
931 | } | ||
932 | |||
933 | __bio_clone(bio, master_dev->bio); | ||
934 | bio->bi_bdev = NULL; | ||
935 | bio->bi_next = NULL; | ||
936 | per_dev->bio = bio; | ||
937 | per_dev->dev = dev; | ||
938 | per_dev->length = master_dev->length; | ||
939 | per_dev->offset = master_dev->offset; | ||
940 | } else { | ||
941 | bio = master_dev->bio; | ||
942 | bio->bi_rw |= REQ_WRITE; | ||
943 | } | ||
944 | |||
945 | osd_req_write(or, &obj, per_dev->offset, bio, per_dev->length); | ||
946 | |||
947 | ret = osd_finalize_request(or, 0, cred->oc_cap.cred, NULL); | ||
948 | if (ret) { | ||
949 | dprintk("%s: Faild to osd_finalize_request() => %d\n", | ||
950 | __func__, ret); | ||
951 | goto err; | ||
952 | } | ||
953 | |||
954 | dprintk("%s:[%d] dev=%d obj=0x%llx start=0x%llx length=0x%lx\n", | ||
955 | __func__, cur_comp, dev, obj.id, _LLU(per_dev->offset), | ||
956 | per_dev->length); | ||
957 | } | ||
958 | |||
959 | err: | ||
960 | return ret; | ||
961 | } | ||
962 | |||
963 | static ssize_t _write_exec(struct objio_state *ios) | ||
964 | { | ||
965 | unsigned i; | ||
966 | int ret; | ||
967 | |||
968 | for (i = 0; i < ios->numdevs; i += ios->layout->mirrors_p1) { | ||
969 | if (!ios->per_dev[i].length) | ||
970 | continue; | ||
971 | ret = _write_mirrors(ios, i); | ||
972 | if (unlikely(ret)) | ||
973 | goto err; | ||
974 | } | ||
975 | |||
976 | ios->done = _write_done; | ||
977 | return _io_exec(ios); /* In sync mode exec returns the io->status */ | ||
978 | |||
979 | err: | ||
980 | _io_free(ios); | ||
981 | return ret; | ||
982 | } | ||
983 | |||
984 | ssize_t objio_write_pagelist(struct objlayout_io_state *ol_state, bool stable) | ||
985 | { | ||
986 | struct objio_state *ios = container_of(ol_state, struct objio_state, | ||
987 | ol_state); | ||
988 | int ret; | ||
989 | |||
990 | /* TODO: ios->stable = stable; */ | ||
991 | ret = _io_rw_pagelist(ios, GFP_NOFS); | ||
992 | if (unlikely(ret)) | ||
993 | return ret; | ||
994 | |||
995 | return _write_exec(ios); | ||
996 | } | ||
997 | |||
998 | static bool objio_pg_test(struct nfs_pageio_descriptor *pgio, | ||
999 | struct nfs_page *prev, struct nfs_page *req) | ||
1000 | { | ||
1001 | if (!pnfs_generic_pg_test(pgio, prev, req)) | ||
1002 | return false; | ||
1003 | |||
1004 | return pgio->pg_count + req->wb_bytes <= | ||
1005 | OBJIO_LSEG(pgio->pg_lseg)->max_io_size; | ||
1006 | } | ||
1007 | |||
1008 | static struct pnfs_layoutdriver_type objlayout_type = { | ||
1009 | .id = LAYOUT_OSD2_OBJECTS, | ||
1010 | .name = "LAYOUT_OSD2_OBJECTS", | ||
1011 | .flags = PNFS_LAYOUTRET_ON_SETATTR, | ||
1012 | |||
1013 | .alloc_layout_hdr = objlayout_alloc_layout_hdr, | ||
1014 | .free_layout_hdr = objlayout_free_layout_hdr, | ||
1015 | |||
1016 | .alloc_lseg = objlayout_alloc_lseg, | ||
1017 | .free_lseg = objlayout_free_lseg, | ||
1018 | |||
1019 | .read_pagelist = objlayout_read_pagelist, | ||
1020 | .write_pagelist = objlayout_write_pagelist, | ||
1021 | .pg_test = objio_pg_test, | ||
1022 | |||
1023 | .free_deviceid_node = objio_free_deviceid_node, | ||
1024 | |||
1025 | .encode_layoutcommit = objlayout_encode_layoutcommit, | ||
1026 | .encode_layoutreturn = objlayout_encode_layoutreturn, | ||
1027 | }; | ||
1028 | |||
1029 | MODULE_DESCRIPTION("pNFS Layout Driver for OSD2 objects"); | ||
1030 | MODULE_AUTHOR("Benny Halevy <bhalevy@panasas.com>"); | ||
1031 | MODULE_LICENSE("GPL"); | ||
1032 | |||
1033 | static int __init | ||
1034 | objlayout_init(void) | ||
1035 | { | ||
1036 | int ret = pnfs_register_layoutdriver(&objlayout_type); | ||
1037 | |||
1038 | if (ret) | ||
1039 | printk(KERN_INFO | ||
1040 | "%s: Registering OSD pNFS Layout Driver failed: error=%d\n", | ||
1041 | __func__, ret); | ||
1042 | else | ||
1043 | printk(KERN_INFO "%s: Registered OSD pNFS Layout Driver\n", | ||
1044 | __func__); | ||
1045 | return ret; | ||
1046 | } | ||
1047 | |||
1048 | static void __exit | ||
1049 | objlayout_exit(void) | ||
1050 | { | ||
1051 | pnfs_unregister_layoutdriver(&objlayout_type); | ||
1052 | printk(KERN_INFO "%s: Unregistered OSD pNFS Layout Driver\n", | ||
1053 | __func__); | ||
1054 | } | ||
1055 | |||
1056 | module_init(objlayout_init); | ||
1057 | module_exit(objlayout_exit); | ||
diff --git a/fs/nfs/objlayout/objlayout.c b/fs/nfs/objlayout/objlayout.c new file mode 100644 index 000000000000..dc3956c0de80 --- /dev/null +++ b/fs/nfs/objlayout/objlayout.c | |||
@@ -0,0 +1,712 @@ | |||
1 | /* | ||
2 | * pNFS Objects layout driver high level definitions | ||
3 | * | ||
4 | * Copyright (C) 2007 Panasas Inc. [year of first publication] | ||
5 | * All rights reserved. | ||
6 | * | ||
7 | * Benny Halevy <bhalevy@panasas.com> | ||
8 | * Boaz Harrosh <bharrosh@panasas.com> | ||
9 | * | ||
10 | * This program is free software; you can redistribute it and/or modify | ||
11 | * it under the terms of the GNU General Public License version 2 | ||
12 | * See the file COPYING included with this distribution for more details. | ||
13 | * | ||
14 | * Redistribution and use in source and binary forms, with or without | ||
15 | * modification, are permitted provided that the following conditions | ||
16 | * are met: | ||
17 | * | ||
18 | * 1. Redistributions of source code must retain the above copyright | ||
19 | * notice, this list of conditions and the following disclaimer. | ||
20 | * 2. Redistributions in binary form must reproduce the above copyright | ||
21 | * notice, this list of conditions and the following disclaimer in the | ||
22 | * documentation and/or other materials provided with the distribution. | ||
23 | * 3. Neither the name of the Panasas company nor the names of its | ||
24 | * contributors may be used to endorse or promote products derived | ||
25 | * from this software without specific prior written permission. | ||
26 | * | ||
27 | * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED | ||
28 | * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF | ||
29 | * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | ||
30 | * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | ||
31 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | ||
32 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | ||
33 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR | ||
34 | * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF | ||
35 | * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING | ||
36 | * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | ||
37 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
38 | */ | ||
39 | |||
40 | #include <scsi/osd_initiator.h> | ||
41 | #include "objlayout.h" | ||
42 | |||
43 | #define NFSDBG_FACILITY NFSDBG_PNFS_LD | ||
44 | /* | ||
45 | * Create a objlayout layout structure for the given inode and return it. | ||
46 | */ | ||
47 | struct pnfs_layout_hdr * | ||
48 | objlayout_alloc_layout_hdr(struct inode *inode, gfp_t gfp_flags) | ||
49 | { | ||
50 | struct objlayout *objlay; | ||
51 | |||
52 | objlay = kzalloc(sizeof(struct objlayout), gfp_flags); | ||
53 | if (objlay) { | ||
54 | spin_lock_init(&objlay->lock); | ||
55 | INIT_LIST_HEAD(&objlay->err_list); | ||
56 | } | ||
57 | dprintk("%s: Return %p\n", __func__, objlay); | ||
58 | return &objlay->pnfs_layout; | ||
59 | } | ||
60 | |||
61 | /* | ||
62 | * Free an objlayout layout structure | ||
63 | */ | ||
64 | void | ||
65 | objlayout_free_layout_hdr(struct pnfs_layout_hdr *lo) | ||
66 | { | ||
67 | struct objlayout *objlay = OBJLAYOUT(lo); | ||
68 | |||
69 | dprintk("%s: objlay %p\n", __func__, objlay); | ||
70 | |||
71 | WARN_ON(!list_empty(&objlay->err_list)); | ||
72 | kfree(objlay); | ||
73 | } | ||
74 | |||
75 | /* | ||
76 | * Unmarshall layout and store it in pnfslay. | ||
77 | */ | ||
78 | struct pnfs_layout_segment * | ||
79 | objlayout_alloc_lseg(struct pnfs_layout_hdr *pnfslay, | ||
80 | struct nfs4_layoutget_res *lgr, | ||
81 | gfp_t gfp_flags) | ||
82 | { | ||
83 | int status = -ENOMEM; | ||
84 | struct xdr_stream stream; | ||
85 | struct xdr_buf buf = { | ||
86 | .pages = lgr->layoutp->pages, | ||
87 | .page_len = lgr->layoutp->len, | ||
88 | .buflen = lgr->layoutp->len, | ||
89 | .len = lgr->layoutp->len, | ||
90 | }; | ||
91 | struct page *scratch; | ||
92 | struct pnfs_layout_segment *lseg; | ||
93 | |||
94 | dprintk("%s: Begin pnfslay %p\n", __func__, pnfslay); | ||
95 | |||
96 | scratch = alloc_page(gfp_flags); | ||
97 | if (!scratch) | ||
98 | goto err_nofree; | ||
99 | |||
100 | xdr_init_decode(&stream, &buf, NULL); | ||
101 | xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE); | ||
102 | |||
103 | status = objio_alloc_lseg(&lseg, pnfslay, &lgr->range, &stream, gfp_flags); | ||
104 | if (unlikely(status)) { | ||
105 | dprintk("%s: objio_alloc_lseg Return err %d\n", __func__, | ||
106 | status); | ||
107 | goto err; | ||
108 | } | ||
109 | |||
110 | __free_page(scratch); | ||
111 | |||
112 | dprintk("%s: Return %p\n", __func__, lseg); | ||
113 | return lseg; | ||
114 | |||
115 | err: | ||
116 | __free_page(scratch); | ||
117 | err_nofree: | ||
118 | dprintk("%s: Err Return=>%d\n", __func__, status); | ||
119 | return ERR_PTR(status); | ||
120 | } | ||
121 | |||
/*
 * Free a layout segment.  A NULL @lseg is tolerated.
 */
void
objlayout_free_lseg(struct pnfs_layout_segment *lseg)
{
	dprintk("%s: freeing layout segment %p\n", __func__, lseg);

	if (likely(lseg))
		objio_free_lseg(lseg);
}
135 | |||
136 | /* | ||
137 | * I/O Operations | ||
138 | */ | ||
139 | static inline u64 | ||
140 | end_offset(u64 start, u64 len) | ||
141 | { | ||
142 | u64 end; | ||
143 | |||
144 | end = start + len; | ||
145 | return end >= start ? end : NFS4_MAX_UINT64; | ||
146 | } | ||
147 | |||
148 | /* last octet in a range */ | ||
149 | static inline u64 | ||
150 | last_byte_offset(u64 start, u64 len) | ||
151 | { | ||
152 | u64 end; | ||
153 | |||
154 | BUG_ON(!len); | ||
155 | end = start + len; | ||
156 | return end > start ? end - 1 : NFS4_MAX_UINT64; | ||
157 | } | ||
158 | |||
159 | static struct objlayout_io_state * | ||
160 | objlayout_alloc_io_state(struct pnfs_layout_hdr *pnfs_layout_type, | ||
161 | struct page **pages, | ||
162 | unsigned pgbase, | ||
163 | loff_t offset, | ||
164 | size_t count, | ||
165 | struct pnfs_layout_segment *lseg, | ||
166 | void *rpcdata, | ||
167 | gfp_t gfp_flags) | ||
168 | { | ||
169 | struct objlayout_io_state *state; | ||
170 | u64 lseg_end_offset; | ||
171 | |||
172 | dprintk("%s: allocating io_state\n", __func__); | ||
173 | if (objio_alloc_io_state(lseg, &state, gfp_flags)) | ||
174 | return NULL; | ||
175 | |||
176 | BUG_ON(offset < lseg->pls_range.offset); | ||
177 | lseg_end_offset = end_offset(lseg->pls_range.offset, | ||
178 | lseg->pls_range.length); | ||
179 | BUG_ON(offset >= lseg_end_offset); | ||
180 | if (offset + count > lseg_end_offset) { | ||
181 | count = lseg->pls_range.length - | ||
182 | (offset - lseg->pls_range.offset); | ||
183 | dprintk("%s: truncated count %Zd\n", __func__, count); | ||
184 | } | ||
185 | |||
186 | if (pgbase > PAGE_SIZE) { | ||
187 | pages += pgbase >> PAGE_SHIFT; | ||
188 | pgbase &= ~PAGE_MASK; | ||
189 | } | ||
190 | |||
191 | INIT_LIST_HEAD(&state->err_list); | ||
192 | state->lseg = lseg; | ||
193 | state->rpcdata = rpcdata; | ||
194 | state->pages = pages; | ||
195 | state->pgbase = pgbase; | ||
196 | state->nr_pages = (pgbase + count + PAGE_SIZE - 1) >> PAGE_SHIFT; | ||
197 | state->offset = offset; | ||
198 | state->count = count; | ||
199 | state->sync = 0; | ||
200 | |||
201 | return state; | ||
202 | } | ||
203 | |||
/* Release an I/O state; a NULL @state is tolerated. */
static void
objlayout_free_io_state(struct objlayout_io_state *state)
{
	dprintk("%s: freeing io_state\n", __func__);

	if (likely(state))
		objio_free_io_state(state);
}
213 | |||
214 | /* | ||
215 | * I/O done common code | ||
216 | */ | ||
217 | static void | ||
218 | objlayout_iodone(struct objlayout_io_state *state) | ||
219 | { | ||
220 | dprintk("%s: state %p status\n", __func__, state); | ||
221 | |||
222 | if (likely(state->status >= 0)) { | ||
223 | objlayout_free_io_state(state); | ||
224 | } else { | ||
225 | struct objlayout *objlay = OBJLAYOUT(state->lseg->pls_layout); | ||
226 | |||
227 | spin_lock(&objlay->lock); | ||
228 | objlay->delta_space_valid = OBJ_DSU_INVALID; | ||
229 | list_add(&objlay->err_list, &state->err_list); | ||
230 | spin_unlock(&objlay->lock); | ||
231 | } | ||
232 | } | ||
233 | |||
234 | /* | ||
235 | * objlayout_io_set_result - Set an osd_error code on a specific osd comp. | ||
236 | * | ||
237 | * The @index component IO failed (error returned from target). Register | ||
238 | * the error for later reporting at layout-return. | ||
239 | */ | ||
240 | void | ||
241 | objlayout_io_set_result(struct objlayout_io_state *state, unsigned index, | ||
242 | struct pnfs_osd_objid *pooid, int osd_error, | ||
243 | u64 offset, u64 length, bool is_write) | ||
244 | { | ||
245 | struct pnfs_osd_ioerr *ioerr = &state->ioerrs[index]; | ||
246 | |||
247 | BUG_ON(index >= state->num_comps); | ||
248 | if (osd_error) { | ||
249 | ioerr->oer_component = *pooid; | ||
250 | ioerr->oer_comp_offset = offset; | ||
251 | ioerr->oer_comp_length = length; | ||
252 | ioerr->oer_iswrite = is_write; | ||
253 | ioerr->oer_errno = osd_error; | ||
254 | |||
255 | dprintk("%s: err[%d]: errno=%d is_write=%d dev(%llx:%llx) " | ||
256 | "par=0x%llx obj=0x%llx offset=0x%llx length=0x%llx\n", | ||
257 | __func__, index, ioerr->oer_errno, | ||
258 | ioerr->oer_iswrite, | ||
259 | _DEVID_LO(&ioerr->oer_component.oid_device_id), | ||
260 | _DEVID_HI(&ioerr->oer_component.oid_device_id), | ||
261 | ioerr->oer_component.oid_partition_id, | ||
262 | ioerr->oer_component.oid_object_id, | ||
263 | ioerr->oer_comp_offset, | ||
264 | ioerr->oer_comp_length); | ||
265 | } else { | ||
266 | /* User need not call if no error is reported */ | ||
267 | ioerr->oer_errno = 0; | ||
268 | } | ||
269 | } | ||
270 | |||
271 | /* Function scheduled on rpc workqueue to call ->nfs_readlist_complete(). | ||
272 | * This is because the osd completion is called with ints-off from | ||
273 | * the block layer | ||
274 | */ | ||
275 | static void _rpc_read_complete(struct work_struct *work) | ||
276 | { | ||
277 | struct rpc_task *task; | ||
278 | struct nfs_read_data *rdata; | ||
279 | |||
280 | dprintk("%s enter\n", __func__); | ||
281 | task = container_of(work, struct rpc_task, u.tk_work); | ||
282 | rdata = container_of(task, struct nfs_read_data, task); | ||
283 | |||
284 | pnfs_ld_read_done(rdata); | ||
285 | } | ||
286 | |||
287 | void | ||
288 | objlayout_read_done(struct objlayout_io_state *state, ssize_t status, bool sync) | ||
289 | { | ||
290 | int eof = state->eof; | ||
291 | struct nfs_read_data *rdata; | ||
292 | |||
293 | state->status = status; | ||
294 | dprintk("%s: Begin status=%ld eof=%d\n", __func__, status, eof); | ||
295 | rdata = state->rpcdata; | ||
296 | rdata->task.tk_status = status; | ||
297 | if (status >= 0) { | ||
298 | rdata->res.count = status; | ||
299 | rdata->res.eof = eof; | ||
300 | } | ||
301 | objlayout_iodone(state); | ||
302 | /* must not use state after this point */ | ||
303 | |||
304 | if (sync) | ||
305 | pnfs_ld_read_done(rdata); | ||
306 | else { | ||
307 | INIT_WORK(&rdata->task.u.tk_work, _rpc_read_complete); | ||
308 | schedule_work(&rdata->task.u.tk_work); | ||
309 | } | ||
310 | } | ||
311 | |||
312 | /* | ||
313 | * Perform sync or async reads. | ||
314 | */ | ||
315 | enum pnfs_try_status | ||
316 | objlayout_read_pagelist(struct nfs_read_data *rdata) | ||
317 | { | ||
318 | loff_t offset = rdata->args.offset; | ||
319 | size_t count = rdata->args.count; | ||
320 | struct objlayout_io_state *state; | ||
321 | ssize_t status = 0; | ||
322 | loff_t eof; | ||
323 | |||
324 | dprintk("%s: Begin inode %p offset %llu count %d\n", | ||
325 | __func__, rdata->inode, offset, (int)count); | ||
326 | |||
327 | eof = i_size_read(rdata->inode); | ||
328 | if (unlikely(offset + count > eof)) { | ||
329 | if (offset >= eof) { | ||
330 | status = 0; | ||
331 | rdata->res.count = 0; | ||
332 | rdata->res.eof = 1; | ||
333 | goto out; | ||
334 | } | ||
335 | count = eof - offset; | ||
336 | } | ||
337 | |||
338 | state = objlayout_alloc_io_state(NFS_I(rdata->inode)->layout, | ||
339 | rdata->args.pages, rdata->args.pgbase, | ||
340 | offset, count, | ||
341 | rdata->lseg, rdata, | ||
342 | GFP_KERNEL); | ||
343 | if (unlikely(!state)) { | ||
344 | status = -ENOMEM; | ||
345 | goto out; | ||
346 | } | ||
347 | |||
348 | state->eof = state->offset + state->count >= eof; | ||
349 | |||
350 | status = objio_read_pagelist(state); | ||
351 | out: | ||
352 | dprintk("%s: Return status %Zd\n", __func__, status); | ||
353 | rdata->pnfs_error = status; | ||
354 | return PNFS_ATTEMPTED; | ||
355 | } | ||
356 | |||
357 | /* Function scheduled on rpc workqueue to call ->nfs_writelist_complete(). | ||
358 | * This is because the osd completion is called with ints-off from | ||
359 | * the block layer | ||
360 | */ | ||
361 | static void _rpc_write_complete(struct work_struct *work) | ||
362 | { | ||
363 | struct rpc_task *task; | ||
364 | struct nfs_write_data *wdata; | ||
365 | |||
366 | dprintk("%s enter\n", __func__); | ||
367 | task = container_of(work, struct rpc_task, u.tk_work); | ||
368 | wdata = container_of(task, struct nfs_write_data, task); | ||
369 | |||
370 | pnfs_ld_write_done(wdata); | ||
371 | } | ||
372 | |||
373 | void | ||
374 | objlayout_write_done(struct objlayout_io_state *state, ssize_t status, | ||
375 | bool sync) | ||
376 | { | ||
377 | struct nfs_write_data *wdata; | ||
378 | |||
379 | dprintk("%s: Begin\n", __func__); | ||
380 | wdata = state->rpcdata; | ||
381 | state->status = status; | ||
382 | wdata->task.tk_status = status; | ||
383 | if (status >= 0) { | ||
384 | wdata->res.count = status; | ||
385 | wdata->verf.committed = state->committed; | ||
386 | dprintk("%s: Return status %d committed %d\n", | ||
387 | __func__, wdata->task.tk_status, | ||
388 | wdata->verf.committed); | ||
389 | } else | ||
390 | dprintk("%s: Return status %d\n", | ||
391 | __func__, wdata->task.tk_status); | ||
392 | objlayout_iodone(state); | ||
393 | /* must not use state after this point */ | ||
394 | |||
395 | if (sync) | ||
396 | pnfs_ld_write_done(wdata); | ||
397 | else { | ||
398 | INIT_WORK(&wdata->task.u.tk_work, _rpc_write_complete); | ||
399 | schedule_work(&wdata->task.u.tk_work); | ||
400 | } | ||
401 | } | ||
402 | |||
403 | /* | ||
404 | * Perform sync or async writes. | ||
405 | */ | ||
406 | enum pnfs_try_status | ||
407 | objlayout_write_pagelist(struct nfs_write_data *wdata, | ||
408 | int how) | ||
409 | { | ||
410 | struct objlayout_io_state *state; | ||
411 | ssize_t status; | ||
412 | |||
413 | dprintk("%s: Begin inode %p offset %llu count %u\n", | ||
414 | __func__, wdata->inode, wdata->args.offset, wdata->args.count); | ||
415 | |||
416 | state = objlayout_alloc_io_state(NFS_I(wdata->inode)->layout, | ||
417 | wdata->args.pages, | ||
418 | wdata->args.pgbase, | ||
419 | wdata->args.offset, | ||
420 | wdata->args.count, | ||
421 | wdata->lseg, wdata, | ||
422 | GFP_NOFS); | ||
423 | if (unlikely(!state)) { | ||
424 | status = -ENOMEM; | ||
425 | goto out; | ||
426 | } | ||
427 | |||
428 | state->sync = how & FLUSH_SYNC; | ||
429 | |||
430 | status = objio_write_pagelist(state, how & FLUSH_STABLE); | ||
431 | out: | ||
432 | dprintk("%s: Return status %Zd\n", __func__, status); | ||
433 | wdata->pnfs_error = status; | ||
434 | return PNFS_ATTEMPTED; | ||
435 | } | ||
436 | |||
437 | void | ||
438 | objlayout_encode_layoutcommit(struct pnfs_layout_hdr *pnfslay, | ||
439 | struct xdr_stream *xdr, | ||
440 | const struct nfs4_layoutcommit_args *args) | ||
441 | { | ||
442 | struct objlayout *objlay = OBJLAYOUT(pnfslay); | ||
443 | struct pnfs_osd_layoutupdate lou; | ||
444 | __be32 *start; | ||
445 | |||
446 | dprintk("%s: Begin\n", __func__); | ||
447 | |||
448 | spin_lock(&objlay->lock); | ||
449 | lou.dsu_valid = (objlay->delta_space_valid == OBJ_DSU_VALID); | ||
450 | lou.dsu_delta = objlay->delta_space_used; | ||
451 | objlay->delta_space_used = 0; | ||
452 | objlay->delta_space_valid = OBJ_DSU_INIT; | ||
453 | lou.olu_ioerr_flag = !list_empty(&objlay->err_list); | ||
454 | spin_unlock(&objlay->lock); | ||
455 | |||
456 | start = xdr_reserve_space(xdr, 4); | ||
457 | |||
458 | BUG_ON(pnfs_osd_xdr_encode_layoutupdate(xdr, &lou)); | ||
459 | |||
460 | *start = cpu_to_be32((xdr->p - start - 1) * 4); | ||
461 | |||
462 | dprintk("%s: Return delta_space_used %lld err %d\n", __func__, | ||
463 | lou.dsu_delta, lou.olu_ioerr_flag); | ||
464 | } | ||
465 | |||
466 | static int | ||
467 | err_prio(u32 oer_errno) | ||
468 | { | ||
469 | switch (oer_errno) { | ||
470 | case 0: | ||
471 | return 0; | ||
472 | |||
473 | case PNFS_OSD_ERR_RESOURCE: | ||
474 | return OSD_ERR_PRI_RESOURCE; | ||
475 | case PNFS_OSD_ERR_BAD_CRED: | ||
476 | return OSD_ERR_PRI_BAD_CRED; | ||
477 | case PNFS_OSD_ERR_NO_ACCESS: | ||
478 | return OSD_ERR_PRI_NO_ACCESS; | ||
479 | case PNFS_OSD_ERR_UNREACHABLE: | ||
480 | return OSD_ERR_PRI_UNREACHABLE; | ||
481 | case PNFS_OSD_ERR_NOT_FOUND: | ||
482 | return OSD_ERR_PRI_NOT_FOUND; | ||
483 | case PNFS_OSD_ERR_NO_SPACE: | ||
484 | return OSD_ERR_PRI_NO_SPACE; | ||
485 | default: | ||
486 | WARN_ON(1); | ||
487 | /* fallthrough */ | ||
488 | case PNFS_OSD_ERR_EIO: | ||
489 | return OSD_ERR_PRI_EIO; | ||
490 | } | ||
491 | } | ||
492 | |||
493 | static void | ||
494 | merge_ioerr(struct pnfs_osd_ioerr *dest_err, | ||
495 | const struct pnfs_osd_ioerr *src_err) | ||
496 | { | ||
497 | u64 dest_end, src_end; | ||
498 | |||
499 | if (!dest_err->oer_errno) { | ||
500 | *dest_err = *src_err; | ||
501 | /* accumulated device must be blank */ | ||
502 | memset(&dest_err->oer_component.oid_device_id, 0, | ||
503 | sizeof(dest_err->oer_component.oid_device_id)); | ||
504 | |||
505 | return; | ||
506 | } | ||
507 | |||
508 | if (dest_err->oer_component.oid_partition_id != | ||
509 | src_err->oer_component.oid_partition_id) | ||
510 | dest_err->oer_component.oid_partition_id = 0; | ||
511 | |||
512 | if (dest_err->oer_component.oid_object_id != | ||
513 | src_err->oer_component.oid_object_id) | ||
514 | dest_err->oer_component.oid_object_id = 0; | ||
515 | |||
516 | if (dest_err->oer_comp_offset > src_err->oer_comp_offset) | ||
517 | dest_err->oer_comp_offset = src_err->oer_comp_offset; | ||
518 | |||
519 | dest_end = end_offset(dest_err->oer_comp_offset, | ||
520 | dest_err->oer_comp_length); | ||
521 | src_end = end_offset(src_err->oer_comp_offset, | ||
522 | src_err->oer_comp_length); | ||
523 | if (dest_end < src_end) | ||
524 | dest_end = src_end; | ||
525 | |||
526 | dest_err->oer_comp_length = dest_end - dest_err->oer_comp_offset; | ||
527 | |||
528 | if ((src_err->oer_iswrite == dest_err->oer_iswrite) && | ||
529 | (err_prio(src_err->oer_errno) > err_prio(dest_err->oer_errno))) { | ||
530 | dest_err->oer_errno = src_err->oer_errno; | ||
531 | } else if (src_err->oer_iswrite) { | ||
532 | dest_err->oer_iswrite = true; | ||
533 | dest_err->oer_errno = src_err->oer_errno; | ||
534 | } | ||
535 | } | ||
536 | |||
537 | static void | ||
538 | encode_accumulated_error(struct objlayout *objlay, __be32 *p) | ||
539 | { | ||
540 | struct objlayout_io_state *state, *tmp; | ||
541 | struct pnfs_osd_ioerr accumulated_err = {.oer_errno = 0}; | ||
542 | |||
543 | list_for_each_entry_safe(state, tmp, &objlay->err_list, err_list) { | ||
544 | unsigned i; | ||
545 | |||
546 | for (i = 0; i < state->num_comps; i++) { | ||
547 | struct pnfs_osd_ioerr *ioerr = &state->ioerrs[i]; | ||
548 | |||
549 | if (!ioerr->oer_errno) | ||
550 | continue; | ||
551 | |||
552 | printk(KERN_ERR "%s: err[%d]: errno=%d is_write=%d " | ||
553 | "dev(%llx:%llx) par=0x%llx obj=0x%llx " | ||
554 | "offset=0x%llx length=0x%llx\n", | ||
555 | __func__, i, ioerr->oer_errno, | ||
556 | ioerr->oer_iswrite, | ||
557 | _DEVID_LO(&ioerr->oer_component.oid_device_id), | ||
558 | _DEVID_HI(&ioerr->oer_component.oid_device_id), | ||
559 | ioerr->oer_component.oid_partition_id, | ||
560 | ioerr->oer_component.oid_object_id, | ||
561 | ioerr->oer_comp_offset, | ||
562 | ioerr->oer_comp_length); | ||
563 | |||
564 | merge_ioerr(&accumulated_err, ioerr); | ||
565 | } | ||
566 | list_del(&state->err_list); | ||
567 | objlayout_free_io_state(state); | ||
568 | } | ||
569 | |||
570 | pnfs_osd_xdr_encode_ioerr(p, &accumulated_err); | ||
571 | } | ||
572 | |||
573 | void | ||
574 | objlayout_encode_layoutreturn(struct pnfs_layout_hdr *pnfslay, | ||
575 | struct xdr_stream *xdr, | ||
576 | const struct nfs4_layoutreturn_args *args) | ||
577 | { | ||
578 | struct objlayout *objlay = OBJLAYOUT(pnfslay); | ||
579 | struct objlayout_io_state *state, *tmp; | ||
580 | __be32 *start; | ||
581 | |||
582 | dprintk("%s: Begin\n", __func__); | ||
583 | start = xdr_reserve_space(xdr, 4); | ||
584 | BUG_ON(!start); | ||
585 | |||
586 | spin_lock(&objlay->lock); | ||
587 | |||
588 | list_for_each_entry_safe(state, tmp, &objlay->err_list, err_list) { | ||
589 | __be32 *last_xdr = NULL, *p; | ||
590 | unsigned i; | ||
591 | int res = 0; | ||
592 | |||
593 | for (i = 0; i < state->num_comps; i++) { | ||
594 | struct pnfs_osd_ioerr *ioerr = &state->ioerrs[i]; | ||
595 | |||
596 | if (!ioerr->oer_errno) | ||
597 | continue; | ||
598 | |||
599 | dprintk("%s: err[%d]: errno=%d is_write=%d " | ||
600 | "dev(%llx:%llx) par=0x%llx obj=0x%llx " | ||
601 | "offset=0x%llx length=0x%llx\n", | ||
602 | __func__, i, ioerr->oer_errno, | ||
603 | ioerr->oer_iswrite, | ||
604 | _DEVID_LO(&ioerr->oer_component.oid_device_id), | ||
605 | _DEVID_HI(&ioerr->oer_component.oid_device_id), | ||
606 | ioerr->oer_component.oid_partition_id, | ||
607 | ioerr->oer_component.oid_object_id, | ||
608 | ioerr->oer_comp_offset, | ||
609 | ioerr->oer_comp_length); | ||
610 | |||
611 | p = pnfs_osd_xdr_ioerr_reserve_space(xdr); | ||
612 | if (unlikely(!p)) { | ||
613 | res = -E2BIG; | ||
614 | break; /* accumulated_error */ | ||
615 | } | ||
616 | |||
617 | last_xdr = p; | ||
618 | pnfs_osd_xdr_encode_ioerr(p, &state->ioerrs[i]); | ||
619 | } | ||
620 | |||
621 | /* TODO: use xdr_write_pages */ | ||
622 | if (unlikely(res)) { | ||
623 | /* no space for even one error descriptor */ | ||
624 | BUG_ON(!last_xdr); | ||
625 | |||
626 | /* we've encountered a situation with lots and lots of | ||
627 | * errors and no space to encode them all. Use the last | ||
628 | * available slot to report the union of all the | ||
629 | * remaining errors. | ||
630 | */ | ||
631 | encode_accumulated_error(objlay, last_xdr); | ||
632 | goto loop_done; | ||
633 | } | ||
634 | list_del(&state->err_list); | ||
635 | objlayout_free_io_state(state); | ||
636 | } | ||
637 | loop_done: | ||
638 | spin_unlock(&objlay->lock); | ||
639 | |||
640 | *start = cpu_to_be32((xdr->p - start - 1) * 4); | ||
641 | dprintk("%s: Return\n", __func__); | ||
642 | } | ||
643 | |||
644 | |||
645 | /* | ||
646 | * Get Device Info API for io engines | ||
647 | */ | ||
648 | struct objlayout_deviceinfo { | ||
649 | struct page *page; | ||
650 | struct pnfs_osd_deviceaddr da; /* This must be last */ | ||
651 | }; | ||
652 | |||
653 | /* Initialize and call nfs_getdeviceinfo, then decode and return a | ||
654 | * "struct pnfs_osd_deviceaddr *" Eventually objlayout_put_deviceinfo() | ||
655 | * should be called. | ||
656 | */ | ||
657 | int objlayout_get_deviceinfo(struct pnfs_layout_hdr *pnfslay, | ||
658 | struct nfs4_deviceid *d_id, struct pnfs_osd_deviceaddr **deviceaddr, | ||
659 | gfp_t gfp_flags) | ||
660 | { | ||
661 | struct objlayout_deviceinfo *odi; | ||
662 | struct pnfs_device pd; | ||
663 | struct super_block *sb; | ||
664 | struct page *page, **pages; | ||
665 | u32 *p; | ||
666 | int err; | ||
667 | |||
668 | page = alloc_page(gfp_flags); | ||
669 | if (!page) | ||
670 | return -ENOMEM; | ||
671 | |||
672 | pages = &page; | ||
673 | pd.pages = pages; | ||
674 | |||
675 | memcpy(&pd.dev_id, d_id, sizeof(*d_id)); | ||
676 | pd.layout_type = LAYOUT_OSD2_OBJECTS; | ||
677 | pd.pages = &page; | ||
678 | pd.pgbase = 0; | ||
679 | pd.pglen = PAGE_SIZE; | ||
680 | pd.mincount = 0; | ||
681 | |||
682 | sb = pnfslay->plh_inode->i_sb; | ||
683 | err = nfs4_proc_getdeviceinfo(NFS_SERVER(pnfslay->plh_inode), &pd); | ||
684 | dprintk("%s nfs_getdeviceinfo returned %d\n", __func__, err); | ||
685 | if (err) | ||
686 | goto err_out; | ||
687 | |||
688 | p = page_address(page); | ||
689 | odi = kzalloc(sizeof(*odi), gfp_flags); | ||
690 | if (!odi) { | ||
691 | err = -ENOMEM; | ||
692 | goto err_out; | ||
693 | } | ||
694 | pnfs_osd_xdr_decode_deviceaddr(&odi->da, p); | ||
695 | odi->page = page; | ||
696 | *deviceaddr = &odi->da; | ||
697 | return 0; | ||
698 | |||
699 | err_out: | ||
700 | __free_page(page); | ||
701 | return err; | ||
702 | } | ||
703 | |||
704 | void objlayout_put_deviceinfo(struct pnfs_osd_deviceaddr *deviceaddr) | ||
705 | { | ||
706 | struct objlayout_deviceinfo *odi = container_of(deviceaddr, | ||
707 | struct objlayout_deviceinfo, | ||
708 | da); | ||
709 | |||
710 | __free_page(odi->page); | ||
711 | kfree(odi); | ||
712 | } | ||
diff --git a/fs/nfs/objlayout/objlayout.h b/fs/nfs/objlayout/objlayout.h new file mode 100644 index 000000000000..a8244c8e042d --- /dev/null +++ b/fs/nfs/objlayout/objlayout.h | |||
@@ -0,0 +1,187 @@ | |||
1 | /* | ||
2 | * Data types and function declarations for interfacing with the | ||
3 | * pNFS standard object layout driver. | ||
4 | * | ||
5 | * Copyright (C) 2007 Panasas Inc. [year of first publication] | ||
6 | * All rights reserved. | ||
7 | * | ||
8 | * Benny Halevy <bhalevy@panasas.com> | ||
9 | * Boaz Harrosh <bharrosh@panasas.com> | ||
10 | * | ||
11 | * This program is free software; you can redistribute it and/or modify | ||
12 | * it under the terms of the GNU General Public License version 2 | ||
13 | * See the file COPYING included with this distribution for more details. | ||
14 | * | ||
15 | * Redistribution and use in source and binary forms, with or without | ||
16 | * modification, are permitted provided that the following conditions | ||
17 | * are met: | ||
18 | * | ||
19 | * 1. Redistributions of source code must retain the above copyright | ||
20 | * notice, this list of conditions and the following disclaimer. | ||
21 | * 2. Redistributions in binary form must reproduce the above copyright | ||
22 | * notice, this list of conditions and the following disclaimer in the | ||
23 | * documentation and/or other materials provided with the distribution. | ||
24 | * 3. Neither the name of the Panasas company nor the names of its | ||
25 | * contributors may be used to endorse or promote products derived | ||
26 | * from this software without specific prior written permission. | ||
27 | * | ||
28 | * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED | ||
29 | * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF | ||
30 | * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | ||
31 | * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | ||
32 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | ||
33 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | ||
34 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR | ||
35 | * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF | ||
36 | * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING | ||
37 | * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | ||
38 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
39 | */ | ||
40 | |||
41 | #ifndef _OBJLAYOUT_H | ||
42 | #define _OBJLAYOUT_H | ||
43 | |||
44 | #include <linux/nfs_fs.h> | ||
45 | #include <linux/pnfs_osd_xdr.h> | ||
46 | #include "../pnfs.h" | ||
47 | |||
48 | /* | ||
49 | * per-inode layout | ||
50 | */ | ||
51 | struct objlayout { | ||
52 | struct pnfs_layout_hdr pnfs_layout; | ||
53 | |||
54 | /* for layout_commit */ | ||
55 | enum osd_delta_space_valid_enum { | ||
56 | OBJ_DSU_INIT = 0, | ||
57 | OBJ_DSU_VALID, | ||
58 | OBJ_DSU_INVALID, | ||
59 | } delta_space_valid; | ||
60 | s64 delta_space_used; /* consumed by write ops */ | ||
61 | |||
62 | /* for layout_return */ | ||
63 | spinlock_t lock; | ||
64 | struct list_head err_list; | ||
65 | }; | ||
66 | |||
67 | static inline struct objlayout * | ||
68 | OBJLAYOUT(struct pnfs_layout_hdr *lo) | ||
69 | { | ||
70 | return container_of(lo, struct objlayout, pnfs_layout); | ||
71 | } | ||
72 | |||
73 | /* | ||
74 | * per-I/O operation state | ||
75 | * embedded in objects provider io_state data structure | ||
76 | */ | ||
77 | struct objlayout_io_state { | ||
78 | struct pnfs_layout_segment *lseg; | ||
79 | |||
80 | struct page **pages; | ||
81 | unsigned pgbase; | ||
82 | unsigned nr_pages; | ||
83 | unsigned long count; | ||
84 | loff_t offset; | ||
85 | bool sync; | ||
86 | |||
87 | void *rpcdata; | ||
88 | int status; /* res */ | ||
89 | int eof; /* res */ | ||
90 | int committed; /* res */ | ||
91 | |||
92 | /* Error reporting (layout_return) */ | ||
93 | struct list_head err_list; | ||
94 | unsigned num_comps; | ||
95 | /* Pointer to array of error descriptors of size num_comps. | ||
96 | * It should contain as many entries as devices in the osd_layout | ||
97 | * that participate in the I/O. It is up to the io_engine to allocate | ||
98 | * needed space and set num_comps. | ||
99 | */ | ||
100 | struct pnfs_osd_ioerr *ioerrs; | ||
101 | }; | ||
102 | |||
103 | /* | ||
104 | * Raid engine I/O API | ||
105 | */ | ||
106 | extern int objio_alloc_lseg(struct pnfs_layout_segment **outp, | ||
107 | struct pnfs_layout_hdr *pnfslay, | ||
108 | struct pnfs_layout_range *range, | ||
109 | struct xdr_stream *xdr, | ||
110 | gfp_t gfp_flags); | ||
111 | extern void objio_free_lseg(struct pnfs_layout_segment *lseg); | ||
112 | |||
113 | extern int objio_alloc_io_state( | ||
114 | struct pnfs_layout_segment *lseg, | ||
115 | struct objlayout_io_state **outp, | ||
116 | gfp_t gfp_flags); | ||
117 | extern void objio_free_io_state(struct objlayout_io_state *state); | ||
118 | |||
119 | extern ssize_t objio_read_pagelist(struct objlayout_io_state *ol_state); | ||
120 | extern ssize_t objio_write_pagelist(struct objlayout_io_state *ol_state, | ||
121 | bool stable); | ||
122 | |||
123 | /* | ||
124 | * callback API | ||
125 | */ | ||
126 | extern void objlayout_io_set_result(struct objlayout_io_state *state, | ||
127 | unsigned index, struct pnfs_osd_objid *pooid, | ||
128 | int osd_error, u64 offset, u64 length, bool is_write); | ||
129 | |||
130 | static inline void | ||
131 | objlayout_add_delta_space_used(struct objlayout_io_state *state, s64 space_used) | ||
132 | { | ||
133 | struct objlayout *objlay = OBJLAYOUT(state->lseg->pls_layout); | ||
134 | |||
135 | /* If one of the I/Os errored out and the delta_space_used was | ||
136 | * invalid we render the complete report as invalid. Protocol mandate | ||
137 | * the DSU be accurate or not reported. | ||
138 | */ | ||
139 | spin_lock(&objlay->lock); | ||
140 | if (objlay->delta_space_valid != OBJ_DSU_INVALID) { | ||
141 | objlay->delta_space_valid = OBJ_DSU_VALID; | ||
142 | objlay->delta_space_used += space_used; | ||
143 | } | ||
144 | spin_unlock(&objlay->lock); | ||
145 | } | ||
146 | |||
147 | extern void objlayout_read_done(struct objlayout_io_state *state, | ||
148 | ssize_t status, bool sync); | ||
149 | extern void objlayout_write_done(struct objlayout_io_state *state, | ||
150 | ssize_t status, bool sync); | ||
151 | |||
152 | extern int objlayout_get_deviceinfo(struct pnfs_layout_hdr *pnfslay, | ||
153 | struct nfs4_deviceid *d_id, struct pnfs_osd_deviceaddr **deviceaddr, | ||
154 | gfp_t gfp_flags); | ||
155 | extern void objlayout_put_deviceinfo(struct pnfs_osd_deviceaddr *deviceaddr); | ||
156 | |||
157 | /* | ||
158 | * exported generic objects function vectors | ||
159 | */ | ||
160 | |||
161 | extern struct pnfs_layout_hdr *objlayout_alloc_layout_hdr(struct inode *, gfp_t gfp_flags); | ||
162 | extern void objlayout_free_layout_hdr(struct pnfs_layout_hdr *); | ||
163 | |||
164 | extern struct pnfs_layout_segment *objlayout_alloc_lseg( | ||
165 | struct pnfs_layout_hdr *, | ||
166 | struct nfs4_layoutget_res *, | ||
167 | gfp_t gfp_flags); | ||
168 | extern void objlayout_free_lseg(struct pnfs_layout_segment *); | ||
169 | |||
170 | extern enum pnfs_try_status objlayout_read_pagelist( | ||
171 | struct nfs_read_data *); | ||
172 | |||
173 | extern enum pnfs_try_status objlayout_write_pagelist( | ||
174 | struct nfs_write_data *, | ||
175 | int how); | ||
176 | |||
177 | extern void objlayout_encode_layoutcommit( | ||
178 | struct pnfs_layout_hdr *, | ||
179 | struct xdr_stream *, | ||
180 | const struct nfs4_layoutcommit_args *); | ||
181 | |||
182 | extern void objlayout_encode_layoutreturn( | ||
183 | struct pnfs_layout_hdr *, | ||
184 | struct xdr_stream *, | ||
185 | const struct nfs4_layoutreturn_args *); | ||
186 | |||
187 | #endif /* _OBJLAYOUT_H */ | ||
diff --git a/fs/nfs/objlayout/pnfs_osd_xdr_cli.c b/fs/nfs/objlayout/pnfs_osd_xdr_cli.c new file mode 100644 index 000000000000..16fc758e9123 --- /dev/null +++ b/fs/nfs/objlayout/pnfs_osd_xdr_cli.c | |||
@@ -0,0 +1,412 @@ | |||
1 | /* | ||
2 | * Object-Based pNFS Layout XDR layer | ||
3 | * | ||
4 | * Copyright (C) 2007 Panasas Inc. [year of first publication] | ||
5 | * All rights reserved. | ||
6 | * | ||
7 | * Benny Halevy <bhalevy@panasas.com> | ||
8 | * Boaz Harrosh <bharrosh@panasas.com> | ||
9 | * | ||
10 | * This program is free software; you can redistribute it and/or modify | ||
11 | * it under the terms of the GNU General Public License version 2 | ||
12 | * See the file COPYING included with this distribution for more details. | ||
13 | * | ||
14 | * Redistribution and use in source and binary forms, with or without | ||
15 | * modification, are permitted provided that the following conditions | ||
16 | * are met: | ||
17 | * | ||
18 | * 1. Redistributions of source code must retain the above copyright | ||
19 | * notice, this list of conditions and the following disclaimer. | ||
20 | * 2. Redistributions in binary form must reproduce the above copyright | ||
21 | * notice, this list of conditions and the following disclaimer in the | ||
22 | * documentation and/or other materials provided with the distribution. | ||
23 | * 3. Neither the name of the Panasas company nor the names of its | ||
24 | * contributors may be used to endorse or promote products derived | ||
25 | * from this software without specific prior written permission. | ||
26 | * | ||
27 | * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED | ||
28 | * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF | ||
29 | * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | ||
30 | * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | ||
31 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | ||
32 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | ||
33 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR | ||
34 | * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF | ||
35 | * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING | ||
36 | * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | ||
37 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
38 | */ | ||
39 | |||
40 | #include <linux/pnfs_osd_xdr.h> | ||
41 | |||
42 | #define NFSDBG_FACILITY NFSDBG_PNFS_LD | ||
43 | |||
44 | /* | ||
45 | * The following implementation is based on RFC5664 | ||
46 | */ | ||
47 | |||
48 | /* | ||
49 | * struct pnfs_osd_objid { | ||
50 | * struct nfs4_deviceid oid_device_id; | ||
51 | * u64 oid_partition_id; | ||
52 | * u64 oid_object_id; | ||
53 | * }; // xdr size 32 bytes | ||
54 | */ | ||
55 | static __be32 * | ||
56 | _osd_xdr_decode_objid(__be32 *p, struct pnfs_osd_objid *objid) | ||
57 | { | ||
58 | p = xdr_decode_opaque_fixed(p, objid->oid_device_id.data, | ||
59 | sizeof(objid->oid_device_id.data)); | ||
60 | |||
61 | p = xdr_decode_hyper(p, &objid->oid_partition_id); | ||
62 | p = xdr_decode_hyper(p, &objid->oid_object_id); | ||
63 | return p; | ||
64 | } | ||
65 | /* | ||
66 | * struct pnfs_osd_opaque_cred { | ||
67 | * u32 cred_len; | ||
68 | * void *cred; | ||
69 | * }; // xdr size [variable] | ||
70 | * The return pointers are from the xdr buffer | ||
71 | */ | ||
72 | static int | ||
73 | _osd_xdr_decode_opaque_cred(struct pnfs_osd_opaque_cred *opaque_cred, | ||
74 | struct xdr_stream *xdr) | ||
75 | { | ||
76 | __be32 *p = xdr_inline_decode(xdr, 1); | ||
77 | |||
78 | if (!p) | ||
79 | return -EINVAL; | ||
80 | |||
81 | opaque_cred->cred_len = be32_to_cpu(*p++); | ||
82 | |||
83 | p = xdr_inline_decode(xdr, opaque_cred->cred_len); | ||
84 | if (!p) | ||
85 | return -EINVAL; | ||
86 | |||
87 | opaque_cred->cred = p; | ||
88 | return 0; | ||
89 | } | ||
90 | |||
91 | /* | ||
92 | * struct pnfs_osd_object_cred { | ||
93 | * struct pnfs_osd_objid oc_object_id; | ||
94 | * u32 oc_osd_version; | ||
95 | * u32 oc_cap_key_sec; | ||
96 | * struct pnfs_osd_opaque_cred oc_cap_key | ||
97 | * struct pnfs_osd_opaque_cred oc_cap; | ||
98 | * }; // xdr size 32 + 4 + 4 + [variable] + [variable] | ||
99 | */ | ||
100 | static int | ||
101 | _osd_xdr_decode_object_cred(struct pnfs_osd_object_cred *comp, | ||
102 | struct xdr_stream *xdr) | ||
103 | { | ||
104 | __be32 *p = xdr_inline_decode(xdr, 32 + 4 + 4); | ||
105 | int ret; | ||
106 | |||
107 | if (!p) | ||
108 | return -EIO; | ||
109 | |||
110 | p = _osd_xdr_decode_objid(p, &comp->oc_object_id); | ||
111 | comp->oc_osd_version = be32_to_cpup(p++); | ||
112 | comp->oc_cap_key_sec = be32_to_cpup(p); | ||
113 | |||
114 | ret = _osd_xdr_decode_opaque_cred(&comp->oc_cap_key, xdr); | ||
115 | if (unlikely(ret)) | ||
116 | return ret; | ||
117 | |||
118 | ret = _osd_xdr_decode_opaque_cred(&comp->oc_cap, xdr); | ||
119 | return ret; | ||
120 | } | ||
121 | |||
122 | /* | ||
123 | * struct pnfs_osd_data_map { | ||
124 | * u32 odm_num_comps; | ||
125 | * u64 odm_stripe_unit; | ||
126 | * u32 odm_group_width; | ||
127 | * u32 odm_group_depth; | ||
128 | * u32 odm_mirror_cnt; | ||
129 | * u32 odm_raid_algorithm; | ||
130 | * }; // xdr size 4 + 8 + 4 + 4 + 4 + 4 | ||
131 | */ | ||
132 | static inline int | ||
133 | _osd_data_map_xdr_sz(void) | ||
134 | { | ||
135 | return 4 + 8 + 4 + 4 + 4 + 4; | ||
136 | } | ||
137 | |||
138 | static __be32 * | ||
139 | _osd_xdr_decode_data_map(__be32 *p, struct pnfs_osd_data_map *data_map) | ||
140 | { | ||
141 | data_map->odm_num_comps = be32_to_cpup(p++); | ||
142 | p = xdr_decode_hyper(p, &data_map->odm_stripe_unit); | ||
143 | data_map->odm_group_width = be32_to_cpup(p++); | ||
144 | data_map->odm_group_depth = be32_to_cpup(p++); | ||
145 | data_map->odm_mirror_cnt = be32_to_cpup(p++); | ||
146 | data_map->odm_raid_algorithm = be32_to_cpup(p++); | ||
147 | dprintk("%s: odm_num_comps=%u odm_stripe_unit=%llu odm_group_width=%u " | ||
148 | "odm_group_depth=%u odm_mirror_cnt=%u odm_raid_algorithm=%u\n", | ||
149 | __func__, | ||
150 | data_map->odm_num_comps, | ||
151 | (unsigned long long)data_map->odm_stripe_unit, | ||
152 | data_map->odm_group_width, | ||
153 | data_map->odm_group_depth, | ||
154 | data_map->odm_mirror_cnt, | ||
155 | data_map->odm_raid_algorithm); | ||
156 | return p; | ||
157 | } | ||
158 | |||
159 | int pnfs_osd_xdr_decode_layout_map(struct pnfs_osd_layout *layout, | ||
160 | struct pnfs_osd_xdr_decode_layout_iter *iter, struct xdr_stream *xdr) | ||
161 | { | ||
162 | __be32 *p; | ||
163 | |||
164 | memset(iter, 0, sizeof(*iter)); | ||
165 | |||
166 | p = xdr_inline_decode(xdr, _osd_data_map_xdr_sz() + 4 + 4); | ||
167 | if (unlikely(!p)) | ||
168 | return -EINVAL; | ||
169 | |||
170 | p = _osd_xdr_decode_data_map(p, &layout->olo_map); | ||
171 | layout->olo_comps_index = be32_to_cpup(p++); | ||
172 | layout->olo_num_comps = be32_to_cpup(p++); | ||
173 | iter->total_comps = layout->olo_num_comps; | ||
174 | return 0; | ||
175 | } | ||
176 | |||
177 | bool pnfs_osd_xdr_decode_layout_comp(struct pnfs_osd_object_cred *comp, | ||
178 | struct pnfs_osd_xdr_decode_layout_iter *iter, struct xdr_stream *xdr, | ||
179 | int *err) | ||
180 | { | ||
181 | BUG_ON(iter->decoded_comps > iter->total_comps); | ||
182 | if (iter->decoded_comps == iter->total_comps) | ||
183 | return false; | ||
184 | |||
185 | *err = _osd_xdr_decode_object_cred(comp, xdr); | ||
186 | if (unlikely(*err)) { | ||
187 | dprintk("%s: _osd_xdr_decode_object_cred=>%d decoded_comps=%d " | ||
188 | "total_comps=%d\n", __func__, *err, | ||
189 | iter->decoded_comps, iter->total_comps); | ||
190 | return false; /* stop the loop */ | ||
191 | } | ||
192 | dprintk("%s: dev(%llx:%llx) par=0x%llx obj=0x%llx " | ||
193 | "key_len=%u cap_len=%u\n", | ||
194 | __func__, | ||
195 | _DEVID_LO(&comp->oc_object_id.oid_device_id), | ||
196 | _DEVID_HI(&comp->oc_object_id.oid_device_id), | ||
197 | comp->oc_object_id.oid_partition_id, | ||
198 | comp->oc_object_id.oid_object_id, | ||
199 | comp->oc_cap_key.cred_len, comp->oc_cap.cred_len); | ||
200 | |||
201 | iter->decoded_comps++; | ||
202 | return true; | ||
203 | } | ||
204 | |||
205 | /* | ||
206 | * Get Device Information Decoding | ||
207 | * | ||
208 | * Note: since Device Information is currently done synchronously, all | ||
209 | * variable-length string fields are left inside the rpc buffer and are only | ||
210 | * pointed to by the pnfs_osd_deviceaddr members. So the read buffer | ||
211 | * should not be freed while the returned information is in use. | ||
212 | */ | ||
213 | /* | ||
214 | *struct nfs4_string { | ||
215 | * unsigned int len; | ||
216 | * char *data; | ||
217 | *}; // size [variable] | ||
218 | * NOTE: Returned string points to inside the XDR buffer | ||
219 | */ | ||
220 | static __be32 * | ||
221 | __read_u8_opaque(__be32 *p, struct nfs4_string *str) | ||
222 | { | ||
223 | str->len = be32_to_cpup(p++); | ||
224 | str->data = (char *)p; | ||
225 | |||
226 | p += XDR_QUADLEN(str->len); | ||
227 | return p; | ||
228 | } | ||
229 | |||
230 | /* | ||
231 | * struct pnfs_osd_targetid { | ||
232 | * u32 oti_type; | ||
233 | * struct nfs4_string oti_scsi_device_id; | ||
234 | * };// size 4 + [variable] | ||
235 | */ | ||
236 | static __be32 * | ||
237 | __read_targetid(__be32 *p, struct pnfs_osd_targetid* targetid) | ||
238 | { | ||
239 | u32 oti_type; | ||
240 | |||
241 | oti_type = be32_to_cpup(p++); | ||
242 | targetid->oti_type = oti_type; | ||
243 | |||
244 | switch (oti_type) { | ||
245 | case OBJ_TARGET_SCSI_NAME: | ||
246 | case OBJ_TARGET_SCSI_DEVICE_ID: | ||
247 | p = __read_u8_opaque(p, &targetid->oti_scsi_device_id); | ||
248 | } | ||
249 | |||
250 | return p; | ||
251 | } | ||
252 | |||
253 | /* | ||
254 | * struct pnfs_osd_net_addr { | ||
255 | * struct nfs4_string r_netid; | ||
256 | * struct nfs4_string r_addr; | ||
257 | * }; | ||
258 | */ | ||
259 | static __be32 * | ||
260 | __read_net_addr(__be32 *p, struct pnfs_osd_net_addr* netaddr) | ||
261 | { | ||
262 | p = __read_u8_opaque(p, &netaddr->r_netid); | ||
263 | p = __read_u8_opaque(p, &netaddr->r_addr); | ||
264 | |||
265 | return p; | ||
266 | } | ||
267 | |||
268 | /* | ||
269 | * struct pnfs_osd_targetaddr { | ||
270 | * u32 ota_available; | ||
271 | * struct pnfs_osd_net_addr ota_netaddr; | ||
272 | * }; | ||
273 | */ | ||
274 | static __be32 * | ||
275 | __read_targetaddr(__be32 *p, struct pnfs_osd_targetaddr *targetaddr) | ||
276 | { | ||
277 | u32 ota_available; | ||
278 | |||
279 | ota_available = be32_to_cpup(p++); | ||
280 | targetaddr->ota_available = ota_available; | ||
281 | |||
282 | if (ota_available) | ||
283 | p = __read_net_addr(p, &targetaddr->ota_netaddr); | ||
284 | |||
285 | |||
286 | return p; | ||
287 | } | ||
288 | |||
289 | /* | ||
290 | * struct pnfs_osd_deviceaddr { | ||
291 | * struct pnfs_osd_targetid oda_targetid; | ||
292 | * struct pnfs_osd_targetaddr oda_targetaddr; | ||
293 | * u8 oda_lun[8]; | ||
294 | * struct nfs4_string oda_systemid; | ||
295 | * struct pnfs_osd_object_cred oda_root_obj_cred; | ||
296 | * struct nfs4_string oda_osdname; | ||
297 | * }; | ||
298 | */ | ||
299 | |||
300 | /* We need this version for the pnfs_osd_xdr_decode_deviceaddr which does | ||
301 | * not have an xdr_stream | ||
302 | */ | ||
303 | static __be32 * | ||
304 | __read_opaque_cred(__be32 *p, | ||
305 | struct pnfs_osd_opaque_cred *opaque_cred) | ||
306 | { | ||
307 | opaque_cred->cred_len = be32_to_cpu(*p++); | ||
308 | opaque_cred->cred = p; | ||
309 | return p + XDR_QUADLEN(opaque_cred->cred_len); | ||
310 | } | ||
311 | |||
312 | static __be32 * | ||
313 | __read_object_cred(__be32 *p, struct pnfs_osd_object_cred *comp) | ||
314 | { | ||
315 | p = _osd_xdr_decode_objid(p, &comp->oc_object_id); | ||
316 | comp->oc_osd_version = be32_to_cpup(p++); | ||
317 | comp->oc_cap_key_sec = be32_to_cpup(p++); | ||
318 | |||
319 | p = __read_opaque_cred(p, &comp->oc_cap_key); | ||
320 | p = __read_opaque_cred(p, &comp->oc_cap); | ||
321 | return p; | ||
322 | } | ||
323 | |||
324 | void pnfs_osd_xdr_decode_deviceaddr( | ||
325 | struct pnfs_osd_deviceaddr *deviceaddr, __be32 *p) | ||
326 | { | ||
327 | p = __read_targetid(p, &deviceaddr->oda_targetid); | ||
328 | |||
329 | p = __read_targetaddr(p, &deviceaddr->oda_targetaddr); | ||
330 | |||
331 | p = xdr_decode_opaque_fixed(p, deviceaddr->oda_lun, | ||
332 | sizeof(deviceaddr->oda_lun)); | ||
333 | |||
334 | p = __read_u8_opaque(p, &deviceaddr->oda_systemid); | ||
335 | |||
336 | p = __read_object_cred(p, &deviceaddr->oda_root_obj_cred); | ||
337 | |||
338 | p = __read_u8_opaque(p, &deviceaddr->oda_osdname); | ||
339 | |||
340 | /* libosd likes this terminated in dbg. It's last, so no problems */ | ||
341 | deviceaddr->oda_osdname.data[deviceaddr->oda_osdname.len] = 0; | ||
342 | } | ||
343 | |||
344 | /* | ||
345 | * struct pnfs_osd_layoutupdate { | ||
346 | * u32 dsu_valid; | ||
347 | * s64 dsu_delta; | ||
348 | * u32 olu_ioerr_flag; | ||
349 | * }; xdr size 4 + 8 + 4 | ||
350 | */ | ||
351 | int | ||
352 | pnfs_osd_xdr_encode_layoutupdate(struct xdr_stream *xdr, | ||
353 | struct pnfs_osd_layoutupdate *lou) | ||
354 | { | ||
355 | __be32 *p = xdr_reserve_space(xdr, 4 + 8 + 4); | ||
356 | |||
357 | if (!p) | ||
358 | return -E2BIG; | ||
359 | |||
360 | *p++ = cpu_to_be32(lou->dsu_valid); | ||
361 | if (lou->dsu_valid) | ||
362 | p = xdr_encode_hyper(p, lou->dsu_delta); | ||
363 | *p++ = cpu_to_be32(lou->olu_ioerr_flag); | ||
364 | return 0; | ||
365 | } | ||
366 | |||
367 | /* | ||
368 | * struct pnfs_osd_objid { | ||
369 | * struct nfs4_deviceid oid_device_id; | ||
370 | * u64 oid_partition_id; | ||
371 | * u64 oid_object_id; | ||
372 | * }; // xdr size 32 bytes | ||
373 | */ | ||
374 | static inline __be32 * | ||
375 | pnfs_osd_xdr_encode_objid(__be32 *p, struct pnfs_osd_objid *object_id) | ||
376 | { | ||
377 | p = xdr_encode_opaque_fixed(p, &object_id->oid_device_id.data, | ||
378 | sizeof(object_id->oid_device_id.data)); | ||
379 | p = xdr_encode_hyper(p, object_id->oid_partition_id); | ||
380 | p = xdr_encode_hyper(p, object_id->oid_object_id); | ||
381 | |||
382 | return p; | ||
383 | } | ||
384 | |||
385 | /* | ||
386 | * struct pnfs_osd_ioerr { | ||
387 | * struct pnfs_osd_objid oer_component; | ||
388 | * u64 oer_comp_offset; | ||
389 | * u64 oer_comp_length; | ||
390 | * u32 oer_iswrite; | ||
391 | * u32 oer_errno; | ||
392 | * }; // xdr size 32 + 24 bytes | ||
393 | */ | ||
394 | void pnfs_osd_xdr_encode_ioerr(__be32 *p, struct pnfs_osd_ioerr *ioerr) | ||
395 | { | ||
396 | p = pnfs_osd_xdr_encode_objid(p, &ioerr->oer_component); | ||
397 | p = xdr_encode_hyper(p, ioerr->oer_comp_offset); | ||
398 | p = xdr_encode_hyper(p, ioerr->oer_comp_length); | ||
399 | *p++ = cpu_to_be32(ioerr->oer_iswrite); | ||
400 | *p = cpu_to_be32(ioerr->oer_errno); | ||
401 | } | ||
402 | |||
403 | __be32 *pnfs_osd_xdr_ioerr_reserve_space(struct xdr_stream *xdr) | ||
404 | { | ||
405 | __be32 *p; | ||
406 | |||
407 | p = xdr_reserve_space(xdr, 32 + 24); | ||
408 | if (unlikely(!p)) | ||
409 | dprintk("%s: out of xdr space\n", __func__); | ||
410 | |||
411 | return p; | ||
412 | } | ||
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index c80add6e2213..7913961aff22 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c | |||
@@ -204,6 +204,21 @@ nfs_wait_on_request(struct nfs_page *req) | |||
204 | TASK_UNINTERRUPTIBLE); | 204 | TASK_UNINTERRUPTIBLE); |
205 | } | 205 | } |
206 | 206 | ||
207 | static bool nfs_generic_pg_test(struct nfs_pageio_descriptor *desc, struct nfs_page *prev, struct nfs_page *req) | ||
208 | { | ||
209 | /* | ||
210 | * FIXME: ideally we should be able to coalesce all requests | ||
211 | * that are not block boundary aligned, but currently this | ||
212 | * is problematic for the case of bsize < PAGE_CACHE_SIZE, | ||
213 | * since nfs_flush_multi and nfs_pagein_multi assume you | ||
214 | * can have only one struct nfs_page. | ||
215 | */ | ||
216 | if (desc->pg_bsize < PAGE_SIZE) | ||
217 | return 0; | ||
218 | |||
219 | return desc->pg_count + req->wb_bytes <= desc->pg_bsize; | ||
220 | } | ||
221 | |||
207 | /** | 222 | /** |
208 | * nfs_pageio_init - initialise a page io descriptor | 223 | * nfs_pageio_init - initialise a page io descriptor |
209 | * @desc: pointer to descriptor | 224 | * @desc: pointer to descriptor |
@@ -229,6 +244,8 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc, | |||
229 | desc->pg_ioflags = io_flags; | 244 | desc->pg_ioflags = io_flags; |
230 | desc->pg_error = 0; | 245 | desc->pg_error = 0; |
231 | desc->pg_lseg = NULL; | 246 | desc->pg_lseg = NULL; |
247 | desc->pg_test = nfs_generic_pg_test; | ||
248 | pnfs_pageio_init(desc, inode); | ||
232 | } | 249 | } |
233 | 250 | ||
234 | /** | 251 | /** |
@@ -242,29 +259,23 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc, | |||
242 | * | 259 | * |
243 | * Return 'true' if this is the case, else return 'false'. | 260 | * Return 'true' if this is the case, else return 'false'. |
244 | */ | 261 | */ |
245 | static int nfs_can_coalesce_requests(struct nfs_page *prev, | 262 | static bool nfs_can_coalesce_requests(struct nfs_page *prev, |
246 | struct nfs_page *req, | 263 | struct nfs_page *req, |
247 | struct nfs_pageio_descriptor *pgio) | 264 | struct nfs_pageio_descriptor *pgio) |
248 | { | 265 | { |
249 | if (req->wb_context->cred != prev->wb_context->cred) | 266 | if (req->wb_context->cred != prev->wb_context->cred) |
250 | return 0; | 267 | return false; |
251 | if (req->wb_lock_context->lockowner != prev->wb_lock_context->lockowner) | 268 | if (req->wb_lock_context->lockowner != prev->wb_lock_context->lockowner) |
252 | return 0; | 269 | return false; |
253 | if (req->wb_context->state != prev->wb_context->state) | 270 | if (req->wb_context->state != prev->wb_context->state) |
254 | return 0; | 271 | return false; |
255 | if (req->wb_index != (prev->wb_index + 1)) | 272 | if (req->wb_index != (prev->wb_index + 1)) |
256 | return 0; | 273 | return false; |
257 | if (req->wb_pgbase != 0) | 274 | if (req->wb_pgbase != 0) |
258 | return 0; | 275 | return false; |
259 | if (prev->wb_pgbase + prev->wb_bytes != PAGE_CACHE_SIZE) | 276 | if (prev->wb_pgbase + prev->wb_bytes != PAGE_CACHE_SIZE) |
260 | return 0; | 277 | return false; |
261 | /* | 278 | return pgio->pg_test(pgio, prev, req); |
262 | * Non-whole file layouts need to check that req is inside of | ||
263 | * pgio->pg_lseg. | ||
264 | */ | ||
265 | if (pgio->pg_test && !pgio->pg_test(pgio, prev, req)) | ||
266 | return 0; | ||
267 | return 1; | ||
268 | } | 279 | } |
269 | 280 | ||
270 | /** | 281 | /** |
@@ -278,31 +289,18 @@ static int nfs_can_coalesce_requests(struct nfs_page *prev, | |||
278 | static int nfs_pageio_do_add_request(struct nfs_pageio_descriptor *desc, | 289 | static int nfs_pageio_do_add_request(struct nfs_pageio_descriptor *desc, |
279 | struct nfs_page *req) | 290 | struct nfs_page *req) |
280 | { | 291 | { |
281 | size_t newlen = req->wb_bytes; | ||
282 | |||
283 | if (desc->pg_count != 0) { | 292 | if (desc->pg_count != 0) { |
284 | struct nfs_page *prev; | 293 | struct nfs_page *prev; |
285 | 294 | ||
286 | /* | ||
287 | * FIXME: ideally we should be able to coalesce all requests | ||
288 | * that are not block boundary aligned, but currently this | ||
289 | * is problematic for the case of bsize < PAGE_CACHE_SIZE, | ||
290 | * since nfs_flush_multi and nfs_pagein_multi assume you | ||
291 | * can have only one struct nfs_page. | ||
292 | */ | ||
293 | if (desc->pg_bsize < PAGE_SIZE) | ||
294 | return 0; | ||
295 | newlen += desc->pg_count; | ||
296 | if (newlen > desc->pg_bsize) | ||
297 | return 0; | ||
298 | prev = nfs_list_entry(desc->pg_list.prev); | 295 | prev = nfs_list_entry(desc->pg_list.prev); |
299 | if (!nfs_can_coalesce_requests(prev, req, desc)) | 296 | if (!nfs_can_coalesce_requests(prev, req, desc)) |
300 | return 0; | 297 | return 0; |
301 | } else | 298 | } else { |
302 | desc->pg_base = req->wb_pgbase; | 299 | desc->pg_base = req->wb_pgbase; |
300 | } | ||
303 | nfs_list_remove_request(req); | 301 | nfs_list_remove_request(req); |
304 | nfs_list_add_request(req, &desc->pg_list); | 302 | nfs_list_add_request(req, &desc->pg_list); |
305 | desc->pg_count = newlen; | 303 | desc->pg_count += req->wb_bytes; |
306 | return 1; | 304 | return 1; |
307 | } | 305 | } |
308 | 306 | ||
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index f57f5281a520..8c1309d852a6 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c | |||
@@ -177,13 +177,28 @@ get_layout_hdr(struct pnfs_layout_hdr *lo) | |||
177 | atomic_inc(&lo->plh_refcount); | 177 | atomic_inc(&lo->plh_refcount); |
178 | } | 178 | } |
179 | 179 | ||
180 | static struct pnfs_layout_hdr * | ||
181 | pnfs_alloc_layout_hdr(struct inode *ino, gfp_t gfp_flags) | ||
182 | { | ||
183 | struct pnfs_layoutdriver_type *ld = NFS_SERVER(ino)->pnfs_curr_ld; | ||
184 | return ld->alloc_layout_hdr ? ld->alloc_layout_hdr(ino, gfp_flags) : | ||
185 | kzalloc(sizeof(struct pnfs_layout_hdr), gfp_flags); | ||
186 | } | ||
187 | |||
188 | static void | ||
189 | pnfs_free_layout_hdr(struct pnfs_layout_hdr *lo) | ||
190 | { | ||
191 | struct pnfs_layoutdriver_type *ld = NFS_SERVER(lo->plh_inode)->pnfs_curr_ld; | ||
192 | return ld->alloc_layout_hdr ? ld->free_layout_hdr(lo) : kfree(lo); | ||
193 | } | ||
194 | |||
180 | static void | 195 | static void |
181 | destroy_layout_hdr(struct pnfs_layout_hdr *lo) | 196 | destroy_layout_hdr(struct pnfs_layout_hdr *lo) |
182 | { | 197 | { |
183 | dprintk("%s: freeing layout cache %p\n", __func__, lo); | 198 | dprintk("%s: freeing layout cache %p\n", __func__, lo); |
184 | BUG_ON(!list_empty(&lo->plh_layouts)); | 199 | BUG_ON(!list_empty(&lo->plh_layouts)); |
185 | NFS_I(lo->plh_inode)->layout = NULL; | 200 | NFS_I(lo->plh_inode)->layout = NULL; |
186 | kfree(lo); | 201 | pnfs_free_layout_hdr(lo); |
187 | } | 202 | } |
188 | 203 | ||
189 | static void | 204 | static void |
@@ -228,7 +243,7 @@ put_lseg_common(struct pnfs_layout_segment *lseg) | |||
228 | { | 243 | { |
229 | struct inode *inode = lseg->pls_layout->plh_inode; | 244 | struct inode *inode = lseg->pls_layout->plh_inode; |
230 | 245 | ||
231 | BUG_ON(test_bit(NFS_LSEG_VALID, &lseg->pls_flags)); | 246 | WARN_ON(test_bit(NFS_LSEG_VALID, &lseg->pls_flags)); |
232 | list_del_init(&lseg->pls_list); | 247 | list_del_init(&lseg->pls_list); |
233 | if (list_empty(&lseg->pls_layout->plh_segs)) { | 248 | if (list_empty(&lseg->pls_layout->plh_segs)) { |
234 | set_bit(NFS_LAYOUT_DESTROYED, &lseg->pls_layout->plh_flags); | 249 | set_bit(NFS_LAYOUT_DESTROYED, &lseg->pls_layout->plh_flags); |
@@ -261,11 +276,72 @@ put_lseg(struct pnfs_layout_segment *lseg) | |||
261 | } | 276 | } |
262 | EXPORT_SYMBOL_GPL(put_lseg); | 277 | EXPORT_SYMBOL_GPL(put_lseg); |
263 | 278 | ||
279 | static inline u64 | ||
280 | end_offset(u64 start, u64 len) | ||
281 | { | ||
282 | u64 end; | ||
283 | |||
284 | end = start + len; | ||
285 | return end >= start ? end : NFS4_MAX_UINT64; | ||
286 | } | ||
287 | |||
288 | /* last octet in a range */ | ||
289 | static inline u64 | ||
290 | last_byte_offset(u64 start, u64 len) | ||
291 | { | ||
292 | u64 end; | ||
293 | |||
294 | BUG_ON(!len); | ||
295 | end = start + len; | ||
296 | return end > start ? end - 1 : NFS4_MAX_UINT64; | ||
297 | } | ||
298 | |||
299 | /* | ||
300 | * is l2 fully contained in l1? | ||
301 | * start1 end1 | ||
302 | * [----------------------------------) | ||
303 | * start2 end2 | ||
304 | * [----------------) | ||
305 | */ | ||
306 | static inline int | ||
307 | lo_seg_contained(struct pnfs_layout_range *l1, | ||
308 | struct pnfs_layout_range *l2) | ||
309 | { | ||
310 | u64 start1 = l1->offset; | ||
311 | u64 end1 = end_offset(start1, l1->length); | ||
312 | u64 start2 = l2->offset; | ||
313 | u64 end2 = end_offset(start2, l2->length); | ||
314 | |||
315 | return (start1 <= start2) && (end1 >= end2); | ||
316 | } | ||
317 | |||
318 | /* | ||
319 | * is l1 and l2 intersecting? | ||
320 | * start1 end1 | ||
321 | * [----------------------------------) | ||
322 | * start2 end2 | ||
323 | * [----------------) | ||
324 | */ | ||
325 | static inline int | ||
326 | lo_seg_intersecting(struct pnfs_layout_range *l1, | ||
327 | struct pnfs_layout_range *l2) | ||
328 | { | ||
329 | u64 start1 = l1->offset; | ||
330 | u64 end1 = end_offset(start1, l1->length); | ||
331 | u64 start2 = l2->offset; | ||
332 | u64 end2 = end_offset(start2, l2->length); | ||
333 | |||
334 | return (end1 == NFS4_MAX_UINT64 || end1 > start2) && | ||
335 | (end2 == NFS4_MAX_UINT64 || end2 > start1); | ||
336 | } | ||
337 | |||
264 | static bool | 338 | static bool |
265 | should_free_lseg(u32 lseg_iomode, u32 recall_iomode) | 339 | should_free_lseg(struct pnfs_layout_range *lseg_range, |
340 | struct pnfs_layout_range *recall_range) | ||
266 | { | 341 | { |
267 | return (recall_iomode == IOMODE_ANY || | 342 | return (recall_range->iomode == IOMODE_ANY || |
268 | lseg_iomode == recall_iomode); | 343 | lseg_range->iomode == recall_range->iomode) && |
344 | lo_seg_intersecting(lseg_range, recall_range); | ||
269 | } | 345 | } |
270 | 346 | ||
271 | /* Returns 1 if lseg is removed from list, 0 otherwise */ | 347 | /* Returns 1 if lseg is removed from list, 0 otherwise */ |
@@ -296,7 +372,7 @@ static int mark_lseg_invalid(struct pnfs_layout_segment *lseg, | |||
296 | int | 372 | int |
297 | mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo, | 373 | mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo, |
298 | struct list_head *tmp_list, | 374 | struct list_head *tmp_list, |
299 | u32 iomode) | 375 | struct pnfs_layout_range *recall_range) |
300 | { | 376 | { |
301 | struct pnfs_layout_segment *lseg, *next; | 377 | struct pnfs_layout_segment *lseg, *next; |
302 | int invalid = 0, removed = 0; | 378 | int invalid = 0, removed = 0; |
@@ -309,7 +385,8 @@ mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo, | |||
309 | return 0; | 385 | return 0; |
310 | } | 386 | } |
311 | list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list) | 387 | list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list) |
312 | if (should_free_lseg(lseg->pls_range.iomode, iomode)) { | 388 | if (!recall_range || |
389 | should_free_lseg(&lseg->pls_range, recall_range)) { | ||
313 | dprintk("%s: freeing lseg %p iomode %d " | 390 | dprintk("%s: freeing lseg %p iomode %d " |
314 | "offset %llu length %llu\n", __func__, | 391 | "offset %llu length %llu\n", __func__, |
315 | lseg, lseg->pls_range.iomode, lseg->pls_range.offset, | 392 | lseg, lseg->pls_range.iomode, lseg->pls_range.offset, |
@@ -358,7 +435,7 @@ pnfs_destroy_layout(struct nfs_inode *nfsi) | |||
358 | lo = nfsi->layout; | 435 | lo = nfsi->layout; |
359 | if (lo) { | 436 | if (lo) { |
360 | lo->plh_block_lgets++; /* permanently block new LAYOUTGETs */ | 437 | lo->plh_block_lgets++; /* permanently block new LAYOUTGETs */ |
361 | mark_matching_lsegs_invalid(lo, &tmp_list, IOMODE_ANY); | 438 | mark_matching_lsegs_invalid(lo, &tmp_list, NULL); |
362 | } | 439 | } |
363 | spin_unlock(&nfsi->vfs_inode.i_lock); | 440 | spin_unlock(&nfsi->vfs_inode.i_lock); |
364 | pnfs_free_lseg_list(&tmp_list); | 441 | pnfs_free_lseg_list(&tmp_list); |
@@ -467,7 +544,7 @@ pnfs_choose_layoutget_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo, | |||
467 | static struct pnfs_layout_segment * | 544 | static struct pnfs_layout_segment * |
468 | send_layoutget(struct pnfs_layout_hdr *lo, | 545 | send_layoutget(struct pnfs_layout_hdr *lo, |
469 | struct nfs_open_context *ctx, | 546 | struct nfs_open_context *ctx, |
470 | u32 iomode, | 547 | struct pnfs_layout_range *range, |
471 | gfp_t gfp_flags) | 548 | gfp_t gfp_flags) |
472 | { | 549 | { |
473 | struct inode *ino = lo->plh_inode; | 550 | struct inode *ino = lo->plh_inode; |
@@ -499,11 +576,11 @@ send_layoutget(struct pnfs_layout_hdr *lo, | |||
499 | goto out_err_free; | 576 | goto out_err_free; |
500 | } | 577 | } |
501 | 578 | ||
502 | lgp->args.minlength = NFS4_MAX_UINT64; | 579 | lgp->args.minlength = PAGE_CACHE_SIZE; |
580 | if (lgp->args.minlength > range->length) | ||
581 | lgp->args.minlength = range->length; | ||
503 | lgp->args.maxcount = PNFS_LAYOUT_MAXSIZE; | 582 | lgp->args.maxcount = PNFS_LAYOUT_MAXSIZE; |
504 | lgp->args.range.iomode = iomode; | 583 | lgp->args.range = *range; |
505 | lgp->args.range.offset = 0; | ||
506 | lgp->args.range.length = NFS4_MAX_UINT64; | ||
507 | lgp->args.type = server->pnfs_curr_ld->id; | 584 | lgp->args.type = server->pnfs_curr_ld->id; |
508 | lgp->args.inode = ino; | 585 | lgp->args.inode = ino; |
509 | lgp->args.ctx = get_nfs_open_context(ctx); | 586 | lgp->args.ctx = get_nfs_open_context(ctx); |
@@ -518,7 +595,7 @@ send_layoutget(struct pnfs_layout_hdr *lo, | |||
518 | nfs4_proc_layoutget(lgp); | 595 | nfs4_proc_layoutget(lgp); |
519 | if (!lseg) { | 596 | if (!lseg) { |
520 | /* remember that LAYOUTGET failed and suspend trying */ | 597 | /* remember that LAYOUTGET failed and suspend trying */ |
521 | set_bit(lo_fail_bit(iomode), &lo->plh_flags); | 598 | set_bit(lo_fail_bit(range->iomode), &lo->plh_flags); |
522 | } | 599 | } |
523 | 600 | ||
524 | /* free xdr pages */ | 601 | /* free xdr pages */ |
@@ -542,6 +619,51 @@ out_err_free: | |||
542 | return NULL; | 619 | return NULL; |
543 | } | 620 | } |
544 | 621 | ||
622 | /* Initiates a LAYOUTRETURN(FILE) */ | ||
623 | int | ||
624 | _pnfs_return_layout(struct inode *ino) | ||
625 | { | ||
626 | struct pnfs_layout_hdr *lo = NULL; | ||
627 | struct nfs_inode *nfsi = NFS_I(ino); | ||
628 | LIST_HEAD(tmp_list); | ||
629 | struct nfs4_layoutreturn *lrp; | ||
630 | nfs4_stateid stateid; | ||
631 | int status = 0; | ||
632 | |||
633 | dprintk("--> %s\n", __func__); | ||
634 | |||
635 | spin_lock(&ino->i_lock); | ||
636 | lo = nfsi->layout; | ||
637 | if (!lo || !mark_matching_lsegs_invalid(lo, &tmp_list, NULL)) { | ||
638 | spin_unlock(&ino->i_lock); | ||
639 | dprintk("%s: no layout segments to return\n", __func__); | ||
640 | goto out; | ||
641 | } | ||
642 | stateid = nfsi->layout->plh_stateid; | ||
643 | /* Reference matched in nfs4_layoutreturn_release */ | ||
644 | get_layout_hdr(lo); | ||
645 | spin_unlock(&ino->i_lock); | ||
646 | pnfs_free_lseg_list(&tmp_list); | ||
647 | |||
648 | WARN_ON(test_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)); | ||
649 | |||
650 | lrp = kzalloc(sizeof(*lrp), GFP_KERNEL); | ||
651 | if (unlikely(lrp == NULL)) { | ||
652 | status = -ENOMEM; | ||
653 | goto out; | ||
654 | } | ||
655 | |||
656 | lrp->args.stateid = stateid; | ||
657 | lrp->args.layout_type = NFS_SERVER(ino)->pnfs_curr_ld->id; | ||
658 | lrp->args.inode = ino; | ||
659 | lrp->clp = NFS_SERVER(ino)->nfs_client; | ||
660 | |||
661 | status = nfs4_proc_layoutreturn(lrp); | ||
662 | out: | ||
663 | dprintk("<-- %s status: %d\n", __func__, status); | ||
664 | return status; | ||
665 | } | ||
666 | |||
545 | bool pnfs_roc(struct inode *ino) | 667 | bool pnfs_roc(struct inode *ino) |
546 | { | 668 | { |
547 | struct pnfs_layout_hdr *lo; | 669 | struct pnfs_layout_hdr *lo; |
@@ -625,10 +747,23 @@ bool pnfs_roc_drain(struct inode *ino, u32 *barrier) | |||
625 | * are seen first. | 747 | * are seen first. |
626 | */ | 748 | */ |
627 | static s64 | 749 | static s64 |
628 | cmp_layout(u32 iomode1, u32 iomode2) | 750 | cmp_layout(struct pnfs_layout_range *l1, |
751 | struct pnfs_layout_range *l2) | ||
629 | { | 752 | { |
753 | s64 d; | ||
754 | |||
755 | /* high offset > low offset */ | ||
756 | d = l1->offset - l2->offset; | ||
757 | if (d) | ||
758 | return d; | ||
759 | |||
760 | /* short length > long length */ | ||
761 | d = l2->length - l1->length; | ||
762 | if (d) | ||
763 | return d; | ||
764 | |||
630 | /* read > read/write */ | 765 | /* read > read/write */ |
631 | return (int)(iomode2 == IOMODE_READ) - (int)(iomode1 == IOMODE_READ); | 766 | return (int)(l1->iomode == IOMODE_READ) - (int)(l2->iomode == IOMODE_READ); |
632 | } | 767 | } |
633 | 768 | ||
634 | static void | 769 | static void |
@@ -636,13 +771,12 @@ pnfs_insert_layout(struct pnfs_layout_hdr *lo, | |||
636 | struct pnfs_layout_segment *lseg) | 771 | struct pnfs_layout_segment *lseg) |
637 | { | 772 | { |
638 | struct pnfs_layout_segment *lp; | 773 | struct pnfs_layout_segment *lp; |
639 | int found = 0; | ||
640 | 774 | ||
641 | dprintk("%s:Begin\n", __func__); | 775 | dprintk("%s:Begin\n", __func__); |
642 | 776 | ||
643 | assert_spin_locked(&lo->plh_inode->i_lock); | 777 | assert_spin_locked(&lo->plh_inode->i_lock); |
644 | list_for_each_entry(lp, &lo->plh_segs, pls_list) { | 778 | list_for_each_entry(lp, &lo->plh_segs, pls_list) { |
645 | if (cmp_layout(lp->pls_range.iomode, lseg->pls_range.iomode) > 0) | 779 | if (cmp_layout(&lseg->pls_range, &lp->pls_range) > 0) |
646 | continue; | 780 | continue; |
647 | list_add_tail(&lseg->pls_list, &lp->pls_list); | 781 | list_add_tail(&lseg->pls_list, &lp->pls_list); |
648 | dprintk("%s: inserted lseg %p " | 782 | dprintk("%s: inserted lseg %p " |
@@ -652,16 +786,14 @@ pnfs_insert_layout(struct pnfs_layout_hdr *lo, | |||
652 | lseg->pls_range.offset, lseg->pls_range.length, | 786 | lseg->pls_range.offset, lseg->pls_range.length, |
653 | lp, lp->pls_range.iomode, lp->pls_range.offset, | 787 | lp, lp->pls_range.iomode, lp->pls_range.offset, |
654 | lp->pls_range.length); | 788 | lp->pls_range.length); |
655 | found = 1; | 789 | goto out; |
656 | break; | ||
657 | } | ||
658 | if (!found) { | ||
659 | list_add_tail(&lseg->pls_list, &lo->plh_segs); | ||
660 | dprintk("%s: inserted lseg %p " | ||
661 | "iomode %d offset %llu length %llu at tail\n", | ||
662 | __func__, lseg, lseg->pls_range.iomode, | ||
663 | lseg->pls_range.offset, lseg->pls_range.length); | ||
664 | } | 790 | } |
791 | list_add_tail(&lseg->pls_list, &lo->plh_segs); | ||
792 | dprintk("%s: inserted lseg %p " | ||
793 | "iomode %d offset %llu length %llu at tail\n", | ||
794 | __func__, lseg, lseg->pls_range.iomode, | ||
795 | lseg->pls_range.offset, lseg->pls_range.length); | ||
796 | out: | ||
665 | get_layout_hdr(lo); | 797 | get_layout_hdr(lo); |
666 | 798 | ||
667 | dprintk("%s:Return\n", __func__); | 799 | dprintk("%s:Return\n", __func__); |
@@ -672,7 +804,7 @@ alloc_init_layout_hdr(struct inode *ino, gfp_t gfp_flags) | |||
672 | { | 804 | { |
673 | struct pnfs_layout_hdr *lo; | 805 | struct pnfs_layout_hdr *lo; |
674 | 806 | ||
675 | lo = kzalloc(sizeof(struct pnfs_layout_hdr), gfp_flags); | 807 | lo = pnfs_alloc_layout_hdr(ino, gfp_flags); |
676 | if (!lo) | 808 | if (!lo) |
677 | return NULL; | 809 | return NULL; |
678 | atomic_set(&lo->plh_refcount, 1); | 810 | atomic_set(&lo->plh_refcount, 1); |
@@ -705,7 +837,7 @@ pnfs_find_alloc_layout(struct inode *ino, gfp_t gfp_flags) | |||
705 | if (likely(nfsi->layout == NULL)) /* Won the race? */ | 837 | if (likely(nfsi->layout == NULL)) /* Won the race? */ |
706 | nfsi->layout = new; | 838 | nfsi->layout = new; |
707 | else | 839 | else |
708 | kfree(new); | 840 | pnfs_free_layout_hdr(new); |
709 | return nfsi->layout; | 841 | return nfsi->layout; |
710 | } | 842 | } |
711 | 843 | ||
@@ -721,16 +853,28 @@ pnfs_find_alloc_layout(struct inode *ino, gfp_t gfp_flags) | |||
721 | * READ RW true | 853 | * READ RW true |
722 | */ | 854 | */ |
723 | static int | 855 | static int |
724 | is_matching_lseg(struct pnfs_layout_segment *lseg, u32 iomode) | 856 | is_matching_lseg(struct pnfs_layout_range *ls_range, |
857 | struct pnfs_layout_range *range) | ||
725 | { | 858 | { |
726 | return (iomode != IOMODE_RW || lseg->pls_range.iomode == IOMODE_RW); | 859 | struct pnfs_layout_range range1; |
860 | |||
861 | if ((range->iomode == IOMODE_RW && | ||
862 | ls_range->iomode != IOMODE_RW) || | ||
863 | !lo_seg_intersecting(ls_range, range)) | ||
864 | return 0; | ||
865 | |||
866 | /* range1 covers only the first byte in the range */ | ||
867 | range1 = *range; | ||
868 | range1.length = 1; | ||
869 | return lo_seg_contained(ls_range, &range1); | ||
727 | } | 870 | } |
728 | 871 | ||
729 | /* | 872 | /* |
730 | * lookup range in layout | 873 | * lookup range in layout |
731 | */ | 874 | */ |
732 | static struct pnfs_layout_segment * | 875 | static struct pnfs_layout_segment * |
733 | pnfs_find_lseg(struct pnfs_layout_hdr *lo, u32 iomode) | 876 | pnfs_find_lseg(struct pnfs_layout_hdr *lo, |
877 | struct pnfs_layout_range *range) | ||
734 | { | 878 | { |
735 | struct pnfs_layout_segment *lseg, *ret = NULL; | 879 | struct pnfs_layout_segment *lseg, *ret = NULL; |
736 | 880 | ||
@@ -739,11 +883,11 @@ pnfs_find_lseg(struct pnfs_layout_hdr *lo, u32 iomode) | |||
739 | assert_spin_locked(&lo->plh_inode->i_lock); | 883 | assert_spin_locked(&lo->plh_inode->i_lock); |
740 | list_for_each_entry(lseg, &lo->plh_segs, pls_list) { | 884 | list_for_each_entry(lseg, &lo->plh_segs, pls_list) { |
741 | if (test_bit(NFS_LSEG_VALID, &lseg->pls_flags) && | 885 | if (test_bit(NFS_LSEG_VALID, &lseg->pls_flags) && |
742 | is_matching_lseg(lseg, iomode)) { | 886 | is_matching_lseg(&lseg->pls_range, range)) { |
743 | ret = get_lseg(lseg); | 887 | ret = get_lseg(lseg); |
744 | break; | 888 | break; |
745 | } | 889 | } |
746 | if (cmp_layout(iomode, lseg->pls_range.iomode) > 0) | 890 | if (cmp_layout(range, &lseg->pls_range) > 0) |
747 | break; | 891 | break; |
748 | } | 892 | } |
749 | 893 | ||
@@ -759,9 +903,17 @@ pnfs_find_lseg(struct pnfs_layout_hdr *lo, u32 iomode) | |||
759 | struct pnfs_layout_segment * | 903 | struct pnfs_layout_segment * |
760 | pnfs_update_layout(struct inode *ino, | 904 | pnfs_update_layout(struct inode *ino, |
761 | struct nfs_open_context *ctx, | 905 | struct nfs_open_context *ctx, |
906 | loff_t pos, | ||
907 | u64 count, | ||
762 | enum pnfs_iomode iomode, | 908 | enum pnfs_iomode iomode, |
763 | gfp_t gfp_flags) | 909 | gfp_t gfp_flags) |
764 | { | 910 | { |
911 | struct pnfs_layout_range arg = { | ||
912 | .iomode = iomode, | ||
913 | .offset = pos, | ||
914 | .length = count, | ||
915 | }; | ||
916 | unsigned pg_offset; | ||
765 | struct nfs_inode *nfsi = NFS_I(ino); | 917 | struct nfs_inode *nfsi = NFS_I(ino); |
766 | struct nfs_client *clp = NFS_SERVER(ino)->nfs_client; | 918 | struct nfs_client *clp = NFS_SERVER(ino)->nfs_client; |
767 | struct pnfs_layout_hdr *lo; | 919 | struct pnfs_layout_hdr *lo; |
@@ -789,7 +941,7 @@ pnfs_update_layout(struct inode *ino, | |||
789 | goto out_unlock; | 941 | goto out_unlock; |
790 | 942 | ||
791 | /* Check to see if the layout for the given range already exists */ | 943 | /* Check to see if the layout for the given range already exists */ |
792 | lseg = pnfs_find_lseg(lo, iomode); | 944 | lseg = pnfs_find_lseg(lo, &arg); |
793 | if (lseg) | 945 | if (lseg) |
794 | goto out_unlock; | 946 | goto out_unlock; |
795 | 947 | ||
@@ -811,7 +963,14 @@ pnfs_update_layout(struct inode *ino, | |||
811 | spin_unlock(&clp->cl_lock); | 963 | spin_unlock(&clp->cl_lock); |
812 | } | 964 | } |
813 | 965 | ||
814 | lseg = send_layoutget(lo, ctx, iomode, gfp_flags); | 966 | pg_offset = arg.offset & ~PAGE_CACHE_MASK; |
967 | if (pg_offset) { | ||
968 | arg.offset -= pg_offset; | ||
969 | arg.length += pg_offset; | ||
970 | } | ||
971 | arg.length = PAGE_CACHE_ALIGN(arg.length); | ||
972 | |||
973 | lseg = send_layoutget(lo, ctx, &arg, gfp_flags); | ||
815 | if (!lseg && first) { | 974 | if (!lseg && first) { |
816 | spin_lock(&clp->cl_lock); | 975 | spin_lock(&clp->cl_lock); |
817 | list_del_init(&lo->plh_layouts); | 976 | list_del_init(&lo->plh_layouts); |
@@ -838,17 +997,6 @@ pnfs_layout_process(struct nfs4_layoutget *lgp) | |||
838 | struct nfs_client *clp = NFS_SERVER(ino)->nfs_client; | 997 | struct nfs_client *clp = NFS_SERVER(ino)->nfs_client; |
839 | int status = 0; | 998 | int status = 0; |
840 | 999 | ||
841 | /* Verify we got what we asked for. | ||
842 | * Note that because the xdr parsing only accepts a single | ||
843 | * element array, this can fail even if the server is behaving | ||
844 | * correctly. | ||
845 | */ | ||
846 | if (lgp->args.range.iomode > res->range.iomode || | ||
847 | res->range.offset != 0 || | ||
848 | res->range.length != NFS4_MAX_UINT64) { | ||
849 | status = -EINVAL; | ||
850 | goto out; | ||
851 | } | ||
852 | /* Inject layout blob into I/O device driver */ | 1000 | /* Inject layout blob into I/O device driver */ |
853 | lseg = NFS_SERVER(ino)->pnfs_curr_ld->alloc_lseg(lo, res, lgp->gfp_flags); | 1001 | lseg = NFS_SERVER(ino)->pnfs_curr_ld->alloc_lseg(lo, res, lgp->gfp_flags); |
854 | if (!lseg || IS_ERR(lseg)) { | 1002 | if (!lseg || IS_ERR(lseg)) { |
@@ -895,51 +1043,64 @@ out_forget_reply: | |||
895 | goto out; | 1043 | goto out; |
896 | } | 1044 | } |
897 | 1045 | ||
898 | static int pnfs_read_pg_test(struct nfs_pageio_descriptor *pgio, | 1046 | bool |
899 | struct nfs_page *prev, | 1047 | pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, |
900 | struct nfs_page *req) | 1048 | struct nfs_page *req) |
901 | { | 1049 | { |
1050 | enum pnfs_iomode access_type; | ||
1051 | gfp_t gfp_flags; | ||
1052 | |||
1053 | /* We assume that pg_ioflags == 0 iff we're reading a page */ | ||
1054 | if (pgio->pg_ioflags == 0) { | ||
1055 | access_type = IOMODE_READ; | ||
1056 | gfp_flags = GFP_KERNEL; | ||
1057 | } else { | ||
1058 | access_type = IOMODE_RW; | ||
1059 | gfp_flags = GFP_NOFS; | ||
1060 | } | ||
1061 | |||
902 | if (pgio->pg_count == prev->wb_bytes) { | 1062 | if (pgio->pg_count == prev->wb_bytes) { |
903 | /* This is first coelesce call for a series of nfs_pages */ | 1063 | /* This is first coelesce call for a series of nfs_pages */ |
904 | pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, | 1064 | pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, |
905 | prev->wb_context, | 1065 | prev->wb_context, |
906 | IOMODE_READ, | 1066 | req_offset(req), |
907 | GFP_KERNEL); | 1067 | pgio->pg_count, |
1068 | access_type, | ||
1069 | gfp_flags); | ||
1070 | return true; | ||
908 | } | 1071 | } |
909 | return NFS_SERVER(pgio->pg_inode)->pnfs_curr_ld->pg_test(pgio, prev, req); | ||
910 | } | ||
911 | 1072 | ||
912 | void | 1073 | if (pgio->pg_lseg && |
913 | pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode) | 1074 | req_offset(req) > end_offset(pgio->pg_lseg->pls_range.offset, |
914 | { | 1075 | pgio->pg_lseg->pls_range.length)) |
915 | struct pnfs_layoutdriver_type *ld; | 1076 | return false; |
916 | 1077 | ||
917 | ld = NFS_SERVER(inode)->pnfs_curr_ld; | 1078 | return true; |
918 | pgio->pg_test = (ld && ld->pg_test) ? pnfs_read_pg_test : NULL; | ||
919 | } | 1079 | } |
1080 | EXPORT_SYMBOL_GPL(pnfs_generic_pg_test); | ||
920 | 1081 | ||
921 | static int pnfs_write_pg_test(struct nfs_pageio_descriptor *pgio, | 1082 | /* |
922 | struct nfs_page *prev, | 1083 | * Called by non rpc-based layout drivers |
923 | struct nfs_page *req) | 1084 | */ |
1085 | int | ||
1086 | pnfs_ld_write_done(struct nfs_write_data *data) | ||
924 | { | 1087 | { |
925 | if (pgio->pg_count == prev->wb_bytes) { | 1088 | int status; |
926 | /* This is first coelesce call for a series of nfs_pages */ | ||
927 | pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, | ||
928 | prev->wb_context, | ||
929 | IOMODE_RW, | ||
930 | GFP_NOFS); | ||
931 | } | ||
932 | return NFS_SERVER(pgio->pg_inode)->pnfs_curr_ld->pg_test(pgio, prev, req); | ||
933 | } | ||
934 | 1089 | ||
935 | void | 1090 | if (!data->pnfs_error) { |
936 | pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode) | 1091 | pnfs_set_layoutcommit(data); |
937 | { | 1092 | data->mds_ops->rpc_call_done(&data->task, data); |
938 | struct pnfs_layoutdriver_type *ld; | 1093 | data->mds_ops->rpc_release(data); |
1094 | return 0; | ||
1095 | } | ||
939 | 1096 | ||
940 | ld = NFS_SERVER(inode)->pnfs_curr_ld; | 1097 | dprintk("%s: pnfs_error=%d, retry via MDS\n", __func__, |
941 | pgio->pg_test = (ld && ld->pg_test) ? pnfs_write_pg_test : NULL; | 1098 | data->pnfs_error); |
1099 | status = nfs_initiate_write(data, NFS_CLIENT(data->inode), | ||
1100 | data->mds_ops, NFS_FILE_SYNC); | ||
1101 | return status ? : -EAGAIN; | ||
942 | } | 1102 | } |
1103 | EXPORT_SYMBOL_GPL(pnfs_ld_write_done); | ||
943 | 1104 | ||
944 | enum pnfs_try_status | 1105 | enum pnfs_try_status |
945 | pnfs_try_to_write_data(struct nfs_write_data *wdata, | 1106 | pnfs_try_to_write_data(struct nfs_write_data *wdata, |
@@ -966,6 +1127,29 @@ pnfs_try_to_write_data(struct nfs_write_data *wdata, | |||
966 | } | 1127 | } |
967 | 1128 | ||
968 | /* | 1129 | /* |
1130 | * Called by non rpc-based layout drivers | ||
1131 | */ | ||
1132 | int | ||
1133 | pnfs_ld_read_done(struct nfs_read_data *data) | ||
1134 | { | ||
1135 | int status; | ||
1136 | |||
1137 | if (!data->pnfs_error) { | ||
1138 | __nfs4_read_done_cb(data); | ||
1139 | data->mds_ops->rpc_call_done(&data->task, data); | ||
1140 | data->mds_ops->rpc_release(data); | ||
1141 | return 0; | ||
1142 | } | ||
1143 | |||
1144 | dprintk("%s: pnfs_error=%d, retry via MDS\n", __func__, | ||
1145 | data->pnfs_error); | ||
1146 | status = nfs_initiate_read(data, NFS_CLIENT(data->inode), | ||
1147 | data->mds_ops); | ||
1148 | return status ? : -EAGAIN; | ||
1149 | } | ||
1150 | EXPORT_SYMBOL_GPL(pnfs_ld_read_done); | ||
1151 | |||
1152 | /* | ||
969 | * Call the appropriate parallel I/O subsystem read function. | 1153 | * Call the appropriate parallel I/O subsystem read function. |
970 | */ | 1154 | */ |
971 | enum pnfs_try_status | 1155 | enum pnfs_try_status |
@@ -1009,7 +1193,7 @@ void | |||
1009 | pnfs_set_layoutcommit(struct nfs_write_data *wdata) | 1193 | pnfs_set_layoutcommit(struct nfs_write_data *wdata) |
1010 | { | 1194 | { |
1011 | struct nfs_inode *nfsi = NFS_I(wdata->inode); | 1195 | struct nfs_inode *nfsi = NFS_I(wdata->inode); |
1012 | loff_t end_pos = wdata->args.offset + wdata->res.count; | 1196 | loff_t end_pos = wdata->mds_offset + wdata->res.count; |
1013 | bool mark_as_dirty = false; | 1197 | bool mark_as_dirty = false; |
1014 | 1198 | ||
1015 | spin_lock(&nfsi->vfs_inode.i_lock); | 1199 | spin_lock(&nfsi->vfs_inode.i_lock); |
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 0c015bad9e7a..48d0a8e4d062 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h | |||
@@ -30,6 +30,7 @@ | |||
30 | #ifndef FS_NFS_PNFS_H | 30 | #ifndef FS_NFS_PNFS_H |
31 | #define FS_NFS_PNFS_H | 31 | #define FS_NFS_PNFS_H |
32 | 32 | ||
33 | #include <linux/nfs_fs.h> | ||
33 | #include <linux/nfs_page.h> | 34 | #include <linux/nfs_page.h> |
34 | 35 | ||
35 | enum { | 36 | enum { |
@@ -64,17 +65,29 @@ enum { | |||
64 | NFS_LAYOUT_DESTROYED, /* no new use of layout allowed */ | 65 | NFS_LAYOUT_DESTROYED, /* no new use of layout allowed */ |
65 | }; | 66 | }; |
66 | 67 | ||
68 | enum layoutdriver_policy_flags { | ||
69 | /* Should the pNFS client commit and return the layout upon a setattr */ | ||
70 | PNFS_LAYOUTRET_ON_SETATTR = 1 << 0, | ||
71 | }; | ||
72 | |||
73 | struct nfs4_deviceid_node; | ||
74 | |||
67 | /* Per-layout driver specific registration structure */ | 75 | /* Per-layout driver specific registration structure */ |
68 | struct pnfs_layoutdriver_type { | 76 | struct pnfs_layoutdriver_type { |
69 | struct list_head pnfs_tblid; | 77 | struct list_head pnfs_tblid; |
70 | const u32 id; | 78 | const u32 id; |
71 | const char *name; | 79 | const char *name; |
72 | struct module *owner; | 80 | struct module *owner; |
81 | unsigned flags; | ||
82 | |||
83 | struct pnfs_layout_hdr * (*alloc_layout_hdr) (struct inode *inode, gfp_t gfp_flags); | ||
84 | void (*free_layout_hdr) (struct pnfs_layout_hdr *); | ||
85 | |||
73 | struct pnfs_layout_segment * (*alloc_lseg) (struct pnfs_layout_hdr *layoutid, struct nfs4_layoutget_res *lgr, gfp_t gfp_flags); | 86 | struct pnfs_layout_segment * (*alloc_lseg) (struct pnfs_layout_hdr *layoutid, struct nfs4_layoutget_res *lgr, gfp_t gfp_flags); |
74 | void (*free_lseg) (struct pnfs_layout_segment *lseg); | 87 | void (*free_lseg) (struct pnfs_layout_segment *lseg); |
75 | 88 | ||
76 | /* test for nfs page cache coalescing */ | 89 | /* test for nfs page cache coalescing */ |
77 | int (*pg_test)(struct nfs_pageio_descriptor *, struct nfs_page *, struct nfs_page *); | 90 | bool (*pg_test)(struct nfs_pageio_descriptor *, struct nfs_page *, struct nfs_page *); |
78 | 91 | ||
79 | /* Returns true if layoutdriver wants to divert this request to | 92 | /* Returns true if layoutdriver wants to divert this request to |
80 | * driver's commit routine. | 93 | * driver's commit routine. |
@@ -89,6 +102,16 @@ struct pnfs_layoutdriver_type { | |||
89 | */ | 102 | */ |
90 | enum pnfs_try_status (*read_pagelist) (struct nfs_read_data *nfs_data); | 103 | enum pnfs_try_status (*read_pagelist) (struct nfs_read_data *nfs_data); |
91 | enum pnfs_try_status (*write_pagelist) (struct nfs_write_data *nfs_data, int how); | 104 | enum pnfs_try_status (*write_pagelist) (struct nfs_write_data *nfs_data, int how); |
105 | |||
106 | void (*free_deviceid_node) (struct nfs4_deviceid_node *); | ||
107 | |||
108 | void (*encode_layoutreturn) (struct pnfs_layout_hdr *layoutid, | ||
109 | struct xdr_stream *xdr, | ||
110 | const struct nfs4_layoutreturn_args *args); | ||
111 | |||
112 | void (*encode_layoutcommit) (struct pnfs_layout_hdr *layoutid, | ||
113 | struct xdr_stream *xdr, | ||
114 | const struct nfs4_layoutcommit_args *args); | ||
92 | }; | 115 | }; |
93 | 116 | ||
94 | struct pnfs_layout_hdr { | 117 | struct pnfs_layout_hdr { |
@@ -120,21 +143,22 @@ extern void pnfs_unregister_layoutdriver(struct pnfs_layoutdriver_type *); | |||
120 | extern int nfs4_proc_getdeviceinfo(struct nfs_server *server, | 143 | extern int nfs4_proc_getdeviceinfo(struct nfs_server *server, |
121 | struct pnfs_device *dev); | 144 | struct pnfs_device *dev); |
122 | extern int nfs4_proc_layoutget(struct nfs4_layoutget *lgp); | 145 | extern int nfs4_proc_layoutget(struct nfs4_layoutget *lgp); |
146 | extern int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp); | ||
123 | 147 | ||
124 | /* pnfs.c */ | 148 | /* pnfs.c */ |
125 | void get_layout_hdr(struct pnfs_layout_hdr *lo); | 149 | void get_layout_hdr(struct pnfs_layout_hdr *lo); |
126 | void put_lseg(struct pnfs_layout_segment *lseg); | 150 | void put_lseg(struct pnfs_layout_segment *lseg); |
127 | struct pnfs_layout_segment * | 151 | struct pnfs_layout_segment * |
128 | pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx, | 152 | pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx, |
129 | enum pnfs_iomode access_type, gfp_t gfp_flags); | 153 | loff_t pos, u64 count, enum pnfs_iomode access_type, |
154 | gfp_t gfp_flags); | ||
130 | void set_pnfs_layoutdriver(struct nfs_server *, u32 id); | 155 | void set_pnfs_layoutdriver(struct nfs_server *, u32 id); |
131 | void unset_pnfs_layoutdriver(struct nfs_server *); | 156 | void unset_pnfs_layoutdriver(struct nfs_server *); |
132 | enum pnfs_try_status pnfs_try_to_write_data(struct nfs_write_data *, | 157 | enum pnfs_try_status pnfs_try_to_write_data(struct nfs_write_data *, |
133 | const struct rpc_call_ops *, int); | 158 | const struct rpc_call_ops *, int); |
134 | enum pnfs_try_status pnfs_try_to_read_data(struct nfs_read_data *, | 159 | enum pnfs_try_status pnfs_try_to_read_data(struct nfs_read_data *, |
135 | const struct rpc_call_ops *); | 160 | const struct rpc_call_ops *); |
136 | void pnfs_pageio_init_read(struct nfs_pageio_descriptor *, struct inode *); | 161 | bool pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, struct nfs_page *req); |
137 | void pnfs_pageio_init_write(struct nfs_pageio_descriptor *, struct inode *); | ||
138 | int pnfs_layout_process(struct nfs4_layoutget *lgp); | 162 | int pnfs_layout_process(struct nfs4_layoutget *lgp); |
139 | void pnfs_free_lseg_list(struct list_head *tmp_list); | 163 | void pnfs_free_lseg_list(struct list_head *tmp_list); |
140 | void pnfs_destroy_layout(struct nfs_inode *); | 164 | void pnfs_destroy_layout(struct nfs_inode *); |
@@ -148,13 +172,37 @@ int pnfs_choose_layoutget_stateid(nfs4_stateid *dst, | |||
148 | struct nfs4_state *open_state); | 172 | struct nfs4_state *open_state); |
149 | int mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo, | 173 | int mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo, |
150 | struct list_head *tmp_list, | 174 | struct list_head *tmp_list, |
151 | u32 iomode); | 175 | struct pnfs_layout_range *recall_range); |
152 | bool pnfs_roc(struct inode *ino); | 176 | bool pnfs_roc(struct inode *ino); |
153 | void pnfs_roc_release(struct inode *ino); | 177 | void pnfs_roc_release(struct inode *ino); |
154 | void pnfs_roc_set_barrier(struct inode *ino, u32 barrier); | 178 | void pnfs_roc_set_barrier(struct inode *ino, u32 barrier); |
155 | bool pnfs_roc_drain(struct inode *ino, u32 *barrier); | 179 | bool pnfs_roc_drain(struct inode *ino, u32 *barrier); |
156 | void pnfs_set_layoutcommit(struct nfs_write_data *wdata); | 180 | void pnfs_set_layoutcommit(struct nfs_write_data *wdata); |
157 | int pnfs_layoutcommit_inode(struct inode *inode, bool sync); | 181 | int pnfs_layoutcommit_inode(struct inode *inode, bool sync); |
182 | int _pnfs_return_layout(struct inode *); | ||
183 | int pnfs_ld_write_done(struct nfs_write_data *); | ||
184 | int pnfs_ld_read_done(struct nfs_read_data *); | ||
185 | |||
186 | /* pnfs_dev.c */ | ||
187 | struct nfs4_deviceid_node { | ||
188 | struct hlist_node node; | ||
189 | const struct pnfs_layoutdriver_type *ld; | ||
190 | const struct nfs_client *nfs_client; | ||
191 | struct nfs4_deviceid deviceid; | ||
192 | atomic_t ref; | ||
193 | }; | ||
194 | |||
195 | void nfs4_print_deviceid(const struct nfs4_deviceid *dev_id); | ||
196 | struct nfs4_deviceid_node *nfs4_find_get_deviceid(const struct pnfs_layoutdriver_type *, const struct nfs_client *, const struct nfs4_deviceid *); | ||
197 | struct nfs4_deviceid_node *nfs4_unhash_put_deviceid(const struct pnfs_layoutdriver_type *, const struct nfs_client *, const struct nfs4_deviceid *); | ||
198 | void nfs4_delete_deviceid(const struct pnfs_layoutdriver_type *, const struct nfs_client *, const struct nfs4_deviceid *); | ||
199 | void nfs4_init_deviceid_node(struct nfs4_deviceid_node *, | ||
200 | const struct pnfs_layoutdriver_type *, | ||
201 | const struct nfs_client *, | ||
202 | const struct nfs4_deviceid *); | ||
203 | struct nfs4_deviceid_node *nfs4_insert_deviceid_node(struct nfs4_deviceid_node *); | ||
204 | bool nfs4_put_deviceid_node(struct nfs4_deviceid_node *); | ||
205 | void nfs4_deviceid_purge_client(const struct nfs_client *); | ||
158 | 206 | ||
159 | static inline int lo_fail_bit(u32 iomode) | 207 | static inline int lo_fail_bit(u32 iomode) |
160 | { | 208 | { |
@@ -223,6 +271,36 @@ static inline void pnfs_clear_request_commit(struct nfs_page *req) | |||
223 | put_lseg(req->wb_commit_lseg); | 271 | put_lseg(req->wb_commit_lseg); |
224 | } | 272 | } |
225 | 273 | ||
274 | /* Should the pNFS client commit and return the layout upon a setattr */ | ||
275 | static inline bool | ||
276 | pnfs_ld_layoutret_on_setattr(struct inode *inode) | ||
277 | { | ||
278 | if (!pnfs_enabled_sb(NFS_SERVER(inode))) | ||
279 | return false; | ||
280 | return NFS_SERVER(inode)->pnfs_curr_ld->flags & | ||
281 | PNFS_LAYOUTRET_ON_SETATTR; | ||
282 | } | ||
283 | |||
284 | static inline int pnfs_return_layout(struct inode *ino) | ||
285 | { | ||
286 | struct nfs_inode *nfsi = NFS_I(ino); | ||
287 | struct nfs_server *nfss = NFS_SERVER(ino); | ||
288 | |||
289 | if (pnfs_enabled_sb(nfss) && nfsi->layout) | ||
290 | return _pnfs_return_layout(ino); | ||
291 | |||
292 | return 0; | ||
293 | } | ||
294 | |||
295 | static inline void pnfs_pageio_init(struct nfs_pageio_descriptor *pgio, | ||
296 | struct inode *inode) | ||
297 | { | ||
298 | struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld; | ||
299 | |||
300 | if (ld) | ||
301 | pgio->pg_test = ld->pg_test; | ||
302 | } | ||
303 | |||
226 | #else /* CONFIG_NFS_V4_1 */ | 304 | #else /* CONFIG_NFS_V4_1 */ |
227 | 305 | ||
228 | static inline void pnfs_destroy_all_layouts(struct nfs_client *clp) | 306 | static inline void pnfs_destroy_all_layouts(struct nfs_client *clp) |
@@ -245,7 +323,8 @@ static inline void put_lseg(struct pnfs_layout_segment *lseg) | |||
245 | 323 | ||
246 | static inline struct pnfs_layout_segment * | 324 | static inline struct pnfs_layout_segment * |
247 | pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx, | 325 | pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx, |
248 | enum pnfs_iomode access_type, gfp_t gfp_flags) | 326 | loff_t pos, u64 count, enum pnfs_iomode access_type, |
327 | gfp_t gfp_flags) | ||
249 | { | 328 | { |
250 | return NULL; | 329 | return NULL; |
251 | } | 330 | } |
@@ -264,6 +343,17 @@ pnfs_try_to_write_data(struct nfs_write_data *data, | |||
264 | return PNFS_NOT_ATTEMPTED; | 343 | return PNFS_NOT_ATTEMPTED; |
265 | } | 344 | } |
266 | 345 | ||
346 | static inline int pnfs_return_layout(struct inode *ino) | ||
347 | { | ||
348 | return 0; | ||
349 | } | ||
350 | |||
351 | static inline bool | ||
352 | pnfs_ld_layoutret_on_setattr(struct inode *inode) | ||
353 | { | ||
354 | return false; | ||
355 | } | ||
356 | |||
267 | static inline bool | 357 | static inline bool |
268 | pnfs_roc(struct inode *ino) | 358 | pnfs_roc(struct inode *ino) |
269 | { | 359 | { |
@@ -294,16 +384,9 @@ static inline void unset_pnfs_layoutdriver(struct nfs_server *s) | |||
294 | { | 384 | { |
295 | } | 385 | } |
296 | 386 | ||
297 | static inline void | 387 | static inline void pnfs_pageio_init(struct nfs_pageio_descriptor *pgio, |
298 | pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *ino) | 388 | struct inode *inode) |
299 | { | ||
300 | pgio->pg_test = NULL; | ||
301 | } | ||
302 | |||
303 | static inline void | ||
304 | pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *ino) | ||
305 | { | 389 | { |
306 | pgio->pg_test = NULL; | ||
307 | } | 390 | } |
308 | 391 | ||
309 | static inline void | 392 | static inline void |
@@ -331,6 +414,10 @@ static inline int pnfs_layoutcommit_inode(struct inode *inode, bool sync) | |||
331 | { | 414 | { |
332 | return 0; | 415 | return 0; |
333 | } | 416 | } |
417 | |||
418 | static inline void nfs4_deviceid_purge_client(struct nfs_client *ncl) | ||
419 | { | ||
420 | } | ||
334 | #endif /* CONFIG_NFS_V4_1 */ | 421 | #endif /* CONFIG_NFS_V4_1 */ |
335 | 422 | ||
336 | #endif /* FS_NFS_PNFS_H */ | 423 | #endif /* FS_NFS_PNFS_H */ |
diff --git a/fs/nfs/pnfs_dev.c b/fs/nfs/pnfs_dev.c new file mode 100644 index 000000000000..c65e133ce9c0 --- /dev/null +++ b/fs/nfs/pnfs_dev.c | |||
@@ -0,0 +1,270 @@ | |||
1 | /* | ||
2 | * Device operations for the pnfs client. | ||
3 | * | ||
4 | * Copyright (c) 2002 | ||
5 | * The Regents of the University of Michigan | ||
6 | * All Rights Reserved | ||
7 | * | ||
8 | * Dean Hildebrand <dhildebz@umich.edu> | ||
9 | * Garth Goodson <Garth.Goodson@netapp.com> | ||
10 | * | ||
11 | * Permission is granted to use, copy, create derivative works, and | ||
12 | * redistribute this software and such derivative works for any purpose, | ||
13 | * so long as the name of the University of Michigan is not used in | ||
14 | * any advertising or publicity pertaining to the use or distribution | ||
15 | * of this software without specific, written prior authorization. If | ||
16 | * the above copyright notice or any other identification of the | ||
17 | * University of Michigan is included in any copy of any portion of | ||
18 | * this software, then the disclaimer below must also be included. | ||
19 | * | ||
20 | * This software is provided as is, without representation or warranty | ||
21 | * of any kind either express or implied, including without limitation | ||
22 | * the implied warranties of merchantability, fitness for a particular | ||
23 | * purpose, or noninfringement. The Regents of the University of | ||
24 | * Michigan shall not be liable for any damages, including special, | ||
25 | * indirect, incidental, or consequential damages, with respect to any | ||
26 | * claim arising out of or in connection with the use of the software, | ||
27 | * even if it has been or is hereafter advised of the possibility of | ||
28 | * such damages. | ||
29 | */ | ||
30 | |||
31 | #include "pnfs.h" | ||
32 | |||
33 | #define NFSDBG_FACILITY NFSDBG_PNFS | ||
34 | |||
35 | /* | ||
36 | * Device ID RCU cache. A device ID is unique per server and layout type. | ||
37 | */ | ||
38 | #define NFS4_DEVICE_ID_HASH_BITS 5 | ||
39 | #define NFS4_DEVICE_ID_HASH_SIZE (1 << NFS4_DEVICE_ID_HASH_BITS) | ||
40 | #define NFS4_DEVICE_ID_HASH_MASK (NFS4_DEVICE_ID_HASH_SIZE - 1) | ||
41 | |||
42 | static struct hlist_head nfs4_deviceid_cache[NFS4_DEVICE_ID_HASH_SIZE]; | ||
43 | static DEFINE_SPINLOCK(nfs4_deviceid_lock); | ||
44 | |||
45 | void | ||
46 | nfs4_print_deviceid(const struct nfs4_deviceid *id) | ||
47 | { | ||
48 | u32 *p = (u32 *)id; | ||
49 | |||
50 | dprintk("%s: device id= [%x%x%x%x]\n", __func__, | ||
51 | p[0], p[1], p[2], p[3]); | ||
52 | } | ||
53 | EXPORT_SYMBOL_GPL(nfs4_print_deviceid); | ||
54 | |||
55 | static inline u32 | ||
56 | nfs4_deviceid_hash(const struct nfs4_deviceid *id) | ||
57 | { | ||
58 | unsigned char *cptr = (unsigned char *)id->data; | ||
59 | unsigned int nbytes = NFS4_DEVICEID4_SIZE; | ||
60 | u32 x = 0; | ||
61 | |||
62 | while (nbytes--) { | ||
63 | x *= 37; | ||
64 | x += *cptr++; | ||
65 | } | ||
66 | return x & NFS4_DEVICE_ID_HASH_MASK; | ||
67 | } | ||
68 | |||
69 | static struct nfs4_deviceid_node * | ||
70 | _lookup_deviceid(const struct pnfs_layoutdriver_type *ld, | ||
71 | const struct nfs_client *clp, const struct nfs4_deviceid *id, | ||
72 | long hash) | ||
73 | { | ||
74 | struct nfs4_deviceid_node *d; | ||
75 | struct hlist_node *n; | ||
76 | |||
77 | hlist_for_each_entry_rcu(d, n, &nfs4_deviceid_cache[hash], node) | ||
78 | if (d->ld == ld && d->nfs_client == clp && | ||
79 | !memcmp(&d->deviceid, id, sizeof(*id))) { | ||
80 | if (atomic_read(&d->ref)) | ||
81 | return d; | ||
82 | else | ||
83 | continue; | ||
84 | } | ||
85 | return NULL; | ||
86 | } | ||
87 | |||
88 | /* | ||
89 | * Lookup a deviceid in cache and get a reference count on it if found | ||
90 | * | ||
91 | * @clp nfs_client associated with deviceid | ||
92 | * @id deviceid to look up | ||
93 | */ | ||
94 | struct nfs4_deviceid_node * | ||
95 | _find_get_deviceid(const struct pnfs_layoutdriver_type *ld, | ||
96 | const struct nfs_client *clp, const struct nfs4_deviceid *id, | ||
97 | long hash) | ||
98 | { | ||
99 | struct nfs4_deviceid_node *d; | ||
100 | |||
101 | rcu_read_lock(); | ||
102 | d = _lookup_deviceid(ld, clp, id, hash); | ||
103 | if (d && !atomic_inc_not_zero(&d->ref)) | ||
104 | d = NULL; | ||
105 | rcu_read_unlock(); | ||
106 | return d; | ||
107 | } | ||
108 | |||
109 | struct nfs4_deviceid_node * | ||
110 | nfs4_find_get_deviceid(const struct pnfs_layoutdriver_type *ld, | ||
111 | const struct nfs_client *clp, const struct nfs4_deviceid *id) | ||
112 | { | ||
113 | return _find_get_deviceid(ld, clp, id, nfs4_deviceid_hash(id)); | ||
114 | } | ||
115 | EXPORT_SYMBOL_GPL(nfs4_find_get_deviceid); | ||
116 | |||
117 | /* | ||
118 | * Unhash and put deviceid | ||
119 | * | ||
120 | * @clp nfs_client associated with deviceid | ||
121 | * @id the deviceid to unhash | ||
122 | * | ||
123 | * @ret the unhashed node, if found and dereferenced to zero, NULL otherwise. | ||
124 | */ | ||
125 | struct nfs4_deviceid_node * | ||
126 | nfs4_unhash_put_deviceid(const struct pnfs_layoutdriver_type *ld, | ||
127 | const struct nfs_client *clp, const struct nfs4_deviceid *id) | ||
128 | { | ||
129 | struct nfs4_deviceid_node *d; | ||
130 | |||
131 | spin_lock(&nfs4_deviceid_lock); | ||
132 | rcu_read_lock(); | ||
133 | d = _lookup_deviceid(ld, clp, id, nfs4_deviceid_hash(id)); | ||
134 | rcu_read_unlock(); | ||
135 | if (!d) { | ||
136 | spin_unlock(&nfs4_deviceid_lock); | ||
137 | return NULL; | ||
138 | } | ||
139 | hlist_del_init_rcu(&d->node); | ||
140 | spin_unlock(&nfs4_deviceid_lock); | ||
141 | synchronize_rcu(); | ||
142 | |||
143 | /* balance the initial ref set in nfs4_init_deviceid_node */ | ||
144 | if (atomic_dec_and_test(&d->ref)) | ||
145 | return d; | ||
146 | |||
147 | return NULL; | ||
148 | } | ||
149 | EXPORT_SYMBOL_GPL(nfs4_unhash_put_deviceid); | ||
150 | |||
151 | /* | ||
152 | * Delete a deviceid from cache | ||
153 | * | ||
154 | * @clp struct nfs_client qualifying the deviceid | ||
155 | * @id deviceid to delete | ||
156 | */ | ||
157 | void | ||
158 | nfs4_delete_deviceid(const struct pnfs_layoutdriver_type *ld, | ||
159 | const struct nfs_client *clp, const struct nfs4_deviceid *id) | ||
160 | { | ||
161 | struct nfs4_deviceid_node *d; | ||
162 | |||
163 | d = nfs4_unhash_put_deviceid(ld, clp, id); | ||
164 | if (!d) | ||
165 | return; | ||
166 | d->ld->free_deviceid_node(d); | ||
167 | } | ||
168 | EXPORT_SYMBOL_GPL(nfs4_delete_deviceid); | ||
169 | |||
170 | void | ||
171 | nfs4_init_deviceid_node(struct nfs4_deviceid_node *d, | ||
172 | const struct pnfs_layoutdriver_type *ld, | ||
173 | const struct nfs_client *nfs_client, | ||
174 | const struct nfs4_deviceid *id) | ||
175 | { | ||
176 | INIT_HLIST_NODE(&d->node); | ||
177 | d->ld = ld; | ||
178 | d->nfs_client = nfs_client; | ||
179 | d->deviceid = *id; | ||
180 | atomic_set(&d->ref, 1); | ||
181 | } | ||
182 | EXPORT_SYMBOL_GPL(nfs4_init_deviceid_node); | ||
183 | |||
184 | /* | ||
185 | * Uniquely initialize and insert a deviceid node into cache | ||
186 | * | ||
187 | * @new new deviceid node | ||
188 | * Note that the caller must set up the following members: | ||
189 | * new->ld | ||
190 | * new->nfs_client | ||
191 | * new->deviceid | ||
192 | * | ||
193 | * @ret the inserted node, if none found, otherwise, the found entry. | ||
194 | */ | ||
195 | struct nfs4_deviceid_node * | ||
196 | nfs4_insert_deviceid_node(struct nfs4_deviceid_node *new) | ||
197 | { | ||
198 | struct nfs4_deviceid_node *d; | ||
199 | long hash; | ||
200 | |||
201 | spin_lock(&nfs4_deviceid_lock); | ||
202 | hash = nfs4_deviceid_hash(&new->deviceid); | ||
203 | d = _find_get_deviceid(new->ld, new->nfs_client, &new->deviceid, hash); | ||
204 | if (d) { | ||
205 | spin_unlock(&nfs4_deviceid_lock); | ||
206 | return d; | ||
207 | } | ||
208 | |||
209 | hlist_add_head_rcu(&new->node, &nfs4_deviceid_cache[hash]); | ||
210 | spin_unlock(&nfs4_deviceid_lock); | ||
211 | |||
212 | return new; | ||
213 | } | ||
214 | EXPORT_SYMBOL_GPL(nfs4_insert_deviceid_node); | ||
215 | |||
216 | /* | ||
217 | * Dereference a deviceid node and delete it when its reference count drops | ||
218 | * to zero. | ||
219 | * | ||
220 | * @d deviceid node to put | ||
221 | * | ||
222 | * @ret true iff the node was deleted | ||
223 | */ | ||
224 | bool | ||
225 | nfs4_put_deviceid_node(struct nfs4_deviceid_node *d) | ||
226 | { | ||
227 | if (!atomic_dec_and_lock(&d->ref, &nfs4_deviceid_lock)) | ||
228 | return false; | ||
229 | hlist_del_init_rcu(&d->node); | ||
230 | spin_unlock(&nfs4_deviceid_lock); | ||
231 | synchronize_rcu(); | ||
232 | d->ld->free_deviceid_node(d); | ||
233 | return true; | ||
234 | } | ||
235 | EXPORT_SYMBOL_GPL(nfs4_put_deviceid_node); | ||
236 | |||
237 | static void | ||
238 | _deviceid_purge_client(const struct nfs_client *clp, long hash) | ||
239 | { | ||
240 | struct nfs4_deviceid_node *d; | ||
241 | struct hlist_node *n, *next; | ||
242 | HLIST_HEAD(tmp); | ||
243 | |||
244 | rcu_read_lock(); | ||
245 | hlist_for_each_entry_rcu(d, n, &nfs4_deviceid_cache[hash], node) | ||
246 | if (d->nfs_client == clp && atomic_read(&d->ref)) { | ||
247 | hlist_del_init_rcu(&d->node); | ||
248 | hlist_add_head(&d->node, &tmp); | ||
249 | } | ||
250 | rcu_read_unlock(); | ||
251 | |||
252 | if (hlist_empty(&tmp)) | ||
253 | return; | ||
254 | |||
255 | synchronize_rcu(); | ||
256 | hlist_for_each_entry_safe(d, n, next, &tmp, node) | ||
257 | if (atomic_dec_and_test(&d->ref)) | ||
258 | d->ld->free_deviceid_node(d); | ||
259 | } | ||
260 | |||
261 | void | ||
262 | nfs4_deviceid_purge_client(const struct nfs_client *clp) | ||
263 | { | ||
264 | long h; | ||
265 | |||
266 | spin_lock(&nfs4_deviceid_lock); | ||
267 | for (h = 0; h < NFS4_DEVICE_ID_HASH_SIZE; h++) | ||
268 | _deviceid_purge_client(clp, h); | ||
269 | spin_unlock(&nfs4_deviceid_lock); | ||
270 | } | ||
diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 2bcf0dc306a1..20a7f952e244 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c | |||
@@ -288,7 +288,9 @@ static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc) | |||
288 | atomic_set(&req->wb_complete, requests); | 288 | atomic_set(&req->wb_complete, requests); |
289 | 289 | ||
290 | BUG_ON(desc->pg_lseg != NULL); | 290 | BUG_ON(desc->pg_lseg != NULL); |
291 | lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_READ, GFP_KERNEL); | 291 | lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, |
292 | req_offset(req), desc->pg_count, | ||
293 | IOMODE_READ, GFP_KERNEL); | ||
292 | ClearPageError(page); | 294 | ClearPageError(page); |
293 | offset = 0; | 295 | offset = 0; |
294 | nbytes = desc->pg_count; | 296 | nbytes = desc->pg_count; |
@@ -351,7 +353,9 @@ static int nfs_pagein_one(struct nfs_pageio_descriptor *desc) | |||
351 | } | 353 | } |
352 | req = nfs_list_entry(data->pages.next); | 354 | req = nfs_list_entry(data->pages.next); |
353 | if ((!lseg) && list_is_singular(&data->pages)) | 355 | if ((!lseg) && list_is_singular(&data->pages)) |
354 | lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_READ, GFP_KERNEL); | 356 | lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, |
357 | req_offset(req), desc->pg_count, | ||
358 | IOMODE_READ, GFP_KERNEL); | ||
355 | 359 | ||
356 | ret = nfs_read_rpcsetup(req, data, &nfs_read_full_ops, desc->pg_count, | 360 | ret = nfs_read_rpcsetup(req, data, &nfs_read_full_ops, desc->pg_count, |
357 | 0, lseg); | 361 | 0, lseg); |
@@ -660,7 +664,6 @@ int nfs_readpages(struct file *filp, struct address_space *mapping, | |||
660 | if (ret == 0) | 664 | if (ret == 0) |
661 | goto read_complete; /* all pages were read */ | 665 | goto read_complete; /* all pages were read */ |
662 | 666 | ||
663 | pnfs_pageio_init_read(&pgio, inode); | ||
664 | if (rsize < PAGE_CACHE_SIZE) | 667 | if (rsize < PAGE_CACHE_SIZE) |
665 | nfs_pageio_init(&pgio, inode, nfs_pagein_multi, rsize, 0); | 668 | nfs_pageio_init(&pgio, inode, nfs_pagein_multi, rsize, 0); |
666 | else | 669 | else |
diff --git a/fs/nfs/super.c b/fs/nfs/super.c index e288f06d3fa7..ce40e5c568ba 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c | |||
@@ -63,6 +63,7 @@ | |||
63 | #include "iostat.h" | 63 | #include "iostat.h" |
64 | #include "internal.h" | 64 | #include "internal.h" |
65 | #include "fscache.h" | 65 | #include "fscache.h" |
66 | #include "pnfs.h" | ||
66 | 67 | ||
67 | #define NFSDBG_FACILITY NFSDBG_VFS | 68 | #define NFSDBG_FACILITY NFSDBG_VFS |
68 | 69 | ||
@@ -732,6 +733,28 @@ static int nfs_show_options(struct seq_file *m, struct vfsmount *mnt) | |||
732 | 733 | ||
733 | return 0; | 734 | return 0; |
734 | } | 735 | } |
736 | #ifdef CONFIG_NFS_V4_1 | ||
737 | void show_sessions(struct seq_file *m, struct nfs_server *server) | ||
738 | { | ||
739 | if (nfs4_has_session(server->nfs_client)) | ||
740 | seq_printf(m, ",sessions"); | ||
741 | } | ||
742 | #else | ||
743 | void show_sessions(struct seq_file *m, struct nfs_server *server) {} | ||
744 | #endif | ||
745 | |||
746 | #ifdef CONFIG_NFS_V4_1 | ||
747 | void show_pnfs(struct seq_file *m, struct nfs_server *server) | ||
748 | { | ||
749 | seq_printf(m, ",pnfs="); | ||
750 | if (server->pnfs_curr_ld) | ||
751 | seq_printf(m, "%s", server->pnfs_curr_ld->name); | ||
752 | else | ||
753 | seq_printf(m, "not configured"); | ||
754 | } | ||
755 | #else /* CONFIG_NFS_V4_1 */ | ||
756 | void show_pnfs(struct seq_file *m, struct nfs_server *server) {} | ||
757 | #endif /* CONFIG_NFS_V4_1 */ | ||
735 | 758 | ||
736 | static int nfs_show_devname(struct seq_file *m, struct vfsmount *mnt) | 759 | static int nfs_show_devname(struct seq_file *m, struct vfsmount *mnt) |
737 | { | 760 | { |
@@ -792,6 +815,8 @@ static int nfs_show_stats(struct seq_file *m, struct vfsmount *mnt) | |||
792 | seq_printf(m, "bm0=0x%x", nfss->attr_bitmask[0]); | 815 | seq_printf(m, "bm0=0x%x", nfss->attr_bitmask[0]); |
793 | seq_printf(m, ",bm1=0x%x", nfss->attr_bitmask[1]); | 816 | seq_printf(m, ",bm1=0x%x", nfss->attr_bitmask[1]); |
794 | seq_printf(m, ",acl=0x%x", nfss->acl_bitmask); | 817 | seq_printf(m, ",acl=0x%x", nfss->acl_bitmask); |
818 | show_sessions(m, nfss); | ||
819 | show_pnfs(m, nfss); | ||
795 | } | 820 | } |
796 | #endif | 821 | #endif |
797 | 822 | ||
diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 49c715b4ac92..e268e3b23497 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c | |||
@@ -939,7 +939,9 @@ static int nfs_flush_multi(struct nfs_pageio_descriptor *desc) | |||
939 | atomic_set(&req->wb_complete, requests); | 939 | atomic_set(&req->wb_complete, requests); |
940 | 940 | ||
941 | BUG_ON(desc->pg_lseg); | 941 | BUG_ON(desc->pg_lseg); |
942 | lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_RW, GFP_NOFS); | 942 | lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, |
943 | req_offset(req), desc->pg_count, | ||
944 | IOMODE_RW, GFP_NOFS); | ||
943 | ClearPageError(page); | 945 | ClearPageError(page); |
944 | offset = 0; | 946 | offset = 0; |
945 | nbytes = desc->pg_count; | 947 | nbytes = desc->pg_count; |
@@ -1013,7 +1015,9 @@ static int nfs_flush_one(struct nfs_pageio_descriptor *desc) | |||
1013 | } | 1015 | } |
1014 | req = nfs_list_entry(data->pages.next); | 1016 | req = nfs_list_entry(data->pages.next); |
1015 | if ((!lseg) && list_is_singular(&data->pages)) | 1017 | if ((!lseg) && list_is_singular(&data->pages)) |
1016 | lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_RW, GFP_NOFS); | 1018 | lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, |
1019 | req_offset(req), desc->pg_count, | ||
1020 | IOMODE_RW, GFP_NOFS); | ||
1017 | 1021 | ||
1018 | if ((desc->pg_ioflags & FLUSH_COND_STABLE) && | 1022 | if ((desc->pg_ioflags & FLUSH_COND_STABLE) && |
1019 | (desc->pg_moreio || NFS_I(desc->pg_inode)->ncommit)) | 1023 | (desc->pg_moreio || NFS_I(desc->pg_inode)->ncommit)) |
@@ -1032,8 +1036,6 @@ static void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, | |||
1032 | { | 1036 | { |
1033 | size_t wsize = NFS_SERVER(inode)->wsize; | 1037 | size_t wsize = NFS_SERVER(inode)->wsize; |
1034 | 1038 | ||
1035 | pnfs_pageio_init_write(pgio, inode); | ||
1036 | |||
1037 | if (wsize < PAGE_CACHE_SIZE) | 1039 | if (wsize < PAGE_CACHE_SIZE) |
1038 | nfs_pageio_init(pgio, inode, nfs_flush_multi, wsize, ioflags); | 1040 | nfs_pageio_init(pgio, inode, nfs_flush_multi, wsize, ioflags); |
1039 | else | 1041 | else |
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index ad000aeb21a2..b9566e46219f 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c | |||
@@ -1354,12 +1354,6 @@ exp_pseudoroot(struct svc_rqst *rqstp, struct svc_fh *fhp) | |||
1354 | if (IS_ERR(exp)) | 1354 | if (IS_ERR(exp)) |
1355 | return nfserrno(PTR_ERR(exp)); | 1355 | return nfserrno(PTR_ERR(exp)); |
1356 | rv = fh_compose(fhp, exp, exp->ex_path.dentry, NULL); | 1356 | rv = fh_compose(fhp, exp, exp->ex_path.dentry, NULL); |
1357 | if (rv) | ||
1358 | goto out; | ||
1359 | rv = check_nfsd_access(exp, rqstp); | ||
1360 | if (rv) | ||
1361 | fh_put(fhp); | ||
1362 | out: | ||
1363 | exp_put(exp); | 1357 | exp_put(exp); |
1364 | return rv; | 1358 | return rv; |
1365 | } | 1359 | } |
diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c index 2247fc91d5e9..9095f3c21df9 100644 --- a/fs/nfsd/nfs3proc.c +++ b/fs/nfsd/nfs3proc.c | |||
@@ -245,7 +245,7 @@ nfsd3_proc_create(struct svc_rqst *rqstp, struct nfsd3_createargs *argp, | |||
245 | } | 245 | } |
246 | 246 | ||
247 | /* Now create the file and set attributes */ | 247 | /* Now create the file and set attributes */ |
248 | nfserr = nfsd_create_v3(rqstp, dirfhp, argp->name, argp->len, | 248 | nfserr = do_nfsd_create(rqstp, dirfhp, argp->name, argp->len, |
249 | attr, newfhp, | 249 | attr, newfhp, |
250 | argp->createmode, argp->verf, NULL, NULL); | 250 | argp->createmode, argp->verf, NULL, NULL); |
251 | 251 | ||
diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c index ad48faca20fc..08c6e36ab2eb 100644 --- a/fs/nfsd/nfs3xdr.c +++ b/fs/nfsd/nfs3xdr.c | |||
@@ -842,7 +842,7 @@ out: | |||
842 | return rv; | 842 | return rv; |
843 | } | 843 | } |
844 | 844 | ||
845 | __be32 *encode_entryplus_baggage(struct nfsd3_readdirres *cd, __be32 *p, const char *name, int namlen) | 845 | static __be32 *encode_entryplus_baggage(struct nfsd3_readdirres *cd, __be32 *p, const char *name, int namlen) |
846 | { | 846 | { |
847 | struct svc_fh fh; | 847 | struct svc_fh fh; |
848 | int err; | 848 | int err; |
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 5fcb1396a7e3..3a6dbd70b34b 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c | |||
@@ -196,9 +196,9 @@ do_open_lookup(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_o | |||
196 | 196 | ||
197 | /* | 197 | /* |
198 | * Note: create modes (UNCHECKED,GUARDED...) are the same | 198 | * Note: create modes (UNCHECKED,GUARDED...) are the same |
199 | * in NFSv4 as in v3. | 199 | * in NFSv4 as in v3 except EXCLUSIVE4_1. |
200 | */ | 200 | */ |
201 | status = nfsd_create_v3(rqstp, current_fh, open->op_fname.data, | 201 | status = do_nfsd_create(rqstp, current_fh, open->op_fname.data, |
202 | open->op_fname.len, &open->op_iattr, | 202 | open->op_fname.len, &open->op_iattr, |
203 | &resfh, open->op_createmode, | 203 | &resfh, open->op_createmode, |
204 | (u32 *)open->op_verf.data, | 204 | (u32 *)open->op_verf.data, |
@@ -403,7 +403,7 @@ nfsd4_putfh(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
403 | cstate->current_fh.fh_handle.fh_size = putfh->pf_fhlen; | 403 | cstate->current_fh.fh_handle.fh_size = putfh->pf_fhlen; |
404 | memcpy(&cstate->current_fh.fh_handle.fh_base, putfh->pf_fhval, | 404 | memcpy(&cstate->current_fh.fh_handle.fh_base, putfh->pf_fhval, |
405 | putfh->pf_fhlen); | 405 | putfh->pf_fhlen); |
406 | return fh_verify(rqstp, &cstate->current_fh, 0, NFSD_MAY_NOP); | 406 | return fh_verify(rqstp, &cstate->current_fh, 0, NFSD_MAY_BYPASS_GSS); |
407 | } | 407 | } |
408 | 408 | ||
409 | static __be32 | 409 | static __be32 |
@@ -762,6 +762,9 @@ nfsd4_secinfo(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
762 | __be32 err; | 762 | __be32 err; |
763 | 763 | ||
764 | fh_init(&resfh, NFS4_FHSIZE); | 764 | fh_init(&resfh, NFS4_FHSIZE); |
765 | err = fh_verify(rqstp, &cstate->current_fh, S_IFDIR, NFSD_MAY_EXEC); | ||
766 | if (err) | ||
767 | return err; | ||
765 | err = nfsd_lookup_dentry(rqstp, &cstate->current_fh, | 768 | err = nfsd_lookup_dentry(rqstp, &cstate->current_fh, |
766 | secinfo->si_name, secinfo->si_namelen, | 769 | secinfo->si_name, secinfo->si_namelen, |
767 | &exp, &dentry); | 770 | &exp, &dentry); |
@@ -986,6 +989,9 @@ enum nfsd4_op_flags { | |||
986 | ALLOWED_WITHOUT_FH = 1 << 0, /* No current filehandle required */ | 989 | ALLOWED_WITHOUT_FH = 1 << 0, /* No current filehandle required */ |
987 | ALLOWED_ON_ABSENT_FS = 1 << 1, /* ops processed on absent fs */ | 990 | ALLOWED_ON_ABSENT_FS = 1 << 1, /* ops processed on absent fs */ |
988 | ALLOWED_AS_FIRST_OP = 1 << 2, /* ops required first in compound */ | 991 | ALLOWED_AS_FIRST_OP = 1 << 2, /* ops required first in compound */ |
992 | /* For rfc 5661 section 2.6.3.1.1: */ | ||
993 | OP_HANDLES_WRONGSEC = 1 << 3, | ||
994 | OP_IS_PUTFH_LIKE = 1 << 4, | ||
989 | }; | 995 | }; |
990 | 996 | ||
991 | struct nfsd4_operation { | 997 | struct nfsd4_operation { |
@@ -1031,6 +1037,44 @@ static __be32 nfs41_check_op_ordering(struct nfsd4_compoundargs *args) | |||
1031 | return nfs_ok; | 1037 | return nfs_ok; |
1032 | } | 1038 | } |
1033 | 1039 | ||
1040 | static inline struct nfsd4_operation *OPDESC(struct nfsd4_op *op) | ||
1041 | { | ||
1042 | return &nfsd4_ops[op->opnum]; | ||
1043 | } | ||
1044 | |||
1045 | static bool need_wrongsec_check(struct svc_rqst *rqstp) | ||
1046 | { | ||
1047 | struct nfsd4_compoundres *resp = rqstp->rq_resp; | ||
1048 | struct nfsd4_compoundargs *argp = rqstp->rq_argp; | ||
1049 | struct nfsd4_op *this = &argp->ops[resp->opcnt - 1]; | ||
1050 | struct nfsd4_op *next = &argp->ops[resp->opcnt]; | ||
1051 | struct nfsd4_operation *thisd; | ||
1052 | struct nfsd4_operation *nextd; | ||
1053 | |||
1054 | thisd = OPDESC(this); | ||
1055 | /* | ||
1056 | * Most ops check wrongsec on their own; only the putfh-like ops | ||
1057 | * have special rules. | ||
1058 | */ | ||
1059 | if (!(thisd->op_flags & OP_IS_PUTFH_LIKE)) | ||
1060 | return false; | ||
1061 | /* | ||
1062 | * rfc 5661 2.6.3.1.1.6: don't bother erroring out a | ||
1063 | * put-filehandle operation if we're not going to use the | ||
1064 | * result: | ||
1065 | */ | ||
1066 | if (argp->opcnt == resp->opcnt) | ||
1067 | return false; | ||
1068 | |||
1069 | nextd = OPDESC(next); | ||
1070 | /* | ||
1071 | * Rest of 2.6.3.1.1: certain operations will return WRONGSEC | ||
1072 | * errors themselves as necessary; others should check for them | ||
1073 | * now: | ||
1074 | */ | ||
1075 | return !(nextd->op_flags & OP_HANDLES_WRONGSEC); | ||
1076 | } | ||
1077 | |||
1034 | /* | 1078 | /* |
1035 | * COMPOUND call. | 1079 | * COMPOUND call. |
1036 | */ | 1080 | */ |
@@ -1108,7 +1152,7 @@ nfsd4_proc_compound(struct svc_rqst *rqstp, | |||
1108 | goto encode_op; | 1152 | goto encode_op; |
1109 | } | 1153 | } |
1110 | 1154 | ||
1111 | opdesc = &nfsd4_ops[op->opnum]; | 1155 | opdesc = OPDESC(op); |
1112 | 1156 | ||
1113 | if (!cstate->current_fh.fh_dentry) { | 1157 | if (!cstate->current_fh.fh_dentry) { |
1114 | if (!(opdesc->op_flags & ALLOWED_WITHOUT_FH)) { | 1158 | if (!(opdesc->op_flags & ALLOWED_WITHOUT_FH)) { |
@@ -1126,6 +1170,9 @@ nfsd4_proc_compound(struct svc_rqst *rqstp, | |||
1126 | else | 1170 | else |
1127 | BUG_ON(op->status == nfs_ok); | 1171 | BUG_ON(op->status == nfs_ok); |
1128 | 1172 | ||
1173 | if (!op->status && need_wrongsec_check(rqstp)) | ||
1174 | op->status = check_nfsd_access(cstate->current_fh.fh_export, rqstp); | ||
1175 | |||
1129 | encode_op: | 1176 | encode_op: |
1130 | /* Only from SEQUENCE */ | 1177 | /* Only from SEQUENCE */ |
1131 | if (resp->cstate.status == nfserr_replay_cache) { | 1178 | if (resp->cstate.status == nfserr_replay_cache) { |
@@ -1217,10 +1264,12 @@ static struct nfsd4_operation nfsd4_ops[] = { | |||
1217 | }, | 1264 | }, |
1218 | [OP_LOOKUP] = { | 1265 | [OP_LOOKUP] = { |
1219 | .op_func = (nfsd4op_func)nfsd4_lookup, | 1266 | .op_func = (nfsd4op_func)nfsd4_lookup, |
1267 | .op_flags = OP_HANDLES_WRONGSEC, | ||
1220 | .op_name = "OP_LOOKUP", | 1268 | .op_name = "OP_LOOKUP", |
1221 | }, | 1269 | }, |
1222 | [OP_LOOKUPP] = { | 1270 | [OP_LOOKUPP] = { |
1223 | .op_func = (nfsd4op_func)nfsd4_lookupp, | 1271 | .op_func = (nfsd4op_func)nfsd4_lookupp, |
1272 | .op_flags = OP_HANDLES_WRONGSEC, | ||
1224 | .op_name = "OP_LOOKUPP", | 1273 | .op_name = "OP_LOOKUPP", |
1225 | }, | 1274 | }, |
1226 | [OP_NVERIFY] = { | 1275 | [OP_NVERIFY] = { |
@@ -1229,6 +1278,7 @@ static struct nfsd4_operation nfsd4_ops[] = { | |||
1229 | }, | 1278 | }, |
1230 | [OP_OPEN] = { | 1279 | [OP_OPEN] = { |
1231 | .op_func = (nfsd4op_func)nfsd4_open, | 1280 | .op_func = (nfsd4op_func)nfsd4_open, |
1281 | .op_flags = OP_HANDLES_WRONGSEC, | ||
1232 | .op_name = "OP_OPEN", | 1282 | .op_name = "OP_OPEN", |
1233 | }, | 1283 | }, |
1234 | [OP_OPEN_CONFIRM] = { | 1284 | [OP_OPEN_CONFIRM] = { |
@@ -1241,17 +1291,20 @@ static struct nfsd4_operation nfsd4_ops[] = { | |||
1241 | }, | 1291 | }, |
1242 | [OP_PUTFH] = { | 1292 | [OP_PUTFH] = { |
1243 | .op_func = (nfsd4op_func)nfsd4_putfh, | 1293 | .op_func = (nfsd4op_func)nfsd4_putfh, |
1244 | .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS, | 1294 | .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS |
1295 | | OP_IS_PUTFH_LIKE, | ||
1245 | .op_name = "OP_PUTFH", | 1296 | .op_name = "OP_PUTFH", |
1246 | }, | 1297 | }, |
1247 | [OP_PUTPUBFH] = { | 1298 | [OP_PUTPUBFH] = { |
1248 | .op_func = (nfsd4op_func)nfsd4_putrootfh, | 1299 | .op_func = (nfsd4op_func)nfsd4_putrootfh, |
1249 | .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS, | 1300 | .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS |
1301 | | OP_IS_PUTFH_LIKE, | ||
1250 | .op_name = "OP_PUTPUBFH", | 1302 | .op_name = "OP_PUTPUBFH", |
1251 | }, | 1303 | }, |
1252 | [OP_PUTROOTFH] = { | 1304 | [OP_PUTROOTFH] = { |
1253 | .op_func = (nfsd4op_func)nfsd4_putrootfh, | 1305 | .op_func = (nfsd4op_func)nfsd4_putrootfh, |
1254 | .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS, | 1306 | .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS |
1307 | | OP_IS_PUTFH_LIKE, | ||
1255 | .op_name = "OP_PUTROOTFH", | 1308 | .op_name = "OP_PUTROOTFH", |
1256 | }, | 1309 | }, |
1257 | [OP_READ] = { | 1310 | [OP_READ] = { |
@@ -1281,15 +1334,18 @@ static struct nfsd4_operation nfsd4_ops[] = { | |||
1281 | }, | 1334 | }, |
1282 | [OP_RESTOREFH] = { | 1335 | [OP_RESTOREFH] = { |
1283 | .op_func = (nfsd4op_func)nfsd4_restorefh, | 1336 | .op_func = (nfsd4op_func)nfsd4_restorefh, |
1284 | .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS, | 1337 | .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS |
1338 | | OP_IS_PUTFH_LIKE, | ||
1285 | .op_name = "OP_RESTOREFH", | 1339 | .op_name = "OP_RESTOREFH", |
1286 | }, | 1340 | }, |
1287 | [OP_SAVEFH] = { | 1341 | [OP_SAVEFH] = { |
1288 | .op_func = (nfsd4op_func)nfsd4_savefh, | 1342 | .op_func = (nfsd4op_func)nfsd4_savefh, |
1343 | .op_flags = OP_HANDLES_WRONGSEC, | ||
1289 | .op_name = "OP_SAVEFH", | 1344 | .op_name = "OP_SAVEFH", |
1290 | }, | 1345 | }, |
1291 | [OP_SECINFO] = { | 1346 | [OP_SECINFO] = { |
1292 | .op_func = (nfsd4op_func)nfsd4_secinfo, | 1347 | .op_func = (nfsd4op_func)nfsd4_secinfo, |
1348 | .op_flags = OP_HANDLES_WRONGSEC, | ||
1293 | .op_name = "OP_SECINFO", | 1349 | .op_name = "OP_SECINFO", |
1294 | }, | 1350 | }, |
1295 | [OP_SETATTR] = { | 1351 | [OP_SETATTR] = { |
@@ -1353,6 +1409,7 @@ static struct nfsd4_operation nfsd4_ops[] = { | |||
1353 | }, | 1409 | }, |
1354 | [OP_SECINFO_NO_NAME] = { | 1410 | [OP_SECINFO_NO_NAME] = { |
1355 | .op_func = (nfsd4op_func)nfsd4_secinfo_no_name, | 1411 | .op_func = (nfsd4op_func)nfsd4_secinfo_no_name, |
1412 | .op_flags = OP_HANDLES_WRONGSEC, | ||
1356 | .op_name = "OP_SECINFO_NO_NAME", | 1413 | .op_name = "OP_SECINFO_NO_NAME", |
1357 | }, | 1414 | }, |
1358 | }; | 1415 | }; |
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 4cf04e11c66c..e98f3c2e9492 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c | |||
@@ -1519,6 +1519,9 @@ nfsd4_create_session(struct svc_rqst *rqstp, | |||
1519 | bool confirm_me = false; | 1519 | bool confirm_me = false; |
1520 | int status = 0; | 1520 | int status = 0; |
1521 | 1521 | ||
1522 | if (cr_ses->flags & ~SESSION4_FLAG_MASK_A) | ||
1523 | return nfserr_inval; | ||
1524 | |||
1522 | nfs4_lock_state(); | 1525 | nfs4_lock_state(); |
1523 | unconf = find_unconfirmed_client(&cr_ses->clientid); | 1526 | unconf = find_unconfirmed_client(&cr_ses->clientid); |
1524 | conf = find_confirmed_client(&cr_ses->clientid); | 1527 | conf = find_confirmed_client(&cr_ses->clientid); |
@@ -1637,8 +1640,9 @@ __be32 nfsd4_bind_conn_to_session(struct svc_rqst *rqstp, | |||
1637 | return nfserr_badsession; | 1640 | return nfserr_badsession; |
1638 | 1641 | ||
1639 | status = nfsd4_map_bcts_dir(&bcts->dir); | 1642 | status = nfsd4_map_bcts_dir(&bcts->dir); |
1640 | nfsd4_new_conn(rqstp, cstate->session, bcts->dir); | 1643 | if (!status) |
1641 | return nfs_ok; | 1644 | nfsd4_new_conn(rqstp, cstate->session, bcts->dir); |
1645 | return status; | ||
1642 | } | 1646 | } |
1643 | 1647 | ||
1644 | static bool nfsd4_compound_in_session(struct nfsd4_session *session, struct nfs4_sessionid *sid) | 1648 | static bool nfsd4_compound_in_session(struct nfsd4_session *session, struct nfs4_sessionid *sid) |
@@ -1725,6 +1729,13 @@ static void nfsd4_sequence_check_conn(struct nfsd4_conn *new, struct nfsd4_sessi | |||
1725 | return; | 1729 | return; |
1726 | } | 1730 | } |
1727 | 1731 | ||
1732 | static bool nfsd4_session_too_many_ops(struct svc_rqst *rqstp, struct nfsd4_session *session) | ||
1733 | { | ||
1734 | struct nfsd4_compoundargs *args = rqstp->rq_argp; | ||
1735 | |||
1736 | return args->opcnt > session->se_fchannel.maxops; | ||
1737 | } | ||
1738 | |||
1728 | __be32 | 1739 | __be32 |
1729 | nfsd4_sequence(struct svc_rqst *rqstp, | 1740 | nfsd4_sequence(struct svc_rqst *rqstp, |
1730 | struct nfsd4_compound_state *cstate, | 1741 | struct nfsd4_compound_state *cstate, |
@@ -1753,6 +1764,10 @@ nfsd4_sequence(struct svc_rqst *rqstp, | |||
1753 | if (!session) | 1764 | if (!session) |
1754 | goto out; | 1765 | goto out; |
1755 | 1766 | ||
1767 | status = nfserr_too_many_ops; | ||
1768 | if (nfsd4_session_too_many_ops(rqstp, session)) | ||
1769 | goto out; | ||
1770 | |||
1756 | status = nfserr_badslot; | 1771 | status = nfserr_badslot; |
1757 | if (seq->slotid >= session->se_fchannel.maxreqs) | 1772 | if (seq->slotid >= session->se_fchannel.maxreqs) |
1758 | goto out; | 1773 | goto out; |
@@ -1808,6 +1823,8 @@ out: | |||
1808 | __be32 | 1823 | __be32 |
1809 | nfsd4_reclaim_complete(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_reclaim_complete *rc) | 1824 | nfsd4_reclaim_complete(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_reclaim_complete *rc) |
1810 | { | 1825 | { |
1826 | int status = 0; | ||
1827 | |||
1811 | if (rc->rca_one_fs) { | 1828 | if (rc->rca_one_fs) { |
1812 | if (!cstate->current_fh.fh_dentry) | 1829 | if (!cstate->current_fh.fh_dentry) |
1813 | return nfserr_nofilehandle; | 1830 | return nfserr_nofilehandle; |
@@ -1817,9 +1834,14 @@ nfsd4_reclaim_complete(struct svc_rqst *rqstp, struct nfsd4_compound_state *csta | |||
1817 | */ | 1834 | */ |
1818 | return nfs_ok; | 1835 | return nfs_ok; |
1819 | } | 1836 | } |
1837 | |||
1820 | nfs4_lock_state(); | 1838 | nfs4_lock_state(); |
1821 | if (is_client_expired(cstate->session->se_client)) { | 1839 | status = nfserr_complete_already; |
1822 | nfs4_unlock_state(); | 1840 | if (cstate->session->se_client->cl_firststate) |
1841 | goto out; | ||
1842 | |||
1843 | status = nfserr_stale_clientid; | ||
1844 | if (is_client_expired(cstate->session->se_client)) | ||
1823 | /* | 1845 | /* |
1824 | * The following error isn't really legal. | 1846 | * The following error isn't really legal. |
1825 | * But we only get here if the client just explicitly | 1847 | * But we only get here if the client just explicitly |
@@ -1827,11 +1849,13 @@ nfsd4_reclaim_complete(struct svc_rqst *rqstp, struct nfsd4_compound_state *csta | |||
1827 | * error it gets back on an operation for the dead | 1849 | * error it gets back on an operation for the dead |
1828 | * client. | 1850 | * client. |
1829 | */ | 1851 | */ |
1830 | return nfserr_stale_clientid; | 1852 | goto out; |
1831 | } | 1853 | |
1854 | status = nfs_ok; | ||
1832 | nfsd4_create_clid_dir(cstate->session->se_client); | 1855 | nfsd4_create_clid_dir(cstate->session->se_client); |
1856 | out: | ||
1833 | nfs4_unlock_state(); | 1857 | nfs4_unlock_state(); |
1834 | return nfs_ok; | 1858 | return status; |
1835 | } | 1859 | } |
1836 | 1860 | ||
1837 | __be32 | 1861 | __be32 |
@@ -2462,7 +2486,7 @@ find_delegation_file(struct nfs4_file *fp, stateid_t *stid) | |||
2462 | return NULL; | 2486 | return NULL; |
2463 | } | 2487 | } |
2464 | 2488 | ||
2465 | int share_access_to_flags(u32 share_access) | 2489 | static int share_access_to_flags(u32 share_access) |
2466 | { | 2490 | { |
2467 | share_access &= ~NFS4_SHARE_WANT_MASK; | 2491 | share_access &= ~NFS4_SHARE_WANT_MASK; |
2468 | 2492 | ||
@@ -2882,7 +2906,7 @@ out: | |||
2882 | return status; | 2906 | return status; |
2883 | } | 2907 | } |
2884 | 2908 | ||
2885 | struct lock_manager nfsd4_manager = { | 2909 | static struct lock_manager nfsd4_manager = { |
2886 | }; | 2910 | }; |
2887 | 2911 | ||
2888 | static void | 2912 | static void |
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index c6766af00d98..990181103214 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c | |||
@@ -424,15 +424,12 @@ nfsd4_decode_access(struct nfsd4_compoundargs *argp, struct nfsd4_access *access | |||
424 | static __be32 nfsd4_decode_bind_conn_to_session(struct nfsd4_compoundargs *argp, struct nfsd4_bind_conn_to_session *bcts) | 424 | static __be32 nfsd4_decode_bind_conn_to_session(struct nfsd4_compoundargs *argp, struct nfsd4_bind_conn_to_session *bcts) |
425 | { | 425 | { |
426 | DECODE_HEAD; | 426 | DECODE_HEAD; |
427 | u32 dummy; | ||
428 | 427 | ||
429 | READ_BUF(NFS4_MAX_SESSIONID_LEN + 8); | 428 | READ_BUF(NFS4_MAX_SESSIONID_LEN + 8); |
430 | COPYMEM(bcts->sessionid.data, NFS4_MAX_SESSIONID_LEN); | 429 | COPYMEM(bcts->sessionid.data, NFS4_MAX_SESSIONID_LEN); |
431 | READ32(bcts->dir); | 430 | READ32(bcts->dir); |
432 | /* XXX: Perhaps Tom Tucker could help us figure out how we | 431 | /* XXX: skipping ctsa_use_conn_in_rdma_mode. Perhaps Tom Tucker |
433 | * should be using ctsa_use_conn_in_rdma_mode: */ | 432 | * could help us figure out how we should be using it. */ |
434 | READ32(dummy); | ||
435 | |||
436 | DECODE_TAIL; | 433 | DECODE_TAIL; |
437 | } | 434 | } |
438 | 435 | ||
@@ -588,8 +585,6 @@ nfsd4_decode_lockt(struct nfsd4_compoundargs *argp, struct nfsd4_lockt *lockt) | |||
588 | READ_BUF(lockt->lt_owner.len); | 585 | READ_BUF(lockt->lt_owner.len); |
589 | READMEM(lockt->lt_owner.data, lockt->lt_owner.len); | 586 | READMEM(lockt->lt_owner.data, lockt->lt_owner.len); |
590 | 587 | ||
591 | if (argp->minorversion && !zero_clientid(&lockt->lt_clientid)) | ||
592 | return nfserr_inval; | ||
593 | DECODE_TAIL; | 588 | DECODE_TAIL; |
594 | } | 589 | } |
595 | 590 | ||
@@ -3120,7 +3115,7 @@ nfsd4_encode_destroy_session(struct nfsd4_compoundres *resp, int nfserr, | |||
3120 | return nfserr; | 3115 | return nfserr; |
3121 | } | 3116 | } |
3122 | 3117 | ||
3123 | __be32 | 3118 | static __be32 |
3124 | nfsd4_encode_sequence(struct nfsd4_compoundres *resp, int nfserr, | 3119 | nfsd4_encode_sequence(struct nfsd4_compoundres *resp, int nfserr, |
3125 | struct nfsd4_sequence *seq) | 3120 | struct nfsd4_sequence *seq) |
3126 | { | 3121 | { |
diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c index 55c8e63af0be..90c6aa6d5e0f 100644 --- a/fs/nfsd/nfsfh.c +++ b/fs/nfsd/nfsfh.c | |||
@@ -344,7 +344,7 @@ fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, int access) | |||
344 | * which clients virtually always use auth_sys for, | 344 | * which clients virtually always use auth_sys for, |
345 | * even while using RPCSEC_GSS for NFS. | 345 | * even while using RPCSEC_GSS for NFS. |
346 | */ | 346 | */ |
347 | if (access & NFSD_MAY_LOCK) | 347 | if (access & NFSD_MAY_LOCK || access & NFSD_MAY_BYPASS_GSS) |
348 | goto skip_pseudoflavor_check; | 348 | goto skip_pseudoflavor_check; |
349 | /* | 349 | /* |
350 | * Clients may expect to be able to use auth_sys during mount, | 350 | * Clients may expect to be able to use auth_sys during mount, |
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 129f3c9f62d5..d5718273bb32 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c | |||
@@ -181,16 +181,10 @@ nfsd_lookup_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp, | |||
181 | struct svc_export *exp; | 181 | struct svc_export *exp; |
182 | struct dentry *dparent; | 182 | struct dentry *dparent; |
183 | struct dentry *dentry; | 183 | struct dentry *dentry; |
184 | __be32 err; | ||
185 | int host_err; | 184 | int host_err; |
186 | 185 | ||
187 | dprintk("nfsd: nfsd_lookup(fh %s, %.*s)\n", SVCFH_fmt(fhp), len,name); | 186 | dprintk("nfsd: nfsd_lookup(fh %s, %.*s)\n", SVCFH_fmt(fhp), len,name); |
188 | 187 | ||
189 | /* Obtain dentry and export. */ | ||
190 | err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_EXEC); | ||
191 | if (err) | ||
192 | return err; | ||
193 | |||
194 | dparent = fhp->fh_dentry; | 188 | dparent = fhp->fh_dentry; |
195 | exp = fhp->fh_export; | 189 | exp = fhp->fh_export; |
196 | exp_get(exp); | 190 | exp_get(exp); |
@@ -254,6 +248,9 @@ nfsd_lookup(struct svc_rqst *rqstp, struct svc_fh *fhp, const char *name, | |||
254 | struct dentry *dentry; | 248 | struct dentry *dentry; |
255 | __be32 err; | 249 | __be32 err; |
256 | 250 | ||
251 | err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_EXEC); | ||
252 | if (err) | ||
253 | return err; | ||
257 | err = nfsd_lookup_dentry(rqstp, fhp, name, len, &exp, &dentry); | 254 | err = nfsd_lookup_dentry(rqstp, fhp, name, len, &exp, &dentry); |
258 | if (err) | 255 | if (err) |
259 | return err; | 256 | return err; |
@@ -877,13 +874,11 @@ static __be32 | |||
877 | nfsd_vfs_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, | 874 | nfsd_vfs_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, |
878 | loff_t offset, struct kvec *vec, int vlen, unsigned long *count) | 875 | loff_t offset, struct kvec *vec, int vlen, unsigned long *count) |
879 | { | 876 | { |
880 | struct inode *inode; | ||
881 | mm_segment_t oldfs; | 877 | mm_segment_t oldfs; |
882 | __be32 err; | 878 | __be32 err; |
883 | int host_err; | 879 | int host_err; |
884 | 880 | ||
885 | err = nfserr_perm; | 881 | err = nfserr_perm; |
886 | inode = file->f_path.dentry->d_inode; | ||
887 | 882 | ||
888 | if (file->f_op->splice_read && rqstp->rq_splice_ok) { | 883 | if (file->f_op->splice_read && rqstp->rq_splice_ok) { |
889 | struct splice_desc sd = { | 884 | struct splice_desc sd = { |
@@ -1340,11 +1335,18 @@ out_nfserr: | |||
1340 | } | 1335 | } |
1341 | 1336 | ||
1342 | #ifdef CONFIG_NFSD_V3 | 1337 | #ifdef CONFIG_NFSD_V3 |
1338 | |||
1339 | static inline int nfsd_create_is_exclusive(int createmode) | ||
1340 | { | ||
1341 | return createmode == NFS3_CREATE_EXCLUSIVE | ||
1342 | || createmode == NFS4_CREATE_EXCLUSIVE4_1; | ||
1343 | } | ||
1344 | |||
1343 | /* | 1345 | /* |
1344 | * NFSv3 version of nfsd_create | 1346 | * NFSv3 and NFSv4 version of nfsd_create |
1345 | */ | 1347 | */ |
1346 | __be32 | 1348 | __be32 |
1347 | nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp, | 1349 | do_nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, |
1348 | char *fname, int flen, struct iattr *iap, | 1350 | char *fname, int flen, struct iattr *iap, |
1349 | struct svc_fh *resfhp, int createmode, u32 *verifier, | 1351 | struct svc_fh *resfhp, int createmode, u32 *verifier, |
1350 | int *truncp, int *created) | 1352 | int *truncp, int *created) |
@@ -1396,7 +1398,7 @@ nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp, | |||
1396 | if (err) | 1398 | if (err) |
1397 | goto out; | 1399 | goto out; |
1398 | 1400 | ||
1399 | if (createmode == NFS3_CREATE_EXCLUSIVE) { | 1401 | if (nfsd_create_is_exclusive(createmode)) { |
1400 | /* solaris7 gets confused (bugid 4218508) if these have | 1402 | /* solaris7 gets confused (bugid 4218508) if these have |
1401 | * the high bit set, so just clear the high bits. If this is | 1403 | * the high bit set, so just clear the high bits. If this is |
1402 | * ever changed to use different attrs for storing the | 1404 | * ever changed to use different attrs for storing the |
@@ -1437,6 +1439,11 @@ nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp, | |||
1437 | && dchild->d_inode->i_atime.tv_sec == v_atime | 1439 | && dchild->d_inode->i_atime.tv_sec == v_atime |
1438 | && dchild->d_inode->i_size == 0 ) | 1440 | && dchild->d_inode->i_size == 0 ) |
1439 | break; | 1441 | break; |
1442 | case NFS4_CREATE_EXCLUSIVE4_1: | ||
1443 | if ( dchild->d_inode->i_mtime.tv_sec == v_mtime | ||
1444 | && dchild->d_inode->i_atime.tv_sec == v_atime | ||
1445 | && dchild->d_inode->i_size == 0 ) | ||
1446 | goto set_attr; | ||
1440 | /* fallthru */ | 1447 | /* fallthru */ |
1441 | case NFS3_CREATE_GUARDED: | 1448 | case NFS3_CREATE_GUARDED: |
1442 | err = nfserr_exist; | 1449 | err = nfserr_exist; |
@@ -1455,7 +1462,7 @@ nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp, | |||
1455 | 1462 | ||
1456 | nfsd_check_ignore_resizing(iap); | 1463 | nfsd_check_ignore_resizing(iap); |
1457 | 1464 | ||
1458 | if (createmode == NFS3_CREATE_EXCLUSIVE) { | 1465 | if (nfsd_create_is_exclusive(createmode)) { |
1459 | /* Cram the verifier into atime/mtime */ | 1466 | /* Cram the verifier into atime/mtime */ |
1460 | iap->ia_valid = ATTR_MTIME|ATTR_ATIME | 1467 | iap->ia_valid = ATTR_MTIME|ATTR_ATIME |
1461 | | ATTR_MTIME_SET|ATTR_ATIME_SET; | 1468 | | ATTR_MTIME_SET|ATTR_ATIME_SET; |
@@ -2034,7 +2041,7 @@ nfsd_permission(struct svc_rqst *rqstp, struct svc_export *exp, | |||
2034 | struct inode *inode = dentry->d_inode; | 2041 | struct inode *inode = dentry->d_inode; |
2035 | int err; | 2042 | int err; |
2036 | 2043 | ||
2037 | if (acc == NFSD_MAY_NOP) | 2044 | if ((acc & NFSD_MAY_MASK) == NFSD_MAY_NOP) |
2038 | return 0; | 2045 | return 0; |
2039 | #if 0 | 2046 | #if 0 |
2040 | dprintk("nfsd: permission 0x%x%s%s%s%s%s%s%s mode 0%o%s%s%s\n", | 2047 | dprintk("nfsd: permission 0x%x%s%s%s%s%s%s%s mode 0%o%s%s%s\n", |
diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h index 9a370a5e36b7..e0bbac04d1dd 100644 --- a/fs/nfsd/vfs.h +++ b/fs/nfsd/vfs.h | |||
@@ -17,10 +17,14 @@ | |||
17 | #define NFSD_MAY_SATTR 8 | 17 | #define NFSD_MAY_SATTR 8 |
18 | #define NFSD_MAY_TRUNC 16 | 18 | #define NFSD_MAY_TRUNC 16 |
19 | #define NFSD_MAY_LOCK 32 | 19 | #define NFSD_MAY_LOCK 32 |
20 | #define NFSD_MAY_MASK 63 | ||
21 | |||
22 | /* extra hints to permission and open routines: */ | ||
20 | #define NFSD_MAY_OWNER_OVERRIDE 64 | 23 | #define NFSD_MAY_OWNER_OVERRIDE 64 |
21 | #define NFSD_MAY_LOCAL_ACCESS 128 /* IRIX doing local access check on device special file*/ | 24 | #define NFSD_MAY_LOCAL_ACCESS 128 /* IRIX doing local access check on device special file*/ |
22 | #define NFSD_MAY_BYPASS_GSS_ON_ROOT 256 | 25 | #define NFSD_MAY_BYPASS_GSS_ON_ROOT 256 |
23 | #define NFSD_MAY_NOT_BREAK_LEASE 512 | 26 | #define NFSD_MAY_NOT_BREAK_LEASE 512 |
27 | #define NFSD_MAY_BYPASS_GSS 1024 | ||
24 | 28 | ||
25 | #define NFSD_MAY_CREATE (NFSD_MAY_EXEC|NFSD_MAY_WRITE) | 29 | #define NFSD_MAY_CREATE (NFSD_MAY_EXEC|NFSD_MAY_WRITE) |
26 | #define NFSD_MAY_REMOVE (NFSD_MAY_EXEC|NFSD_MAY_WRITE|NFSD_MAY_TRUNC) | 30 | #define NFSD_MAY_REMOVE (NFSD_MAY_EXEC|NFSD_MAY_WRITE|NFSD_MAY_TRUNC) |
@@ -54,7 +58,7 @@ __be32 nfsd_create(struct svc_rqst *, struct svc_fh *, | |||
54 | int type, dev_t rdev, struct svc_fh *res); | 58 | int type, dev_t rdev, struct svc_fh *res); |
55 | #ifdef CONFIG_NFSD_V3 | 59 | #ifdef CONFIG_NFSD_V3 |
56 | __be32 nfsd_access(struct svc_rqst *, struct svc_fh *, u32 *, u32 *); | 60 | __be32 nfsd_access(struct svc_rqst *, struct svc_fh *, u32 *, u32 *); |
57 | __be32 nfsd_create_v3(struct svc_rqst *, struct svc_fh *, | 61 | __be32 do_nfsd_create(struct svc_rqst *, struct svc_fh *, |
58 | char *name, int len, struct iattr *attrs, | 62 | char *name, int len, struct iattr *attrs, |
59 | struct svc_fh *res, int createmode, | 63 | struct svc_fh *res, int createmode, |
60 | u32 *verifier, int *truncp, int *created); | 64 | u32 *verifier, int *truncp, int *created); |
diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c index 587f18432832..b954878ad6ce 100644 --- a/fs/nilfs2/inode.c +++ b/fs/nilfs2/inode.c | |||
@@ -917,7 +917,7 @@ int nilfs_mark_inode_dirty(struct inode *inode) | |||
917 | * construction. This function can be called both as a single operation | 917 | * construction. This function can be called both as a single operation |
918 | * and as a part of indivisible file operations. | 918 | * and as a part of indivisible file operations. |
919 | */ | 919 | */ |
920 | void nilfs_dirty_inode(struct inode *inode) | 920 | void nilfs_dirty_inode(struct inode *inode, int flags) |
921 | { | 921 | { |
922 | struct nilfs_transaction_info ti; | 922 | struct nilfs_transaction_info ti; |
923 | struct nilfs_mdt_info *mdi = NILFS_MDT(inode); | 923 | struct nilfs_mdt_info *mdi = NILFS_MDT(inode); |
diff --git a/fs/nilfs2/namei.c b/fs/nilfs2/namei.c index 1102a5fbb744..546849b3e88f 100644 --- a/fs/nilfs2/namei.c +++ b/fs/nilfs2/namei.c | |||
@@ -334,8 +334,6 @@ static int nilfs_rmdir(struct inode *dir, struct dentry *dentry) | |||
334 | struct nilfs_transaction_info ti; | 334 | struct nilfs_transaction_info ti; |
335 | int err; | 335 | int err; |
336 | 336 | ||
337 | dentry_unhash(dentry); | ||
338 | |||
339 | err = nilfs_transaction_begin(dir->i_sb, &ti, 0); | 337 | err = nilfs_transaction_begin(dir->i_sb, &ti, 0); |
340 | if (err) | 338 | if (err) |
341 | return err; | 339 | return err; |
@@ -371,9 +369,6 @@ static int nilfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
371 | struct nilfs_transaction_info ti; | 369 | struct nilfs_transaction_info ti; |
372 | int err; | 370 | int err; |
373 | 371 | ||
374 | if (new_inode && S_ISDIR(new_inode->i_mode)) | ||
375 | dentry_unhash(new_dentry); | ||
376 | |||
377 | err = nilfs_transaction_begin(old_dir->i_sb, &ti, 1); | 372 | err = nilfs_transaction_begin(old_dir->i_sb, &ti, 1); |
378 | if (unlikely(err)) | 373 | if (unlikely(err)) |
379 | return err; | 374 | return err; |
diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h index a9c6a531f80c..f02b9ad43a21 100644 --- a/fs/nilfs2/nilfs.h +++ b/fs/nilfs2/nilfs.h | |||
@@ -269,7 +269,7 @@ int nilfs_load_inode_block(struct inode *inode, struct buffer_head **pbh); | |||
269 | extern int nilfs_inode_dirty(struct inode *); | 269 | extern int nilfs_inode_dirty(struct inode *); |
270 | int nilfs_set_file_dirty(struct inode *inode, unsigned nr_dirty); | 270 | int nilfs_set_file_dirty(struct inode *inode, unsigned nr_dirty); |
271 | extern int nilfs_mark_inode_dirty(struct inode *); | 271 | extern int nilfs_mark_inode_dirty(struct inode *); |
272 | extern void nilfs_dirty_inode(struct inode *); | 272 | extern void nilfs_dirty_inode(struct inode *, int flags); |
273 | int nilfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | 273 | int nilfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, |
274 | __u64 start, __u64 len); | 274 | __u64 start, __u64 len); |
275 | 275 | ||
diff --git a/fs/omfs/dir.c b/fs/omfs/dir.c index c368360c35a1..3b8d3979e03b 100644 --- a/fs/omfs/dir.c +++ b/fs/omfs/dir.c | |||
@@ -241,11 +241,9 @@ static int omfs_remove(struct inode *dir, struct dentry *dentry) | |||
241 | int ret; | 241 | int ret; |
242 | 242 | ||
243 | 243 | ||
244 | if (S_ISDIR(inode->i_mode)) { | 244 | if (S_ISDIR(inode->i_mode) && |
245 | dentry_unhash(dentry); | 245 | !omfs_dir_is_empty(inode)) |
246 | if (!omfs_dir_is_empty(inode)) | 246 | return -ENOTEMPTY; |
247 | return -ENOTEMPTY; | ||
248 | } | ||
249 | 247 | ||
250 | ret = omfs_delete_entry(dentry); | 248 | ret = omfs_delete_entry(dentry); |
251 | if (ret) | 249 | if (ret) |
@@ -382,9 +380,6 @@ static int omfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
382 | int err; | 380 | int err; |
383 | 381 | ||
384 | if (new_inode) { | 382 | if (new_inode) { |
385 | if (S_ISDIR(new_inode->i_mode)) | ||
386 | dentry_unhash(new_dentry); | ||
387 | |||
388 | /* overwriting existing file/dir */ | 383 | /* overwriting existing file/dir */ |
389 | err = omfs_remove(new_dir, new_dentry); | 384 | err = omfs_remove(new_dir, new_dentry); |
390 | if (err) | 385 | if (err) |
diff --git a/fs/partitions/check.c b/fs/partitions/check.c index f82e762eeca2..d545e97d99c3 100644 --- a/fs/partitions/check.c +++ b/fs/partitions/check.c | |||
@@ -255,13 +255,7 @@ ssize_t part_discard_alignment_show(struct device *dev, | |||
255 | struct device_attribute *attr, char *buf) | 255 | struct device_attribute *attr, char *buf) |
256 | { | 256 | { |
257 | struct hd_struct *p = dev_to_part(dev); | 257 | struct hd_struct *p = dev_to_part(dev); |
258 | struct gendisk *disk = dev_to_disk(dev); | 258 | return sprintf(buf, "%u\n", p->discard_alignment); |
259 | unsigned int alignment = 0; | ||
260 | |||
261 | if (disk->queue) | ||
262 | alignment = queue_limit_discard_alignment(&disk->queue->limits, | ||
263 | p->start_sect); | ||
264 | return sprintf(buf, "%u\n", alignment); | ||
265 | } | 259 | } |
266 | 260 | ||
267 | ssize_t part_stat_show(struct device *dev, | 261 | ssize_t part_stat_show(struct device *dev, |
@@ -455,6 +449,8 @@ struct hd_struct *add_partition(struct gendisk *disk, int partno, | |||
455 | p->start_sect = start; | 449 | p->start_sect = start; |
456 | p->alignment_offset = | 450 | p->alignment_offset = |
457 | queue_limit_alignment_offset(&disk->queue->limits, start); | 451 | queue_limit_alignment_offset(&disk->queue->limits, start); |
452 | p->discard_alignment = | ||
453 | queue_limit_discard_alignment(&disk->queue->limits, start); | ||
458 | p->nr_sects = len; | 454 | p->nr_sects = len; |
459 | p->partno = partno; | 455 | p->partno = partno; |
460 | p->policy = get_disk_ro(disk); | 456 | p->policy = get_disk_ro(disk); |
diff --git a/fs/proc/base.c b/fs/proc/base.c index 4ede550517a6..14def991d9dd 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c | |||
@@ -83,6 +83,9 @@ | |||
83 | #include <linux/pid_namespace.h> | 83 | #include <linux/pid_namespace.h> |
84 | #include <linux/fs_struct.h> | 84 | #include <linux/fs_struct.h> |
85 | #include <linux/slab.h> | 85 | #include <linux/slab.h> |
86 | #ifdef CONFIG_HARDWALL | ||
87 | #include <asm/hardwall.h> | ||
88 | #endif | ||
86 | #include "internal.h" | 89 | #include "internal.h" |
87 | 90 | ||
88 | /* NOTE: | 91 | /* NOTE: |
@@ -2842,6 +2845,9 @@ static const struct pid_entry tgid_base_stuff[] = { | |||
2842 | #ifdef CONFIG_TASK_IO_ACCOUNTING | 2845 | #ifdef CONFIG_TASK_IO_ACCOUNTING |
2843 | INF("io", S_IRUGO, proc_tgid_io_accounting), | 2846 | INF("io", S_IRUGO, proc_tgid_io_accounting), |
2844 | #endif | 2847 | #endif |
2848 | #ifdef CONFIG_HARDWALL | ||
2849 | INF("hardwall", S_IRUGO, proc_pid_hardwall), | ||
2850 | #endif | ||
2845 | }; | 2851 | }; |
2846 | 2852 | ||
2847 | static int proc_tgid_base_readdir(struct file * filp, | 2853 | static int proc_tgid_base_readdir(struct file * filp, |
@@ -3181,6 +3187,9 @@ static const struct pid_entry tid_base_stuff[] = { | |||
3181 | #ifdef CONFIG_TASK_IO_ACCOUNTING | 3187 | #ifdef CONFIG_TASK_IO_ACCOUNTING |
3182 | INF("io", S_IRUGO, proc_tid_io_accounting), | 3188 | INF("io", S_IRUGO, proc_tid_io_accounting), |
3183 | #endif | 3189 | #endif |
3190 | #ifdef CONFIG_HARDWALL | ||
3191 | INF("hardwall", S_IRUGO, proc_pid_hardwall), | ||
3192 | #endif | ||
3184 | }; | 3193 | }; |
3185 | 3194 | ||
3186 | static int proc_tid_base_readdir(struct file * filp, | 3195 | static int proc_tid_base_readdir(struct file * filp, |
diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c index 76c8164d5651..118662690cdf 100644 --- a/fs/reiserfs/namei.c +++ b/fs/reiserfs/namei.c | |||
@@ -831,8 +831,6 @@ static int reiserfs_rmdir(struct inode *dir, struct dentry *dentry) | |||
831 | INITIALIZE_PATH(path); | 831 | INITIALIZE_PATH(path); |
832 | struct reiserfs_dir_entry de; | 832 | struct reiserfs_dir_entry de; |
833 | 833 | ||
834 | dentry_unhash(dentry); | ||
835 | |||
836 | /* we will be doing 2 balancings and update 2 stat data, we change quotas | 834 | /* we will be doing 2 balancings and update 2 stat data, we change quotas |
837 | * of the owner of the directory and of the owner of the parent directory. | 835 | * of the owner of the directory and of the owner of the parent directory. |
838 | * The quota structure is possibly deleted only on last iput => outside | 836 | * The quota structure is possibly deleted only on last iput => outside |
@@ -1227,9 +1225,6 @@ static int reiserfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
1227 | unsigned long savelink = 1; | 1225 | unsigned long savelink = 1; |
1228 | struct timespec ctime; | 1226 | struct timespec ctime; |
1229 | 1227 | ||
1230 | if (new_dentry->d_inode && S_ISDIR(new_dentry->d_inode->i_mode)) | ||
1231 | dentry_unhash(new_dentry); | ||
1232 | |||
1233 | /* three balancings: (1) old name removal, (2) new name insertion | 1228 | /* three balancings: (1) old name removal, (2) new name insertion |
1234 | and (3) maybe "save" link insertion | 1229 | and (3) maybe "save" link insertion |
1235 | stat data updates: (1) old directory, | 1230 | stat data updates: (1) old directory, |
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index b216ff6be1c9..aa91089162cb 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c | |||
@@ -568,7 +568,7 @@ static void destroy_inodecache(void) | |||
568 | } | 568 | } |
569 | 569 | ||
570 | /* we don't mark inodes dirty, we just log them */ | 570 | /* we don't mark inodes dirty, we just log them */ |
571 | static void reiserfs_dirty_inode(struct inode *inode) | 571 | static void reiserfs_dirty_inode(struct inode *inode, int flags) |
572 | { | 572 | { |
573 | struct reiserfs_transaction_handle th; | 573 | struct reiserfs_transaction_handle th; |
574 | 574 | ||
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c index 50f1abccd1cd..e8a62f41b458 100644 --- a/fs/reiserfs/xattr.c +++ b/fs/reiserfs/xattr.c | |||
@@ -98,7 +98,6 @@ static int xattr_rmdir(struct inode *dir, struct dentry *dentry) | |||
98 | 98 | ||
99 | reiserfs_mutex_lock_nested_safe(&dentry->d_inode->i_mutex, | 99 | reiserfs_mutex_lock_nested_safe(&dentry->d_inode->i_mutex, |
100 | I_MUTEX_CHILD, dir->i_sb); | 100 | I_MUTEX_CHILD, dir->i_sb); |
101 | dentry_unhash(dentry); | ||
102 | error = dir->i_op->rmdir(dir, dentry); | 101 | error = dir->i_op->rmdir(dir, dentry); |
103 | if (!error) | 102 | if (!error) |
104 | dentry->d_inode->i_flags |= S_DEAD; | 103 | dentry->d_inode->i_flags |= S_DEAD; |
diff --git a/fs/squashfs/export.c b/fs/squashfs/export.c index 730c56248c9b..5e1101ff276f 100644 --- a/fs/squashfs/export.c +++ b/fs/squashfs/export.c | |||
@@ -147,7 +147,7 @@ __le64 *squashfs_read_inode_lookup_table(struct super_block *sb, | |||
147 | * table[0] points to the first inode lookup table metadata block, | 147 | * table[0] points to the first inode lookup table metadata block, |
148 | * this should be less than lookup_table_start | 148 | * this should be less than lookup_table_start |
149 | */ | 149 | */ |
150 | if (!IS_ERR(table) && table[0] >= lookup_table_start) { | 150 | if (!IS_ERR(table) && le64_to_cpu(table[0]) >= lookup_table_start) { |
151 | kfree(table); | 151 | kfree(table); |
152 | return ERR_PTR(-EINVAL); | 152 | return ERR_PTR(-EINVAL); |
153 | } | 153 | } |
diff --git a/fs/squashfs/fragment.c b/fs/squashfs/fragment.c index 1516a6490bfb..0ed6edbc5c71 100644 --- a/fs/squashfs/fragment.c +++ b/fs/squashfs/fragment.c | |||
@@ -90,7 +90,7 @@ __le64 *squashfs_read_fragment_index_table(struct super_block *sb, | |||
90 | * table[0] points to the first fragment table metadata block, this | 90 | * table[0] points to the first fragment table metadata block, this |
91 | * should be less than fragment_table_start | 91 | * should be less than fragment_table_start |
92 | */ | 92 | */ |
93 | if (!IS_ERR(table) && table[0] >= fragment_table_start) { | 93 | if (!IS_ERR(table) && le64_to_cpu(table[0]) >= fragment_table_start) { |
94 | kfree(table); | 94 | kfree(table); |
95 | return ERR_PTR(-EINVAL); | 95 | return ERR_PTR(-EINVAL); |
96 | } | 96 | } |
diff --git a/fs/squashfs/id.c b/fs/squashfs/id.c index a70858e0fb44..d38ea3dab951 100644 --- a/fs/squashfs/id.c +++ b/fs/squashfs/id.c | |||
@@ -93,7 +93,7 @@ __le64 *squashfs_read_id_index_table(struct super_block *sb, | |||
93 | * table[0] points to the first id lookup table metadata block, this | 93 | * table[0] points to the first id lookup table metadata block, this |
94 | * should be less than id_table_start | 94 | * should be less than id_table_start |
95 | */ | 95 | */ |
96 | if (!IS_ERR(table) && table[0] >= id_table_start) { | 96 | if (!IS_ERR(table) && le64_to_cpu(table[0]) >= id_table_start) { |
97 | kfree(table); | 97 | kfree(table); |
98 | return ERR_PTR(-EINVAL); | 98 | return ERR_PTR(-EINVAL); |
99 | } | 99 | } |
diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c index 6f26abee3597..7438850c62d0 100644 --- a/fs/squashfs/super.c +++ b/fs/squashfs/super.c | |||
@@ -245,7 +245,7 @@ allocate_id_index_table: | |||
245 | msblk->id_table = NULL; | 245 | msblk->id_table = NULL; |
246 | goto failed_mount; | 246 | goto failed_mount; |
247 | } | 247 | } |
248 | next_table = msblk->id_table[0]; | 248 | next_table = le64_to_cpu(msblk->id_table[0]); |
249 | 249 | ||
250 | /* Handle inode lookup table */ | 250 | /* Handle inode lookup table */ |
251 | lookup_table_start = le64_to_cpu(sblk->lookup_table_start); | 251 | lookup_table_start = le64_to_cpu(sblk->lookup_table_start); |
@@ -261,7 +261,7 @@ allocate_id_index_table: | |||
261 | msblk->inode_lookup_table = NULL; | 261 | msblk->inode_lookup_table = NULL; |
262 | goto failed_mount; | 262 | goto failed_mount; |
263 | } | 263 | } |
264 | next_table = msblk->inode_lookup_table[0]; | 264 | next_table = le64_to_cpu(msblk->inode_lookup_table[0]); |
265 | 265 | ||
266 | sb->s_export_op = &squashfs_export_ops; | 266 | sb->s_export_op = &squashfs_export_ops; |
267 | 267 | ||
@@ -286,7 +286,7 @@ handle_fragments: | |||
286 | msblk->fragment_index = NULL; | 286 | msblk->fragment_index = NULL; |
287 | goto failed_mount; | 287 | goto failed_mount; |
288 | } | 288 | } |
289 | next_table = msblk->fragment_index[0]; | 289 | next_table = le64_to_cpu(msblk->fragment_index[0]); |
290 | 290 | ||
291 | check_directory_table: | 291 | check_directory_table: |
292 | /* Sanity check directory_table */ | 292 | /* Sanity check directory_table */ |
diff --git a/fs/sysv/namei.c b/fs/sysv/namei.c index e2cc6756f3b1..e474fbcf8bde 100644 --- a/fs/sysv/namei.c +++ b/fs/sysv/namei.c | |||
@@ -196,8 +196,6 @@ static int sysv_rmdir(struct inode * dir, struct dentry * dentry) | |||
196 | struct inode *inode = dentry->d_inode; | 196 | struct inode *inode = dentry->d_inode; |
197 | int err = -ENOTEMPTY; | 197 | int err = -ENOTEMPTY; |
198 | 198 | ||
199 | dentry_unhash(dentry); | ||
200 | |||
201 | if (sysv_empty_dir(inode)) { | 199 | if (sysv_empty_dir(inode)) { |
202 | err = sysv_unlink(dir, dentry); | 200 | err = sysv_unlink(dir, dentry); |
203 | if (!err) { | 201 | if (!err) { |
@@ -224,9 +222,6 @@ static int sysv_rename(struct inode * old_dir, struct dentry * old_dentry, | |||
224 | struct sysv_dir_entry * old_de; | 222 | struct sysv_dir_entry * old_de; |
225 | int err = -ENOENT; | 223 | int err = -ENOENT; |
226 | 224 | ||
227 | if (new_inode && S_ISDIR(new_inode->i_mode)) | ||
228 | dentry_unhash(new_dentry); | ||
229 | |||
230 | old_de = sysv_find_entry(old_dentry, &old_page); | 225 | old_de = sysv_find_entry(old_dentry, &old_page); |
231 | if (!old_de) | 226 | if (!old_de) |
232 | goto out; | 227 | goto out; |
diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c index c2b80943560d..ef5abd38f0bf 100644 --- a/fs/ubifs/dir.c +++ b/fs/ubifs/dir.c | |||
@@ -656,8 +656,6 @@ static int ubifs_rmdir(struct inode *dir, struct dentry *dentry) | |||
656 | struct ubifs_inode *dir_ui = ubifs_inode(dir); | 656 | struct ubifs_inode *dir_ui = ubifs_inode(dir); |
657 | struct ubifs_budget_req req = { .mod_dent = 1, .dirtied_ino = 2 }; | 657 | struct ubifs_budget_req req = { .mod_dent = 1, .dirtied_ino = 2 }; |
658 | 658 | ||
659 | dentry_unhash(dentry); | ||
660 | |||
661 | /* | 659 | /* |
662 | * Budget request settings: deletion direntry, deletion inode and | 660 | * Budget request settings: deletion direntry, deletion inode and |
663 | * changing the parent inode. If budgeting fails, go ahead anyway | 661 | * changing the parent inode. If budgeting fails, go ahead anyway |
@@ -978,9 +976,6 @@ static int ubifs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
978 | .dirtied_ino_d = ALIGN(old_inode_ui->data_len, 8) }; | 976 | .dirtied_ino_d = ALIGN(old_inode_ui->data_len, 8) }; |
979 | struct timespec time; | 977 | struct timespec time; |
980 | 978 | ||
981 | if (new_inode && S_ISDIR(new_inode->i_mode)) | ||
982 | dentry_unhash(new_dentry); | ||
983 | |||
984 | /* | 979 | /* |
985 | * Budget request settings: deletion direntry, new direntry, removing | 980 | * Budget request settings: deletion direntry, new direntry, removing |
986 | * the old inode, and changing old and new parent directory inodes. | 981 | * the old inode, and changing old and new parent directory inodes. |
diff --git a/fs/ubifs/io.c b/fs/ubifs/io.c index 166951e0dcd3..3be645e012c9 100644 --- a/fs/ubifs/io.c +++ b/fs/ubifs/io.c | |||
@@ -581,6 +581,7 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len) | |||
581 | ubifs_assert(wbuf->size % c->min_io_size == 0); | 581 | ubifs_assert(wbuf->size % c->min_io_size == 0); |
582 | ubifs_assert(mutex_is_locked(&wbuf->io_mutex)); | 582 | ubifs_assert(mutex_is_locked(&wbuf->io_mutex)); |
583 | ubifs_assert(!c->ro_media && !c->ro_mount); | 583 | ubifs_assert(!c->ro_media && !c->ro_mount); |
584 | ubifs_assert(!c->space_fixup); | ||
584 | if (c->leb_size - wbuf->offs >= c->max_write_size) | 585 | if (c->leb_size - wbuf->offs >= c->max_write_size) |
585 | ubifs_assert(!((wbuf->offs + wbuf->size) % c->max_write_size)); | 586 | ubifs_assert(!((wbuf->offs + wbuf->size) % c->max_write_size)); |
586 | 587 | ||
@@ -759,6 +760,7 @@ int ubifs_write_node(struct ubifs_info *c, void *buf, int len, int lnum, | |||
759 | ubifs_assert(lnum >= 0 && lnum < c->leb_cnt && offs >= 0); | 760 | ubifs_assert(lnum >= 0 && lnum < c->leb_cnt && offs >= 0); |
760 | ubifs_assert(offs % c->min_io_size == 0 && offs < c->leb_size); | 761 | ubifs_assert(offs % c->min_io_size == 0 && offs < c->leb_size); |
761 | ubifs_assert(!c->ro_media && !c->ro_mount); | 762 | ubifs_assert(!c->ro_media && !c->ro_mount); |
763 | ubifs_assert(!c->space_fixup); | ||
762 | 764 | ||
763 | if (c->ro_error) | 765 | if (c->ro_error) |
764 | return -EROFS; | 766 | return -EROFS; |
diff --git a/fs/ubifs/journal.c b/fs/ubifs/journal.c index 34b1679e6e3a..cef0460f4c54 100644 --- a/fs/ubifs/journal.c +++ b/fs/ubifs/journal.c | |||
@@ -669,6 +669,7 @@ out_free: | |||
669 | 669 | ||
670 | out_release: | 670 | out_release: |
671 | release_head(c, BASEHD); | 671 | release_head(c, BASEHD); |
672 | kfree(dent); | ||
672 | out_ro: | 673 | out_ro: |
673 | ubifs_ro_mode(c, err); | 674 | ubifs_ro_mode(c, err); |
674 | if (last_reference) | 675 | if (last_reference) |
diff --git a/fs/ubifs/orphan.c b/fs/ubifs/orphan.c index bd644bf587a8..a5422fffbd69 100644 --- a/fs/ubifs/orphan.c +++ b/fs/ubifs/orphan.c | |||
@@ -674,7 +674,7 @@ static int kill_orphans(struct ubifs_info *c) | |||
674 | if (IS_ERR(sleb)) { | 674 | if (IS_ERR(sleb)) { |
675 | if (PTR_ERR(sleb) == -EUCLEAN) | 675 | if (PTR_ERR(sleb) == -EUCLEAN) |
676 | sleb = ubifs_recover_leb(c, lnum, 0, | 676 | sleb = ubifs_recover_leb(c, lnum, 0, |
677 | c->sbuf, 0); | 677 | c->sbuf, -1); |
678 | if (IS_ERR(sleb)) { | 678 | if (IS_ERR(sleb)) { |
679 | err = PTR_ERR(sleb); | 679 | err = PTR_ERR(sleb); |
680 | break; | 680 | break; |
diff --git a/fs/ubifs/recovery.c b/fs/ubifs/recovery.c index 731d9e2e7b50..783d8e0beb76 100644 --- a/fs/ubifs/recovery.c +++ b/fs/ubifs/recovery.c | |||
@@ -564,19 +564,15 @@ static int fix_unclean_leb(struct ubifs_info *c, struct ubifs_scan_leb *sleb, | |||
564 | } | 564 | } |
565 | 565 | ||
566 | /** | 566 | /** |
567 | * drop_last_node - drop the last node or group of nodes. | 567 | * drop_last_group - drop the last group of nodes. |
568 | * @sleb: scanned LEB information | 568 | * @sleb: scanned LEB information |
569 | * @offs: offset of dropped nodes is returned here | 569 | * @offs: offset of dropped nodes is returned here |
570 | * @grouped: non-zero if whole group of nodes have to be dropped | ||
571 | * | 570 | * |
572 | * This is a helper function for 'ubifs_recover_leb()' which drops the last | 571 | * This is a helper function for 'ubifs_recover_leb()' which drops the last |
573 | * node of the scanned LEB or the last group of nodes if @grouped is not zero. | 572 | * group of nodes of the scanned LEB. |
574 | * This function returns %1 if a node was dropped and %0 otherwise. | ||
575 | */ | 573 | */ |
576 | static int drop_last_node(struct ubifs_scan_leb *sleb, int *offs, int grouped) | 574 | static void drop_last_group(struct ubifs_scan_leb *sleb, int *offs) |
577 | { | 575 | { |
578 | int dropped = 0; | ||
579 | |||
580 | while (!list_empty(&sleb->nodes)) { | 576 | while (!list_empty(&sleb->nodes)) { |
581 | struct ubifs_scan_node *snod; | 577 | struct ubifs_scan_node *snod; |
582 | struct ubifs_ch *ch; | 578 | struct ubifs_ch *ch; |
@@ -585,17 +581,40 @@ static int drop_last_node(struct ubifs_scan_leb *sleb, int *offs, int grouped) | |||
585 | list); | 581 | list); |
586 | ch = snod->node; | 582 | ch = snod->node; |
587 | if (ch->group_type != UBIFS_IN_NODE_GROUP) | 583 | if (ch->group_type != UBIFS_IN_NODE_GROUP) |
588 | return dropped; | 584 | break; |
589 | dbg_rcvry("dropping node at %d:%d", sleb->lnum, snod->offs); | 585 | |
586 | dbg_rcvry("dropping grouped node at %d:%d", | ||
587 | sleb->lnum, snod->offs); | ||
588 | *offs = snod->offs; | ||
589 | list_del(&snod->list); | ||
590 | kfree(snod); | ||
591 | sleb->nodes_cnt -= 1; | ||
592 | } | ||
593 | } | ||
594 | |||
595 | /** | ||
596 | * drop_last_node - drop the last node. | ||
597 | * @sleb: scanned LEB information | ||
598 | * @offs: offset of dropped nodes is returned here | ||
599 | * @grouped: non-zero if whole group of nodes have to be dropped | ||
600 | * | ||
601 | * This is a helper function for 'ubifs_recover_leb()' which drops the last | ||
602 | * node of the scanned LEB. | ||
603 | */ | ||
604 | static void drop_last_node(struct ubifs_scan_leb *sleb, int *offs) | ||
605 | { | ||
606 | struct ubifs_scan_node *snod; | ||
607 | |||
608 | if (!list_empty(&sleb->nodes)) { | ||
609 | snod = list_entry(sleb->nodes.prev, struct ubifs_scan_node, | ||
610 | list); | ||
611 | |||
612 | dbg_rcvry("dropping last node at %d:%d", sleb->lnum, snod->offs); | ||
590 | *offs = snod->offs; | 613 | *offs = snod->offs; |
591 | list_del(&snod->list); | 614 | list_del(&snod->list); |
592 | kfree(snod); | 615 | kfree(snod); |
593 | sleb->nodes_cnt -= 1; | 616 | sleb->nodes_cnt -= 1; |
594 | dropped = 1; | ||
595 | if (!grouped) | ||
596 | break; | ||
597 | } | 617 | } |
598 | return dropped; | ||
599 | } | 618 | } |
600 | 619 | ||
601 | /** | 620 | /** |
@@ -604,7 +623,8 @@ static int drop_last_node(struct ubifs_scan_leb *sleb, int *offs, int grouped) | |||
604 | * @lnum: LEB number | 623 | * @lnum: LEB number |
605 | * @offs: offset | 624 | * @offs: offset |
606 | * @sbuf: LEB-sized buffer to use | 625 | * @sbuf: LEB-sized buffer to use |
607 | * @grouped: nodes may be grouped for recovery | 626 | * @jhead: journal head number this LEB belongs to (%-1 if the LEB does not |
627 | * belong to any journal head) | ||
608 | * | 628 | * |
609 | * This function does a scan of a LEB, but caters for errors that might have | 629 | * This function does a scan of a LEB, but caters for errors that might have |
610 | * been caused by the unclean unmount from which we are attempting to recover. | 630 | * been caused by the unclean unmount from which we are attempting to recover. |
@@ -612,13 +632,14 @@ static int drop_last_node(struct ubifs_scan_leb *sleb, int *offs, int grouped) | |||
612 | * found, and a negative error code in case of failure. | 632 | * found, and a negative error code in case of failure. |
613 | */ | 633 | */ |
614 | struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum, | 634 | struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum, |
615 | int offs, void *sbuf, int grouped) | 635 | int offs, void *sbuf, int jhead) |
616 | { | 636 | { |
617 | int ret = 0, err, len = c->leb_size - offs, start = offs, min_io_unit; | 637 | int ret = 0, err, len = c->leb_size - offs, start = offs, min_io_unit; |
638 | int grouped = jhead == -1 ? 0 : c->jheads[jhead].grouped; | ||
618 | struct ubifs_scan_leb *sleb; | 639 | struct ubifs_scan_leb *sleb; |
619 | void *buf = sbuf + offs; | 640 | void *buf = sbuf + offs; |
620 | 641 | ||
621 | dbg_rcvry("%d:%d", lnum, offs); | 642 | dbg_rcvry("%d:%d, jhead %d, grouped %d", lnum, offs, jhead, grouped); |
622 | 643 | ||
623 | sleb = ubifs_start_scan(c, lnum, offs, sbuf); | 644 | sleb = ubifs_start_scan(c, lnum, offs, sbuf); |
624 | if (IS_ERR(sleb)) | 645 | if (IS_ERR(sleb)) |
@@ -635,7 +656,7 @@ struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum, | |||
635 | * Scan quietly until there is an error from which we cannot | 656 | * Scan quietly until there is an error from which we cannot |
636 | * recover | 657 | * recover |
637 | */ | 658 | */ |
638 | ret = ubifs_scan_a_node(c, buf, len, lnum, offs, 0); | 659 | ret = ubifs_scan_a_node(c, buf, len, lnum, offs, 1); |
639 | if (ret == SCANNED_A_NODE) { | 660 | if (ret == SCANNED_A_NODE) { |
640 | /* A valid node, and not a padding node */ | 661 | /* A valid node, and not a padding node */ |
641 | struct ubifs_ch *ch = buf; | 662 | struct ubifs_ch *ch = buf; |
@@ -695,59 +716,62 @@ struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum, | |||
695 | * If nodes are grouped, always drop the incomplete group at | 716 | * If nodes are grouped, always drop the incomplete group at |
696 | * the end. | 717 | * the end. |
697 | */ | 718 | */ |
698 | drop_last_node(sleb, &offs, 1); | 719 | drop_last_group(sleb, &offs); |
699 | 720 | ||
700 | /* | 721 | if (jhead == GCHD) { |
701 | * While we are in the middle of the same min. I/O unit keep dropping | 722 | /* |
702 | * nodes. So basically, what we want is to make sure that the last min. | 723 | * If this LEB belongs to the GC head then while we are in the |
703 | * I/O unit where we saw the corruption is dropped completely with all | 724 | * middle of the same min. I/O unit keep dropping nodes. So |
704 | * the uncorrupted node which may possibly sit there. | 725 | * basically, what we want is to make sure that the last min. |
705 | * | 726 | * I/O unit where we saw the corruption is dropped completely |
706 | * In other words, let's name the min. I/O unit where the corruption | 727 | * with all the uncorrupted nodes which may possibly sit there. |
707 | * starts B, and the previous min. I/O unit A. The below code tries to | 728 | * |
708 | * deal with a situation when half of B contains valid nodes or the end | 729 | * In other words, let's name the min. I/O unit where the |
709 | * of a valid node, and the second half of B contains corrupted data or | 730 | * corruption starts B, and the previous min. I/O unit A. The |
710 | * garbage. This means that UBIFS had been writing to B just before the | 731 | * below code tries to deal with a situation when half of B |
711 | * power cut happened. I do not know how realistic is this scenario | 732 | * contains valid nodes or the end of a valid node, and the |
712 | * that half of the min. I/O unit had been written successfully and the | 733 | * second half of B contains corrupted data or garbage. This |
713 | * other half not, but this is possible in our 'failure mode emulation' | 734 | * means that UBIFS had been writing to B just before the power |
714 | * infrastructure at least. | 735 | * cut happened. I do not know how realistic is this scenario |
715 | * | 736 | * that half of the min. I/O unit had been written successfully |
716 | * So what is the problem, why we need to drop those nodes? Whey can't | 737 | * and the other half not, but this is possible in our 'failure |
717 | * we just clean-up the second half of B by putting a padding node | 738 | * mode emulation' infrastructure at least. |
718 | * there? We can, and this works fine with one exception which was | 739 | * |
719 | * reproduced with power cut emulation testing and happens extremely | 740 | * So what is the problem, why we need to drop those nodes? Why |
720 | * rarely. The description follows, but it is worth noting that that is | 741 | * can't we just clean-up the second half of B by putting a |
721 | * only about the GC head, so we could do this trick only if the bud | 742 | * padding node there? We can, and this works fine with one |
722 | * belongs to the GC head, but it does not seem to be worth an | 743 | * exception which was reproduced with power cut emulation |
723 | * additional "if" statement. | 744 | * testing and happens extremely rarely. |
724 | * | 745 | * |
725 | * So, imagine the file-system is full, we run GC which is moving valid | 746 | * Imagine the file-system is full, we run GC which starts |
726 | * nodes from LEB X to LEB Y (obviously, LEB Y is the current GC head | 747 | * moving valid nodes from LEB X to LEB Y (obviously, LEB Y is |
727 | * LEB). The @c->gc_lnum is -1, which means that GC will retain LEB X | 748 | * the current GC head LEB). The @c->gc_lnum is -1, which means |
728 | * and will try to continue. Imagine that LEB X is currently the | 749 | * that GC will retain LEB X and will try to continue. Imagine |
729 | * dirtiest LEB, and the amount of used space in LEB Y is exactly the | 750 | * that LEB X is currently the dirtiest LEB, and the amount of |
730 | * same as amount of free space in LEB X. | 751 | * used space in LEB Y is exactly the same as amount of free |
731 | * | 752 | * space in LEB X. |
732 | * And a power cut happens when nodes are moved from LEB X to LEB Y. We | 753 | * |
733 | * are here trying to recover LEB Y which is the GC head LEB. We find | 754 | * And a power cut happens when nodes are moved from LEB X to |
734 | * the min. I/O unit B as described above. Then we clean-up LEB Y by | 755 | * LEB Y. We are here trying to recover LEB Y which is the GC |
735 | * padding min. I/O unit. And later 'ubifs_rcvry_gc_commit()' function | 756 | * head LEB. We find the min. I/O unit B as described above. |
736 | * fails, because it cannot find a dirty LEB which could be GC'd into | 757 | * Then we clean-up LEB Y by padding min. I/O unit. And later |
737 | * LEB Y! Even LEB X does not match because the amount of valid nodes | 758 | * 'ubifs_rcvry_gc_commit()' function fails, because it cannot |
738 | * there does not fit the free space in LEB Y any more! And this is | 759 | * find a dirty LEB which could be GC'd into LEB Y! Even LEB X |
739 | * because of the padding node which we added to LEB Y. The | 760 | * does not match because the amount of valid nodes there does |
740 | * user-visible effect of this which I once observed and analysed is | 761 | * not fit the free space in LEB Y any more! And this is |
741 | * that we cannot mount the file-system with -ENOSPC error. | 762 | * because of the padding node which we added to LEB Y. The |
742 | * | 763 | * user-visible effect of this which I once observed and |
743 | * So obviously, to make sure that situation does not happen we should | 764 | * analysed is that we cannot mount the file-system with |
744 | * free min. I/O unit B in LEB Y completely and the last used min. I/O | 765 | * -ENOSPC error. |
745 | * unit in LEB Y should be A. This is basically what the below code | 766 | * |
746 | * tries to do. | 767 | * So obviously, to make sure that situation does not happen we |
747 | */ | 768 | * should free min. I/O unit B in LEB Y completely and the last |
748 | while (min_io_unit == round_down(offs, c->min_io_size) && | 769 | * used min. I/O unit in LEB Y should be A. This is basically |
749 | min_io_unit != offs && | 770 | * what the below code tries to do. |
750 | drop_last_node(sleb, &offs, grouped)); | 771 | */ |
772 | while (offs > min_io_unit) | ||
773 | drop_last_node(sleb, &offs); | ||
774 | } | ||
751 | 775 | ||
752 | buf = sbuf + offs; | 776 | buf = sbuf + offs; |
753 | len = c->leb_size - offs; | 777 | len = c->leb_size - offs; |
@@ -881,7 +905,7 @@ struct ubifs_scan_leb *ubifs_recover_log_leb(struct ubifs_info *c, int lnum, | |||
881 | } | 905 | } |
882 | ubifs_scan_destroy(sleb); | 906 | ubifs_scan_destroy(sleb); |
883 | } | 907 | } |
884 | return ubifs_recover_leb(c, lnum, offs, sbuf, 0); | 908 | return ubifs_recover_leb(c, lnum, offs, sbuf, -1); |
885 | } | 909 | } |
886 | 910 | ||
887 | /** | 911 | /** |
diff --git a/fs/ubifs/replay.c b/fs/ubifs/replay.c index 6617280d1679..5e97161ce4d3 100644 --- a/fs/ubifs/replay.c +++ b/fs/ubifs/replay.c | |||
@@ -557,8 +557,7 @@ static int replay_bud(struct ubifs_info *c, struct bud_entry *b) | |||
557 | * these LEBs could possibly be written to at the power cut | 557 | * these LEBs could possibly be written to at the power cut |
558 | * time. | 558 | * time. |
559 | */ | 559 | */ |
560 | sleb = ubifs_recover_leb(c, lnum, offs, c->sbuf, | 560 | sleb = ubifs_recover_leb(c, lnum, offs, c->sbuf, b->bud->jhead); |
561 | b->bud->jhead != GCHD); | ||
562 | else | 561 | else |
563 | sleb = ubifs_scan(c, lnum, offs, c->sbuf, 0); | 562 | sleb = ubifs_scan(c, lnum, offs, c->sbuf, 0); |
564 | if (IS_ERR(sleb)) | 563 | if (IS_ERR(sleb)) |
diff --git a/fs/ubifs/shrinker.c b/fs/ubifs/shrinker.c index 46961c003236..9e1d05666fed 100644 --- a/fs/ubifs/shrinker.c +++ b/fs/ubifs/shrinker.c | |||
@@ -277,13 +277,18 @@ static int kick_a_thread(void) | |||
277 | return 0; | 277 | return 0; |
278 | } | 278 | } |
279 | 279 | ||
280 | int ubifs_shrinker(struct shrinker *shrink, int nr, gfp_t gfp_mask) | 280 | int ubifs_shrinker(struct shrinker *shrink, struct shrink_control *sc) |
281 | { | 281 | { |
282 | int nr = sc->nr_to_scan; | ||
282 | int freed, contention = 0; | 283 | int freed, contention = 0; |
283 | long clean_zn_cnt = atomic_long_read(&ubifs_clean_zn_cnt); | 284 | long clean_zn_cnt = atomic_long_read(&ubifs_clean_zn_cnt); |
284 | 285 | ||
285 | if (nr == 0) | 286 | if (nr == 0) |
286 | return clean_zn_cnt; | 287 | /* |
288 | * Due to the way UBIFS updates the clean znode counter it may | ||
289 | * temporarily be negative. | ||
290 | */ | ||
291 | return clean_zn_cnt >= 0 ? clean_zn_cnt : 1; | ||
287 | 292 | ||
288 | if (!clean_zn_cnt) { | 293 | if (!clean_zn_cnt) { |
289 | /* | 294 | /* |
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index 6db0bdaa9f74..b5aeb5a8ebed 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c | |||
@@ -382,7 +382,7 @@ done: | |||
382 | end_writeback(inode); | 382 | end_writeback(inode); |
383 | } | 383 | } |
384 | 384 | ||
385 | static void ubifs_dirty_inode(struct inode *inode) | 385 | static void ubifs_dirty_inode(struct inode *inode, int flags) |
386 | { | 386 | { |
387 | struct ubifs_inode *ui = ubifs_inode(inode); | 387 | struct ubifs_inode *ui = ubifs_inode(inode); |
388 | 388 | ||
@@ -811,15 +811,18 @@ static int alloc_wbufs(struct ubifs_info *c) | |||
811 | 811 | ||
812 | c->jheads[i].wbuf.sync_callback = &bud_wbuf_callback; | 812 | c->jheads[i].wbuf.sync_callback = &bud_wbuf_callback; |
813 | c->jheads[i].wbuf.jhead = i; | 813 | c->jheads[i].wbuf.jhead = i; |
814 | c->jheads[i].grouped = 1; | ||
814 | } | 815 | } |
815 | 816 | ||
816 | c->jheads[BASEHD].wbuf.dtype = UBI_SHORTTERM; | 817 | c->jheads[BASEHD].wbuf.dtype = UBI_SHORTTERM; |
817 | /* | 818 | /* |
818 | * Garbage Collector head likely contains long-term data and | 819 | * Garbage Collector head likely contains long-term data and |
819 | * does not need to be synchronized by timer. | 820 | * does not need to be synchronized by timer. Also GC head nodes are |
821 | * not grouped. | ||
820 | */ | 822 | */ |
821 | c->jheads[GCHD].wbuf.dtype = UBI_LONGTERM; | 823 | c->jheads[GCHD].wbuf.dtype = UBI_LONGTERM; |
822 | c->jheads[GCHD].wbuf.no_timer = 1; | 824 | c->jheads[GCHD].wbuf.no_timer = 1; |
825 | c->jheads[GCHD].grouped = 0; | ||
823 | 826 | ||
824 | return 0; | 827 | return 0; |
825 | } | 828 | } |
@@ -1284,12 +1287,25 @@ static int mount_ubifs(struct ubifs_info *c) | |||
1284 | if ((c->mst_node->flags & cpu_to_le32(UBIFS_MST_DIRTY)) != 0) { | 1287 | if ((c->mst_node->flags & cpu_to_le32(UBIFS_MST_DIRTY)) != 0) { |
1285 | ubifs_msg("recovery needed"); | 1288 | ubifs_msg("recovery needed"); |
1286 | c->need_recovery = 1; | 1289 | c->need_recovery = 1; |
1287 | if (!c->ro_mount) { | 1290 | } |
1288 | err = ubifs_recover_inl_heads(c, c->sbuf); | 1291 | |
1289 | if (err) | 1292 | if (c->need_recovery && !c->ro_mount) { |
1290 | goto out_master; | 1293 | err = ubifs_recover_inl_heads(c, c->sbuf); |
1291 | } | 1294 | if (err) |
1292 | } else if (!c->ro_mount) { | 1295 | goto out_master; |
1296 | } | ||
1297 | |||
1298 | err = ubifs_lpt_init(c, 1, !c->ro_mount); | ||
1299 | if (err) | ||
1300 | goto out_master; | ||
1301 | |||
1302 | if (!c->ro_mount && c->space_fixup) { | ||
1303 | err = ubifs_fixup_free_space(c); | ||
1304 | if (err) | ||
1305 | goto out_master; | ||
1306 | } | ||
1307 | |||
1308 | if (!c->ro_mount) { | ||
1293 | /* | 1309 | /* |
1294 | * Set the "dirty" flag so that if we reboot uncleanly we | 1310 | * Set the "dirty" flag so that if we reboot uncleanly we |
1295 | * will notice this immediately on the next mount. | 1311 | * will notice this immediately on the next mount. |
@@ -1297,13 +1313,9 @@ static int mount_ubifs(struct ubifs_info *c) | |||
1297 | c->mst_node->flags |= cpu_to_le32(UBIFS_MST_DIRTY); | 1313 | c->mst_node->flags |= cpu_to_le32(UBIFS_MST_DIRTY); |
1298 | err = ubifs_write_master(c); | 1314 | err = ubifs_write_master(c); |
1299 | if (err) | 1315 | if (err) |
1300 | goto out_master; | 1316 | goto out_lpt; |
1301 | } | 1317 | } |
1302 | 1318 | ||
1303 | err = ubifs_lpt_init(c, 1, !c->ro_mount); | ||
1304 | if (err) | ||
1305 | goto out_lpt; | ||
1306 | |||
1307 | err = dbg_check_idx_size(c, c->bi.old_idx_sz); | 1319 | err = dbg_check_idx_size(c, c->bi.old_idx_sz); |
1308 | if (err) | 1320 | if (err) |
1309 | goto out_lpt; | 1321 | goto out_lpt; |
@@ -1396,12 +1408,6 @@ static int mount_ubifs(struct ubifs_info *c) | |||
1396 | } else | 1408 | } else |
1397 | ubifs_assert(c->lst.taken_empty_lebs > 0); | 1409 | ubifs_assert(c->lst.taken_empty_lebs > 0); |
1398 | 1410 | ||
1399 | if (!c->ro_mount && c->space_fixup) { | ||
1400 | err = ubifs_fixup_free_space(c); | ||
1401 | if (err) | ||
1402 | goto out_infos; | ||
1403 | } | ||
1404 | |||
1405 | err = dbg_check_filesystem(c); | 1411 | err = dbg_check_filesystem(c); |
1406 | if (err) | 1412 | if (err) |
1407 | goto out_infos; | 1413 | goto out_infos; |
diff --git a/fs/ubifs/tnc.c b/fs/ubifs/tnc.c index 8119b1fd8d94..91b4213dde84 100644 --- a/fs/ubifs/tnc.c +++ b/fs/ubifs/tnc.c | |||
@@ -2876,12 +2876,13 @@ static void tnc_destroy_cnext(struct ubifs_info *c) | |||
2876 | */ | 2876 | */ |
2877 | void ubifs_tnc_close(struct ubifs_info *c) | 2877 | void ubifs_tnc_close(struct ubifs_info *c) |
2878 | { | 2878 | { |
2879 | long clean_freed; | ||
2880 | |||
2881 | tnc_destroy_cnext(c); | 2879 | tnc_destroy_cnext(c); |
2882 | if (c->zroot.znode) { | 2880 | if (c->zroot.znode) { |
2883 | clean_freed = ubifs_destroy_tnc_subtree(c->zroot.znode); | 2881 | long n; |
2884 | atomic_long_sub(clean_freed, &ubifs_clean_zn_cnt); | 2882 | |
2883 | ubifs_destroy_tnc_subtree(c->zroot.znode); | ||
2884 | n = atomic_long_read(&c->clean_zn_cnt); | ||
2885 | atomic_long_sub(n, &ubifs_clean_zn_cnt); | ||
2885 | } | 2886 | } |
2886 | kfree(c->gap_lebs); | 2887 | kfree(c->gap_lebs); |
2887 | kfree(c->ilebs); | 2888 | kfree(c->ilebs); |
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h index 93d1412a06f0..f79983d6f860 100644 --- a/fs/ubifs/ubifs.h +++ b/fs/ubifs/ubifs.h | |||
@@ -722,12 +722,14 @@ struct ubifs_bud { | |||
722 | * struct ubifs_jhead - journal head. | 722 | * struct ubifs_jhead - journal head. |
723 | * @wbuf: head's write-buffer | 723 | * @wbuf: head's write-buffer |
724 | * @buds_list: list of bud LEBs belonging to this journal head | 724 | * @buds_list: list of bud LEBs belonging to this journal head |
725 | * @grouped: non-zero if UBIFS groups nodes when writing to this journal head | ||
725 | * | 726 | * |
726 | * Note, the @buds list is protected by the @c->buds_lock. | 727 | * Note, the @buds list is protected by the @c->buds_lock. |
727 | */ | 728 | */ |
728 | struct ubifs_jhead { | 729 | struct ubifs_jhead { |
729 | struct ubifs_wbuf wbuf; | 730 | struct ubifs_wbuf wbuf; |
730 | struct list_head buds_list; | 731 | struct list_head buds_list; |
732 | unsigned int grouped:1; | ||
731 | }; | 733 | }; |
732 | 734 | ||
733 | /** | 735 | /** |
@@ -1614,7 +1616,7 @@ int ubifs_tnc_start_commit(struct ubifs_info *c, struct ubifs_zbranch *zroot); | |||
1614 | int ubifs_tnc_end_commit(struct ubifs_info *c); | 1616 | int ubifs_tnc_end_commit(struct ubifs_info *c); |
1615 | 1617 | ||
1616 | /* shrinker.c */ | 1618 | /* shrinker.c */ |
1617 | int ubifs_shrinker(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask); | 1619 | int ubifs_shrinker(struct shrinker *shrink, struct shrink_control *sc); |
1618 | 1620 | ||
1619 | /* commit.c */ | 1621 | /* commit.c */ |
1620 | int ubifs_bg_thread(void *info); | 1622 | int ubifs_bg_thread(void *info); |
@@ -1742,7 +1744,7 @@ struct inode *ubifs_iget(struct super_block *sb, unsigned long inum); | |||
1742 | int ubifs_recover_master_node(struct ubifs_info *c); | 1744 | int ubifs_recover_master_node(struct ubifs_info *c); |
1743 | int ubifs_write_rcvrd_mst_node(struct ubifs_info *c); | 1745 | int ubifs_write_rcvrd_mst_node(struct ubifs_info *c); |
1744 | struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum, | 1746 | struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum, |
1745 | int offs, void *sbuf, int grouped); | 1747 | int offs, void *sbuf, int jhead); |
1746 | struct ubifs_scan_leb *ubifs_recover_log_leb(struct ubifs_info *c, int lnum, | 1748 | struct ubifs_scan_leb *ubifs_recover_log_leb(struct ubifs_info *c, int lnum, |
1747 | int offs, void *sbuf); | 1749 | int offs, void *sbuf); |
1748 | int ubifs_recover_inl_heads(const struct ubifs_info *c, void *sbuf); | 1750 | int ubifs_recover_inl_heads(const struct ubifs_info *c, void *sbuf); |
diff --git a/fs/udf/namei.c b/fs/udf/namei.c index 4d76594c2a8f..f1dce848ef96 100644 --- a/fs/udf/namei.c +++ b/fs/udf/namei.c | |||
@@ -783,8 +783,6 @@ static int udf_rmdir(struct inode *dir, struct dentry *dentry) | |||
783 | struct fileIdentDesc *fi, cfi; | 783 | struct fileIdentDesc *fi, cfi; |
784 | struct kernel_lb_addr tloc; | 784 | struct kernel_lb_addr tloc; |
785 | 785 | ||
786 | dentry_unhash(dentry); | ||
787 | |||
788 | retval = -ENOENT; | 786 | retval = -ENOENT; |
789 | fi = udf_find_entry(dir, &dentry->d_name, &fibh, &cfi); | 787 | fi = udf_find_entry(dir, &dentry->d_name, &fibh, &cfi); |
790 | if (!fi) | 788 | if (!fi) |
@@ -1083,9 +1081,6 @@ static int udf_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
1083 | struct kernel_lb_addr tloc; | 1081 | struct kernel_lb_addr tloc; |
1084 | struct udf_inode_info *old_iinfo = UDF_I(old_inode); | 1082 | struct udf_inode_info *old_iinfo = UDF_I(old_inode); |
1085 | 1083 | ||
1086 | if (new_inode && S_ISDIR(new_inode->i_mode)) | ||
1087 | dentry_unhash(new_dentry); | ||
1088 | |||
1089 | ofi = udf_find_entry(old_dir, &old_dentry->d_name, &ofibh, &ocfi); | 1084 | ofi = udf_find_entry(old_dir, &old_dentry->d_name, &ofibh, &ocfi); |
1090 | if (ofi) { | 1085 | if (ofi) { |
1091 | if (ofibh.sbh != ofibh.ebh) | 1086 | if (ofibh.sbh != ofibh.ebh) |
diff --git a/fs/ufs/namei.c b/fs/ufs/namei.c index 953ebdfc5bf7..29309e25417f 100644 --- a/fs/ufs/namei.c +++ b/fs/ufs/namei.c | |||
@@ -258,8 +258,6 @@ static int ufs_rmdir (struct inode * dir, struct dentry *dentry) | |||
258 | struct inode * inode = dentry->d_inode; | 258 | struct inode * inode = dentry->d_inode; |
259 | int err= -ENOTEMPTY; | 259 | int err= -ENOTEMPTY; |
260 | 260 | ||
261 | dentry_unhash(dentry); | ||
262 | |||
263 | lock_ufs(dir->i_sb); | 261 | lock_ufs(dir->i_sb); |
264 | if (ufs_empty_dir (inode)) { | 262 | if (ufs_empty_dir (inode)) { |
265 | err = ufs_unlink(dir, dentry); | 263 | err = ufs_unlink(dir, dentry); |
@@ -284,9 +282,6 @@ static int ufs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
284 | struct ufs_dir_entry *old_de; | 282 | struct ufs_dir_entry *old_de; |
285 | int err = -ENOENT; | 283 | int err = -ENOENT; |
286 | 284 | ||
287 | if (new_inode && S_ISDIR(new_inode->i_mode)) | ||
288 | dentry_unhash(new_dentry); | ||
289 | |||
290 | old_de = ufs_find_entry(old_dir, &old_dentry->d_name, &old_page); | 285 | old_de = ufs_find_entry(old_dir, &old_dentry->d_name, &old_page); |
291 | if (!old_de) | 286 | if (!old_de) |
292 | goto out; | 287 | goto out; |
diff --git a/fs/xattr.c b/fs/xattr.c index f1ef94974dea..f060663ab70c 100644 --- a/fs/xattr.c +++ b/fs/xattr.c | |||
@@ -46,18 +46,22 @@ xattr_permission(struct inode *inode, const char *name, int mask) | |||
46 | return 0; | 46 | return 0; |
47 | 47 | ||
48 | /* | 48 | /* |
49 | * The trusted.* namespace can only be accessed by a privileged user. | 49 | * The trusted.* namespace can only be accessed by privileged users. |
50 | */ | 50 | */ |
51 | if (!strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN)) | 51 | if (!strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN)) { |
52 | return (capable(CAP_SYS_ADMIN) ? 0 : -EPERM); | 52 | if (!capable(CAP_SYS_ADMIN)) |
53 | return (mask & MAY_WRITE) ? -EPERM : -ENODATA; | ||
54 | return 0; | ||
55 | } | ||
53 | 56 | ||
54 | /* In user.* namespace, only regular files and directories can have | 57 | /* |
58 | * In the user.* namespace, only regular files and directories can have | ||
55 | * extended attributes. For sticky directories, only the owner and | 59 | * extended attributes. For sticky directories, only the owner and |
56 | * privileged user can write attributes. | 60 | * privileged users can write attributes. |
57 | */ | 61 | */ |
58 | if (!strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN)) { | 62 | if (!strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN)) { |
59 | if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode)) | 63 | if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode)) |
60 | return -EPERM; | 64 | return (mask & MAY_WRITE) ? -EPERM : -ENODATA; |
61 | if (S_ISDIR(inode->i_mode) && (inode->i_mode & S_ISVTX) && | 65 | if (S_ISDIR(inode->i_mode) && (inode->i_mode & S_ISVTX) && |
62 | (mask & MAY_WRITE) && !inode_owner_or_capable(inode)) | 66 | (mask & MAY_WRITE) && !inode_owner_or_capable(inode)) |
63 | return -EPERM; | 67 | return -EPERM; |
@@ -87,7 +91,11 @@ int __vfs_setxattr_noperm(struct dentry *dentry, const char *name, | |||
87 | { | 91 | { |
88 | struct inode *inode = dentry->d_inode; | 92 | struct inode *inode = dentry->d_inode; |
89 | int error = -EOPNOTSUPP; | 93 | int error = -EOPNOTSUPP; |
94 | int issec = !strncmp(name, XATTR_SECURITY_PREFIX, | ||
95 | XATTR_SECURITY_PREFIX_LEN); | ||
90 | 96 | ||
97 | if (issec) | ||
98 | inode->i_flags &= ~S_NOSEC; | ||
91 | if (inode->i_op->setxattr) { | 99 | if (inode->i_op->setxattr) { |
92 | error = inode->i_op->setxattr(dentry, name, value, size, flags); | 100 | error = inode->i_op->setxattr(dentry, name, value, size, flags); |
93 | if (!error) { | 101 | if (!error) { |
@@ -95,8 +103,7 @@ int __vfs_setxattr_noperm(struct dentry *dentry, const char *name, | |||
95 | security_inode_post_setxattr(dentry, name, value, | 103 | security_inode_post_setxattr(dentry, name, value, |
96 | size, flags); | 104 | size, flags); |
97 | } | 105 | } |
98 | } else if (!strncmp(name, XATTR_SECURITY_PREFIX, | 106 | } else if (issec) { |
99 | XATTR_SECURITY_PREFIX_LEN)) { | ||
100 | const char *suffix = name + XATTR_SECURITY_PREFIX_LEN; | 107 | const char *suffix = name + XATTR_SECURITY_PREFIX_LEN; |
101 | error = security_inode_setsecurity(inode, suffix, value, | 108 | error = security_inode_setsecurity(inode, suffix, value, |
102 | size, flags); | 109 | size, flags); |
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index 98b9c91fcdf1..1e3a7ce804dc 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c | |||
@@ -925,7 +925,8 @@ xfs_fs_inode_init_once( | |||
925 | */ | 925 | */ |
926 | STATIC void | 926 | STATIC void |
927 | xfs_fs_dirty_inode( | 927 | xfs_fs_dirty_inode( |
928 | struct inode *inode) | 928 | struct inode *inode, |
929 | int flags) | ||
929 | { | 930 | { |
930 | barrier(); | 931 | barrier(); |
931 | XFS_I(inode)->i_update_core = 1; | 932 | XFS_I(inode)->i_update_core = 1; |