aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorPaul Mundt <lethal@linux-sh.org>2011-08-08 00:45:28 -0400
committerPaul Mundt <lethal@linux-sh.org>2011-08-08 00:45:28 -0400
commit77c7ee51a062bb595c501ec098125a68999c20c3 (patch)
treec5060ca5786ef353e005dae04b61d2c49967284d /fs
parent1ba762209491e2496e58baffa3fd65d661f54404 (diff)
parent322a8b034003c0d46d39af85bf24fee27b902f48 (diff)
Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux into sh-latest
Conflicts: drivers/tty/serial/sh-sci.c Signed-off-by: Paul Mundt <lethal@linux-sh.org>
Diffstat (limited to 'fs')
-rw-r--r--fs/Kconfig15
-rw-r--r--fs/block_dev.c4
-rw-r--r--fs/btrfs/Makefile4
-rw-r--r--fs/btrfs/acl.c17
-rw-r--r--fs/btrfs/compression.c14
-rw-r--r--fs/btrfs/ctree.h30
-rw-r--r--fs/btrfs/dir-item.c30
-rw-r--r--fs/btrfs/extent-tree.c45
-rw-r--r--fs/btrfs/extent_io.c139
-rw-r--r--fs/btrfs/extent_io.h20
-rw-r--r--fs/btrfs/extent_map.c155
-rw-r--r--fs/btrfs/file-item.c7
-rw-r--r--fs/btrfs/file.c21
-rw-r--r--fs/btrfs/inode.c98
-rw-r--r--fs/btrfs/ioctl.c3
-rw-r--r--fs/btrfs/ref-cache.c68
-rw-r--r--fs/btrfs/ref-cache.h52
-rw-r--r--fs/btrfs/root-tree.c5
-rw-r--r--fs/btrfs/transaction.c65
-rw-r--r--fs/btrfs/tree-log.c12
-rw-r--r--fs/btrfs/volumes.c12
-rw-r--r--fs/cifs/cifs_dfs_ref.c5
-rw-r--r--fs/cifs/cifsfs.c4
-rw-r--r--fs/cifs/dns_resolve.c4
-rw-r--r--fs/cifs/inode.c14
-rw-r--r--fs/cifs/sess.c3
-rw-r--r--fs/cifs/transport.c2
-rw-r--r--fs/dcache.c3
-rw-r--r--fs/exofs/Kbuild5
-rw-r--r--fs/exofs/Kconfig4
-rw-r--r--fs/exofs/exofs.h159
-rw-r--r--fs/exofs/inode.c152
-rw-r--r--fs/exofs/ore.c (renamed from fs/exofs/ios.c)370
-rw-r--r--fs/exofs/pnfs.h45
-rw-r--r--fs/exofs/super.c251
-rw-r--r--fs/ext4/super.c2
-rw-r--r--fs/inode.c1
-rw-r--r--fs/namei.c93
-rw-r--r--fs/proc/base.c12
-rw-r--r--fs/stack.c5
-rw-r--r--fs/stat.c4
41 files changed, 874 insertions, 1080 deletions
diff --git a/fs/Kconfig b/fs/Kconfig
index 19891aab9c6e..9fe0b349f4cd 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -127,14 +127,21 @@ config TMPFS_POSIX_ACL
127 select TMPFS_XATTR 127 select TMPFS_XATTR
128 select GENERIC_ACL 128 select GENERIC_ACL
129 help 129 help
130 POSIX Access Control Lists (ACLs) support permissions for users and 130 POSIX Access Control Lists (ACLs) support additional access rights
131 groups beyond the owner/group/world scheme. 131 for users and groups beyond the standard owner/group/world scheme,
132 and this option selects support for ACLs specifically for tmpfs
133 filesystems.
134
135 If you've selected TMPFS, it's possible that you'll also need
136 this option as there are a number of Linux distros that require
137 POSIX ACL support under /dev for certain features to work properly.
138 For example, some distros need this feature for ALSA-related /dev
139 files for sound to work properly. In short, if you're not sure,
140 say Y.
132 141
133 To learn more about Access Control Lists, visit the POSIX ACLs for 142 To learn more about Access Control Lists, visit the POSIX ACLs for
134 Linux website <http://acl.bestbits.at/>. 143 Linux website <http://acl.bestbits.at/>.
135 144
136 If you don't know what Access Control Lists are, say N.
137
138config TMPFS_XATTR 145config TMPFS_XATTR
139 bool "Tmpfs extended attributes" 146 bool "Tmpfs extended attributes"
140 depends on TMPFS 147 depends on TMPFS
diff --git a/fs/block_dev.c b/fs/block_dev.c
index f28680553288..ff77262e887c 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -387,6 +387,10 @@ int blkdev_fsync(struct file *filp, loff_t start, loff_t end, int datasync)
387 struct inode *bd_inode = filp->f_mapping->host; 387 struct inode *bd_inode = filp->f_mapping->host;
388 struct block_device *bdev = I_BDEV(bd_inode); 388 struct block_device *bdev = I_BDEV(bd_inode);
389 int error; 389 int error;
390
391 error = filemap_write_and_wait_range(filp->f_mapping, start, end);
392 if (error)
393 return error;
390 394
391 /* 395 /*
392 * There is no need to serialise calls to blkdev_issue_flush with 396 * There is no need to serialise calls to blkdev_issue_flush with
diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile
index 9b72dcf1cd25..40e6ac08c21f 100644
--- a/fs/btrfs/Makefile
+++ b/fs/btrfs/Makefile
@@ -6,5 +6,7 @@ btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
6 transaction.o inode.o file.o tree-defrag.o \ 6 transaction.o inode.o file.o tree-defrag.o \
7 extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o \ 7 extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o \
8 extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \ 8 extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \
9 export.o tree-log.o acl.o free-space-cache.o zlib.o lzo.o \ 9 export.o tree-log.o free-space-cache.o zlib.o lzo.o \
10 compression.o delayed-ref.o relocation.o delayed-inode.o scrub.o 10 compression.o delayed-ref.o relocation.o delayed-inode.o scrub.o
11
12btrfs-$(CONFIG_BTRFS_FS_POSIX_ACL) += acl.o
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c
index 4cc5c0164ed6..eb159aaa5a11 100644
--- a/fs/btrfs/acl.c
+++ b/fs/btrfs/acl.c
@@ -28,8 +28,6 @@
28#include "btrfs_inode.h" 28#include "btrfs_inode.h"
29#include "xattr.h" 29#include "xattr.h"
30 30
31#ifdef CONFIG_BTRFS_FS_POSIX_ACL
32
33struct posix_acl *btrfs_get_acl(struct inode *inode, int type) 31struct posix_acl *btrfs_get_acl(struct inode *inode, int type)
34{ 32{
35 int size; 33 int size;
@@ -276,18 +274,3 @@ const struct xattr_handler btrfs_xattr_acl_access_handler = {
276 .get = btrfs_xattr_acl_get, 274 .get = btrfs_xattr_acl_get,
277 .set = btrfs_xattr_acl_set, 275 .set = btrfs_xattr_acl_set,
278}; 276};
279
280#else /* CONFIG_BTRFS_FS_POSIX_ACL */
281
282int btrfs_acl_chmod(struct inode *inode)
283{
284 return 0;
285}
286
287int btrfs_init_acl(struct btrfs_trans_handle *trans,
288 struct inode *inode, struct inode *dir)
289{
290 return 0;
291}
292
293#endif /* CONFIG_BTRFS_FS_POSIX_ACL */
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index bfe42b03eaf9..8ec5d86f1734 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -338,6 +338,7 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
338 u64 first_byte = disk_start; 338 u64 first_byte = disk_start;
339 struct block_device *bdev; 339 struct block_device *bdev;
340 int ret; 340 int ret;
341 int skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
341 342
342 WARN_ON(start & ((u64)PAGE_CACHE_SIZE - 1)); 343 WARN_ON(start & ((u64)PAGE_CACHE_SIZE - 1));
343 cb = kmalloc(compressed_bio_size(root, compressed_len), GFP_NOFS); 344 cb = kmalloc(compressed_bio_size(root, compressed_len), GFP_NOFS);
@@ -392,8 +393,11 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
392 ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0); 393 ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0);
393 BUG_ON(ret); 394 BUG_ON(ret);
394 395
395 ret = btrfs_csum_one_bio(root, inode, bio, start, 1); 396 if (!skip_sum) {
396 BUG_ON(ret); 397 ret = btrfs_csum_one_bio(root, inode, bio,
398 start, 1);
399 BUG_ON(ret);
400 }
397 401
398 ret = btrfs_map_bio(root, WRITE, bio, 0, 1); 402 ret = btrfs_map_bio(root, WRITE, bio, 0, 1);
399 BUG_ON(ret); 403 BUG_ON(ret);
@@ -418,8 +422,10 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
418 ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0); 422 ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0);
419 BUG_ON(ret); 423 BUG_ON(ret);
420 424
421 ret = btrfs_csum_one_bio(root, inode, bio, start, 1); 425 if (!skip_sum) {
422 BUG_ON(ret); 426 ret = btrfs_csum_one_bio(root, inode, bio, start, 1);
427 BUG_ON(ret);
428 }
423 429
424 ret = btrfs_map_bio(root, WRITE, bio, 0, 1); 430 ret = btrfs_map_bio(root, WRITE, bio, 0, 1);
425 BUG_ON(ret); 431 BUG_ON(ret);
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 365c4e1dde04..0469263e327e 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -2406,8 +2406,8 @@ int btrfs_find_last_root(struct btrfs_root *root, u64 objectid, struct
2406 btrfs_root_item *item, struct btrfs_key *key); 2406 btrfs_root_item *item, struct btrfs_key *key);
2407int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid); 2407int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid);
2408int btrfs_find_orphan_roots(struct btrfs_root *tree_root); 2408int btrfs_find_orphan_roots(struct btrfs_root *tree_root);
2409int btrfs_set_root_node(struct btrfs_root_item *item, 2409void btrfs_set_root_node(struct btrfs_root_item *item,
2410 struct extent_buffer *node); 2410 struct extent_buffer *node);
2411void btrfs_check_and_init_root_item(struct btrfs_root_item *item); 2411void btrfs_check_and_init_root_item(struct btrfs_root_item *item);
2412 2412
2413/* dir-item.c */ 2413/* dir-item.c */
@@ -2523,6 +2523,14 @@ struct extent_map *btrfs_get_extent_fiemap(struct inode *inode, struct page *pag
2523#define PageChecked PageFsMisc 2523#define PageChecked PageFsMisc
2524#endif 2524#endif
2525 2525
2526/* This forces readahead on a given range of bytes in an inode */
2527static inline void btrfs_force_ra(struct address_space *mapping,
2528 struct file_ra_state *ra, struct file *file,
2529 pgoff_t offset, unsigned long req_size)
2530{
2531 page_cache_sync_readahead(mapping, ra, file, offset, req_size);
2532}
2533
2526struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry); 2534struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry);
2527int btrfs_set_inode_index(struct inode *dir, u64 *index); 2535int btrfs_set_inode_index(struct inode *dir, u64 *index);
2528int btrfs_unlink_inode(struct btrfs_trans_handle *trans, 2536int btrfs_unlink_inode(struct btrfs_trans_handle *trans,
@@ -2551,9 +2559,6 @@ int btrfs_create_subvol_root(struct btrfs_trans_handle *trans,
2551int btrfs_merge_bio_hook(struct page *page, unsigned long offset, 2559int btrfs_merge_bio_hook(struct page *page, unsigned long offset,
2552 size_t size, struct bio *bio, unsigned long bio_flags); 2560 size_t size, struct bio *bio, unsigned long bio_flags);
2553 2561
2554unsigned long btrfs_force_ra(struct address_space *mapping,
2555 struct file_ra_state *ra, struct file *file,
2556 pgoff_t offset, pgoff_t last_index);
2557int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf); 2562int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);
2558int btrfs_readpage(struct file *file, struct page *page); 2563int btrfs_readpage(struct file *file, struct page *page);
2559void btrfs_evict_inode(struct inode *inode); 2564void btrfs_evict_inode(struct inode *inode);
@@ -2648,12 +2653,21 @@ do { \
2648/* acl.c */ 2653/* acl.c */
2649#ifdef CONFIG_BTRFS_FS_POSIX_ACL 2654#ifdef CONFIG_BTRFS_FS_POSIX_ACL
2650struct posix_acl *btrfs_get_acl(struct inode *inode, int type); 2655struct posix_acl *btrfs_get_acl(struct inode *inode, int type);
2651#else
2652#define btrfs_get_acl NULL
2653#endif
2654int btrfs_init_acl(struct btrfs_trans_handle *trans, 2656int btrfs_init_acl(struct btrfs_trans_handle *trans,
2655 struct inode *inode, struct inode *dir); 2657 struct inode *inode, struct inode *dir);
2656int btrfs_acl_chmod(struct inode *inode); 2658int btrfs_acl_chmod(struct inode *inode);
2659#else
2660#define btrfs_get_acl NULL
2661static inline int btrfs_init_acl(struct btrfs_trans_handle *trans,
2662 struct inode *inode, struct inode *dir)
2663{
2664 return 0;
2665}
2666static inline int btrfs_acl_chmod(struct inode *inode)
2667{
2668 return 0;
2669}
2670#endif
2657 2671
2658/* relocation.c */ 2672/* relocation.c */
2659int btrfs_relocate_block_group(struct btrfs_root *root, u64 group_start); 2673int btrfs_relocate_block_group(struct btrfs_root *root, u64 group_start);
diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c
index c360a848d97f..31d84e78129b 100644
--- a/fs/btrfs/dir-item.c
+++ b/fs/btrfs/dir-item.c
@@ -198,8 +198,6 @@ struct btrfs_dir_item *btrfs_lookup_dir_item(struct btrfs_trans_handle *trans,
198 struct btrfs_key key; 198 struct btrfs_key key;
199 int ins_len = mod < 0 ? -1 : 0; 199 int ins_len = mod < 0 ? -1 : 0;
200 int cow = mod != 0; 200 int cow = mod != 0;
201 struct btrfs_key found_key;
202 struct extent_buffer *leaf;
203 201
204 key.objectid = dir; 202 key.objectid = dir;
205 btrfs_set_key_type(&key, BTRFS_DIR_ITEM_KEY); 203 btrfs_set_key_type(&key, BTRFS_DIR_ITEM_KEY);
@@ -209,18 +207,7 @@ struct btrfs_dir_item *btrfs_lookup_dir_item(struct btrfs_trans_handle *trans,
209 ret = btrfs_search_slot(trans, root, &key, path, ins_len, cow); 207 ret = btrfs_search_slot(trans, root, &key, path, ins_len, cow);
210 if (ret < 0) 208 if (ret < 0)
211 return ERR_PTR(ret); 209 return ERR_PTR(ret);
212 if (ret > 0) { 210 if (ret > 0)
213 if (path->slots[0] == 0)
214 return NULL;
215 path->slots[0]--;
216 }
217
218 leaf = path->nodes[0];
219 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
220
221 if (found_key.objectid != dir ||
222 btrfs_key_type(&found_key) != BTRFS_DIR_ITEM_KEY ||
223 found_key.offset != key.offset)
224 return NULL; 211 return NULL;
225 212
226 return btrfs_match_dir_item_name(root, path, name, name_len); 213 return btrfs_match_dir_item_name(root, path, name, name_len);
@@ -315,8 +302,6 @@ struct btrfs_dir_item *btrfs_lookup_xattr(struct btrfs_trans_handle *trans,
315 struct btrfs_key key; 302 struct btrfs_key key;
316 int ins_len = mod < 0 ? -1 : 0; 303 int ins_len = mod < 0 ? -1 : 0;
317 int cow = mod != 0; 304 int cow = mod != 0;
318 struct btrfs_key found_key;
319 struct extent_buffer *leaf;
320 305
321 key.objectid = dir; 306 key.objectid = dir;
322 btrfs_set_key_type(&key, BTRFS_XATTR_ITEM_KEY); 307 btrfs_set_key_type(&key, BTRFS_XATTR_ITEM_KEY);
@@ -324,18 +309,7 @@ struct btrfs_dir_item *btrfs_lookup_xattr(struct btrfs_trans_handle *trans,
324 ret = btrfs_search_slot(trans, root, &key, path, ins_len, cow); 309 ret = btrfs_search_slot(trans, root, &key, path, ins_len, cow);
325 if (ret < 0) 310 if (ret < 0)
326 return ERR_PTR(ret); 311 return ERR_PTR(ret);
327 if (ret > 0) { 312 if (ret > 0)
328 if (path->slots[0] == 0)
329 return NULL;
330 path->slots[0]--;
331 }
332
333 leaf = path->nodes[0];
334 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
335
336 if (found_key.objectid != dir ||
337 btrfs_key_type(&found_key) != BTRFS_XATTR_ITEM_KEY ||
338 found_key.offset != key.offset)
339 return NULL; 313 return NULL;
340 314
341 return btrfs_match_dir_item_name(root, path, name, name_len); 315 return btrfs_match_dir_item_name(root, path, name, name_len);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 4d08ed79405d..66bac226944e 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -663,7 +663,9 @@ int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len)
663 struct btrfs_path *path; 663 struct btrfs_path *path;
664 664
665 path = btrfs_alloc_path(); 665 path = btrfs_alloc_path();
666 BUG_ON(!path); 666 if (!path)
667 return -ENOMEM;
668
667 key.objectid = start; 669 key.objectid = start;
668 key.offset = len; 670 key.offset = len;
669 btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); 671 btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
@@ -3272,6 +3274,9 @@ again:
3272 } 3274 }
3273 3275
3274 ret = btrfs_alloc_chunk(trans, extent_root, flags); 3276 ret = btrfs_alloc_chunk(trans, extent_root, flags);
3277 if (ret < 0 && ret != -ENOSPC)
3278 goto out;
3279
3275 spin_lock(&space_info->lock); 3280 spin_lock(&space_info->lock);
3276 if (ret) 3281 if (ret)
3277 space_info->full = 1; 3282 space_info->full = 1;
@@ -3281,6 +3286,7 @@ again:
3281 space_info->force_alloc = CHUNK_ALLOC_NO_FORCE; 3286 space_info->force_alloc = CHUNK_ALLOC_NO_FORCE;
3282 space_info->chunk_alloc = 0; 3287 space_info->chunk_alloc = 0;
3283 spin_unlock(&space_info->lock); 3288 spin_unlock(&space_info->lock);
3289out:
3284 mutex_unlock(&extent_root->fs_info->chunk_mutex); 3290 mutex_unlock(&extent_root->fs_info->chunk_mutex);
3285 return ret; 3291 return ret;
3286} 3292}
@@ -4456,7 +4462,9 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
4456 printk(KERN_ERR "umm, got %d back from search" 4462 printk(KERN_ERR "umm, got %d back from search"
4457 ", was looking for %llu\n", ret, 4463 ", was looking for %llu\n", ret,
4458 (unsigned long long)bytenr); 4464 (unsigned long long)bytenr);
4459 btrfs_print_leaf(extent_root, path->nodes[0]); 4465 if (ret > 0)
4466 btrfs_print_leaf(extent_root,
4467 path->nodes[0]);
4460 } 4468 }
4461 BUG_ON(ret); 4469 BUG_ON(ret);
4462 extent_slot = path->slots[0]; 4470 extent_slot = path->slots[0];
@@ -5073,7 +5081,9 @@ have_block_group:
5073 * group is does point to and try again 5081 * group is does point to and try again
5074 */ 5082 */
5075 if (!last_ptr_loop && last_ptr->block_group && 5083 if (!last_ptr_loop && last_ptr->block_group &&
5076 last_ptr->block_group != block_group) { 5084 last_ptr->block_group != block_group &&
5085 index <=
5086 get_block_group_index(last_ptr->block_group)) {
5077 5087
5078 btrfs_put_block_group(block_group); 5088 btrfs_put_block_group(block_group);
5079 block_group = last_ptr->block_group; 5089 block_group = last_ptr->block_group;
@@ -5501,7 +5511,8 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
5501 u32 size = sizeof(*extent_item) + sizeof(*block_info) + sizeof(*iref); 5511 u32 size = sizeof(*extent_item) + sizeof(*block_info) + sizeof(*iref);
5502 5512
5503 path = btrfs_alloc_path(); 5513 path = btrfs_alloc_path();
5504 BUG_ON(!path); 5514 if (!path)
5515 return -ENOMEM;
5505 5516
5506 path->leave_spinning = 1; 5517 path->leave_spinning = 1;
5507 ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path, 5518 ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path,
@@ -6272,10 +6283,14 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
6272 int level; 6283 int level;
6273 6284
6274 path = btrfs_alloc_path(); 6285 path = btrfs_alloc_path();
6275 BUG_ON(!path); 6286 if (!path)
6287 return -ENOMEM;
6276 6288
6277 wc = kzalloc(sizeof(*wc), GFP_NOFS); 6289 wc = kzalloc(sizeof(*wc), GFP_NOFS);
6278 BUG_ON(!wc); 6290 if (!wc) {
6291 btrfs_free_path(path);
6292 return -ENOMEM;
6293 }
6279 6294
6280 trans = btrfs_start_transaction(tree_root, 0); 6295 trans = btrfs_start_transaction(tree_root, 0);
6281 BUG_ON(IS_ERR(trans)); 6296 BUG_ON(IS_ERR(trans));
@@ -6538,8 +6553,6 @@ static int set_block_group_ro(struct btrfs_block_group_cache *cache, int force)
6538 u64 min_allocable_bytes; 6553 u64 min_allocable_bytes;
6539 int ret = -ENOSPC; 6554 int ret = -ENOSPC;
6540 6555
6541 if (cache->ro)
6542 return 0;
6543 6556
6544 /* 6557 /*
6545 * We need some metadata space and system metadata space for 6558 * We need some metadata space and system metadata space for
@@ -6555,6 +6568,12 @@ static int set_block_group_ro(struct btrfs_block_group_cache *cache, int force)
6555 6568
6556 spin_lock(&sinfo->lock); 6569 spin_lock(&sinfo->lock);
6557 spin_lock(&cache->lock); 6570 spin_lock(&cache->lock);
6571
6572 if (cache->ro) {
6573 ret = 0;
6574 goto out;
6575 }
6576
6558 num_bytes = cache->key.offset - cache->reserved - cache->pinned - 6577 num_bytes = cache->key.offset - cache->reserved - cache->pinned -
6559 cache->bytes_super - btrfs_block_group_used(&cache->item); 6578 cache->bytes_super - btrfs_block_group_used(&cache->item);
6560 6579
@@ -6568,7 +6587,7 @@ static int set_block_group_ro(struct btrfs_block_group_cache *cache, int force)
6568 cache->ro = 1; 6587 cache->ro = 1;
6569 ret = 0; 6588 ret = 0;
6570 } 6589 }
6571 6590out:
6572 spin_unlock(&cache->lock); 6591 spin_unlock(&cache->lock);
6573 spin_unlock(&sinfo->lock); 6592 spin_unlock(&sinfo->lock);
6574 return ret; 6593 return ret;
@@ -7183,11 +7202,15 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
7183 spin_unlock(&cluster->refill_lock); 7202 spin_unlock(&cluster->refill_lock);
7184 7203
7185 path = btrfs_alloc_path(); 7204 path = btrfs_alloc_path();
7186 BUG_ON(!path); 7205 if (!path) {
7206 ret = -ENOMEM;
7207 goto out;
7208 }
7187 7209
7188 inode = lookup_free_space_inode(root, block_group, path); 7210 inode = lookup_free_space_inode(root, block_group, path);
7189 if (!IS_ERR(inode)) { 7211 if (!IS_ERR(inode)) {
7190 btrfs_orphan_add(trans, inode); 7212 ret = btrfs_orphan_add(trans, inode);
7213 BUG_ON(ret);
7191 clear_nlink(inode); 7214 clear_nlink(inode);
7192 /* One for the block groups ref */ 7215 /* One for the block groups ref */
7193 spin_lock(&block_group->lock); 7216 spin_lock(&block_group->lock);
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 067b1747421b..d418164a35f1 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -254,14 +254,14 @@ static void merge_cb(struct extent_io_tree *tree, struct extent_state *new,
254 * 254 *
255 * This should be called with the tree lock held. 255 * This should be called with the tree lock held.
256 */ 256 */
257static int merge_state(struct extent_io_tree *tree, 257static void merge_state(struct extent_io_tree *tree,
258 struct extent_state *state) 258 struct extent_state *state)
259{ 259{
260 struct extent_state *other; 260 struct extent_state *other;
261 struct rb_node *other_node; 261 struct rb_node *other_node;
262 262
263 if (state->state & (EXTENT_IOBITS | EXTENT_BOUNDARY)) 263 if (state->state & (EXTENT_IOBITS | EXTENT_BOUNDARY))
264 return 0; 264 return;
265 265
266 other_node = rb_prev(&state->rb_node); 266 other_node = rb_prev(&state->rb_node);
267 if (other_node) { 267 if (other_node) {
@@ -287,19 +287,13 @@ static int merge_state(struct extent_io_tree *tree,
287 free_extent_state(other); 287 free_extent_state(other);
288 } 288 }
289 } 289 }
290
291 return 0;
292} 290}
293 291
294static int set_state_cb(struct extent_io_tree *tree, 292static void set_state_cb(struct extent_io_tree *tree,
295 struct extent_state *state, int *bits) 293 struct extent_state *state, int *bits)
296{ 294{
297 if (tree->ops && tree->ops->set_bit_hook) { 295 if (tree->ops && tree->ops->set_bit_hook)
298 return tree->ops->set_bit_hook(tree->mapping->host, 296 tree->ops->set_bit_hook(tree->mapping->host, state, bits);
299 state, bits);
300 }
301
302 return 0;
303} 297}
304 298
305static void clear_state_cb(struct extent_io_tree *tree, 299static void clear_state_cb(struct extent_io_tree *tree,
@@ -309,6 +303,9 @@ static void clear_state_cb(struct extent_io_tree *tree,
309 tree->ops->clear_bit_hook(tree->mapping->host, state, bits); 303 tree->ops->clear_bit_hook(tree->mapping->host, state, bits);
310} 304}
311 305
306static void set_state_bits(struct extent_io_tree *tree,
307 struct extent_state *state, int *bits);
308
312/* 309/*
313 * insert an extent_state struct into the tree. 'bits' are set on the 310 * insert an extent_state struct into the tree. 'bits' are set on the
314 * struct before it is inserted. 311 * struct before it is inserted.
@@ -324,8 +321,6 @@ static int insert_state(struct extent_io_tree *tree,
324 int *bits) 321 int *bits)
325{ 322{
326 struct rb_node *node; 323 struct rb_node *node;
327 int bits_to_set = *bits & ~EXTENT_CTLBITS;
328 int ret;
329 324
330 if (end < start) { 325 if (end < start) {
331 printk(KERN_ERR "btrfs end < start %llu %llu\n", 326 printk(KERN_ERR "btrfs end < start %llu %llu\n",
@@ -335,13 +330,9 @@ static int insert_state(struct extent_io_tree *tree,
335 } 330 }
336 state->start = start; 331 state->start = start;
337 state->end = end; 332 state->end = end;
338 ret = set_state_cb(tree, state, bits);
339 if (ret)
340 return ret;
341 333
342 if (bits_to_set & EXTENT_DIRTY) 334 set_state_bits(tree, state, bits);
343 tree->dirty_bytes += end - start + 1; 335
344 state->state |= bits_to_set;
345 node = tree_insert(&tree->state, end, &state->rb_node); 336 node = tree_insert(&tree->state, end, &state->rb_node);
346 if (node) { 337 if (node) {
347 struct extent_state *found; 338 struct extent_state *found;
@@ -357,13 +348,11 @@ static int insert_state(struct extent_io_tree *tree,
357 return 0; 348 return 0;
358} 349}
359 350
360static int split_cb(struct extent_io_tree *tree, struct extent_state *orig, 351static void split_cb(struct extent_io_tree *tree, struct extent_state *orig,
361 u64 split) 352 u64 split)
362{ 353{
363 if (tree->ops && tree->ops->split_extent_hook) 354 if (tree->ops && tree->ops->split_extent_hook)
364 return tree->ops->split_extent_hook(tree->mapping->host, 355 tree->ops->split_extent_hook(tree->mapping->host, orig, split);
365 orig, split);
366 return 0;
367} 356}
368 357
369/* 358/*
@@ -659,34 +648,25 @@ again:
659 if (start > end) 648 if (start > end)
660 break; 649 break;
661 650
662 if (need_resched()) { 651 cond_resched_lock(&tree->lock);
663 spin_unlock(&tree->lock);
664 cond_resched();
665 spin_lock(&tree->lock);
666 }
667 } 652 }
668out: 653out:
669 spin_unlock(&tree->lock); 654 spin_unlock(&tree->lock);
670 return 0; 655 return 0;
671} 656}
672 657
673static int set_state_bits(struct extent_io_tree *tree, 658static void set_state_bits(struct extent_io_tree *tree,
674 struct extent_state *state, 659 struct extent_state *state,
675 int *bits) 660 int *bits)
676{ 661{
677 int ret;
678 int bits_to_set = *bits & ~EXTENT_CTLBITS; 662 int bits_to_set = *bits & ~EXTENT_CTLBITS;
679 663
680 ret = set_state_cb(tree, state, bits); 664 set_state_cb(tree, state, bits);
681 if (ret)
682 return ret;
683 if ((bits_to_set & EXTENT_DIRTY) && !(state->state & EXTENT_DIRTY)) { 665 if ((bits_to_set & EXTENT_DIRTY) && !(state->state & EXTENT_DIRTY)) {
684 u64 range = state->end - state->start + 1; 666 u64 range = state->end - state->start + 1;
685 tree->dirty_bytes += range; 667 tree->dirty_bytes += range;
686 } 668 }
687 state->state |= bits_to_set; 669 state->state |= bits_to_set;
688
689 return 0;
690} 670}
691 671
692static void cache_state(struct extent_state *state, 672static void cache_state(struct extent_state *state,
@@ -779,9 +759,7 @@ hit_next:
779 goto out; 759 goto out;
780 } 760 }
781 761
782 err = set_state_bits(tree, state, &bits); 762 set_state_bits(tree, state, &bits);
783 if (err)
784 goto out;
785 763
786 cache_state(state, cached_state); 764 cache_state(state, cached_state);
787 merge_state(tree, state); 765 merge_state(tree, state);
@@ -830,9 +808,7 @@ hit_next:
830 if (err) 808 if (err)
831 goto out; 809 goto out;
832 if (state->end <= end) { 810 if (state->end <= end) {
833 err = set_state_bits(tree, state, &bits); 811 set_state_bits(tree, state, &bits);
834 if (err)
835 goto out;
836 cache_state(state, cached_state); 812 cache_state(state, cached_state);
837 merge_state(tree, state); 813 merge_state(tree, state);
838 if (last_end == (u64)-1) 814 if (last_end == (u64)-1)
@@ -893,11 +869,7 @@ hit_next:
893 err = split_state(tree, state, prealloc, end + 1); 869 err = split_state(tree, state, prealloc, end + 1);
894 BUG_ON(err == -EEXIST); 870 BUG_ON(err == -EEXIST);
895 871
896 err = set_state_bits(tree, prealloc, &bits); 872 set_state_bits(tree, prealloc, &bits);
897 if (err) {
898 prealloc = NULL;
899 goto out;
900 }
901 cache_state(prealloc, cached_state); 873 cache_state(prealloc, cached_state);
902 merge_state(tree, prealloc); 874 merge_state(tree, prealloc);
903 prealloc = NULL; 875 prealloc = NULL;
@@ -1059,46 +1031,6 @@ static int set_range_writeback(struct extent_io_tree *tree, u64 start, u64 end)
1059 return 0; 1031 return 0;
1060} 1032}
1061 1033
1062/*
1063 * find the first offset in the io tree with 'bits' set. zero is
1064 * returned if we find something, and *start_ret and *end_ret are
1065 * set to reflect the state struct that was found.
1066 *
1067 * If nothing was found, 1 is returned, < 0 on error
1068 */
1069int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
1070 u64 *start_ret, u64 *end_ret, int bits)
1071{
1072 struct rb_node *node;
1073 struct extent_state *state;
1074 int ret = 1;
1075
1076 spin_lock(&tree->lock);
1077 /*
1078 * this search will find all the extents that end after
1079 * our range starts.
1080 */
1081 node = tree_search(tree, start);
1082 if (!node)
1083 goto out;
1084
1085 while (1) {
1086 state = rb_entry(node, struct extent_state, rb_node);
1087 if (state->end >= start && (state->state & bits)) {
1088 *start_ret = state->start;
1089 *end_ret = state->end;
1090 ret = 0;
1091 break;
1092 }
1093 node = rb_next(node);
1094 if (!node)
1095 break;
1096 }
1097out:
1098 spin_unlock(&tree->lock);
1099 return ret;
1100}
1101
1102/* find the first state struct with 'bits' set after 'start', and 1034/* find the first state struct with 'bits' set after 'start', and
1103 * return it. tree->lock must be held. NULL will returned if 1035 * return it. tree->lock must be held. NULL will returned if
1104 * nothing was found after 'start' 1036 * nothing was found after 'start'
@@ -1131,6 +1063,30 @@ out:
1131} 1063}
1132 1064
1133/* 1065/*
1066 * find the first offset in the io tree with 'bits' set. zero is
1067 * returned if we find something, and *start_ret and *end_ret are
1068 * set to reflect the state struct that was found.
1069 *
1070 * If nothing was found, 1 is returned, < 0 on error
1071 */
1072int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
1073 u64 *start_ret, u64 *end_ret, int bits)
1074{
1075 struct extent_state *state;
1076 int ret = 1;
1077
1078 spin_lock(&tree->lock);
1079 state = find_first_extent_bit_state(tree, start, bits);
1080 if (state) {
1081 *start_ret = state->start;
1082 *end_ret = state->end;
1083 ret = 0;
1084 }
1085 spin_unlock(&tree->lock);
1086 return ret;
1087}
1088
1089/*
1134 * find a contiguous range of bytes in the file marked as delalloc, not 1090 * find a contiguous range of bytes in the file marked as delalloc, not
1135 * more than 'max_bytes'. start and end are used to return the range, 1091 * more than 'max_bytes'. start and end are used to return the range,
1136 * 1092 *
@@ -2546,7 +2502,6 @@ int extent_write_full_page(struct extent_io_tree *tree, struct page *page,
2546 struct writeback_control *wbc) 2502 struct writeback_control *wbc)
2547{ 2503{
2548 int ret; 2504 int ret;
2549 struct address_space *mapping = page->mapping;
2550 struct extent_page_data epd = { 2505 struct extent_page_data epd = {
2551 .bio = NULL, 2506 .bio = NULL,
2552 .tree = tree, 2507 .tree = tree,
@@ -2554,17 +2509,9 @@ int extent_write_full_page(struct extent_io_tree *tree, struct page *page,
2554 .extent_locked = 0, 2509 .extent_locked = 0,
2555 .sync_io = wbc->sync_mode == WB_SYNC_ALL, 2510 .sync_io = wbc->sync_mode == WB_SYNC_ALL,
2556 }; 2511 };
2557 struct writeback_control wbc_writepages = {
2558 .sync_mode = wbc->sync_mode,
2559 .nr_to_write = 64,
2560 .range_start = page_offset(page) + PAGE_CACHE_SIZE,
2561 .range_end = (loff_t)-1,
2562 };
2563 2512
2564 ret = __extent_writepage(page, wbc, &epd); 2513 ret = __extent_writepage(page, wbc, &epd);
2565 2514
2566 extent_write_cache_pages(tree, mapping, &wbc_writepages,
2567 __extent_writepage, &epd, flush_write_bio);
2568 flush_epd_write_bio(&epd); 2515 flush_epd_write_bio(&epd);
2569 return ret; 2516 return ret;
2570} 2517}
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 21a7ca9e7282..7b2f0c3e7929 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -76,15 +76,15 @@ struct extent_io_ops {
76 struct extent_state *state); 76 struct extent_state *state);
77 int (*writepage_end_io_hook)(struct page *page, u64 start, u64 end, 77 int (*writepage_end_io_hook)(struct page *page, u64 start, u64 end,
78 struct extent_state *state, int uptodate); 78 struct extent_state *state, int uptodate);
79 int (*set_bit_hook)(struct inode *inode, struct extent_state *state, 79 void (*set_bit_hook)(struct inode *inode, struct extent_state *state,
80 int *bits); 80 int *bits);
81 int (*clear_bit_hook)(struct inode *inode, struct extent_state *state, 81 void (*clear_bit_hook)(struct inode *inode, struct extent_state *state,
82 int *bits); 82 int *bits);
83 int (*merge_extent_hook)(struct inode *inode, 83 void (*merge_extent_hook)(struct inode *inode,
84 struct extent_state *new, 84 struct extent_state *new,
85 struct extent_state *other); 85 struct extent_state *other);
86 int (*split_extent_hook)(struct inode *inode, 86 void (*split_extent_hook)(struct inode *inode,
87 struct extent_state *orig, u64 split); 87 struct extent_state *orig, u64 split);
88 int (*write_cache_pages_lock_hook)(struct page *page); 88 int (*write_cache_pages_lock_hook)(struct page *page);
89}; 89};
90 90
@@ -108,8 +108,6 @@ struct extent_state {
108 wait_queue_head_t wq; 108 wait_queue_head_t wq;
109 atomic_t refs; 109 atomic_t refs;
110 unsigned long state; 110 unsigned long state;
111 u64 split_start;
112 u64 split_end;
113 111
114 /* for use by the FS */ 112 /* for use by the FS */
115 u64 private; 113 u64 private;
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c
index 2d0410344ea3..7c97b3301459 100644
--- a/fs/btrfs/extent_map.c
+++ b/fs/btrfs/extent_map.c
@@ -183,22 +183,10 @@ static int mergable_maps(struct extent_map *prev, struct extent_map *next)
183 return 0; 183 return 0;
184} 184}
185 185
186int unpin_extent_cache(struct extent_map_tree *tree, u64 start, u64 len) 186static void try_merge_map(struct extent_map_tree *tree, struct extent_map *em)
187{ 187{
188 int ret = 0;
189 struct extent_map *merge = NULL; 188 struct extent_map *merge = NULL;
190 struct rb_node *rb; 189 struct rb_node *rb;
191 struct extent_map *em;
192
193 write_lock(&tree->lock);
194 em = lookup_extent_mapping(tree, start, len);
195
196 WARN_ON(!em || em->start != start);
197
198 if (!em)
199 goto out;
200
201 clear_bit(EXTENT_FLAG_PINNED, &em->flags);
202 190
203 if (em->start != 0) { 191 if (em->start != 0) {
204 rb = rb_prev(&em->rb_node); 192 rb = rb_prev(&em->rb_node);
@@ -225,6 +213,24 @@ int unpin_extent_cache(struct extent_map_tree *tree, u64 start, u64 len)
225 merge->in_tree = 0; 213 merge->in_tree = 0;
226 free_extent_map(merge); 214 free_extent_map(merge);
227 } 215 }
216}
217
218int unpin_extent_cache(struct extent_map_tree *tree, u64 start, u64 len)
219{
220 int ret = 0;
221 struct extent_map *em;
222
223 write_lock(&tree->lock);
224 em = lookup_extent_mapping(tree, start, len);
225
226 WARN_ON(!em || em->start != start);
227
228 if (!em)
229 goto out;
230
231 clear_bit(EXTENT_FLAG_PINNED, &em->flags);
232
233 try_merge_map(tree, em);
228 234
229 free_extent_map(em); 235 free_extent_map(em);
230out: 236out:
@@ -247,7 +253,6 @@ int add_extent_mapping(struct extent_map_tree *tree,
247 struct extent_map *em) 253 struct extent_map *em)
248{ 254{
249 int ret = 0; 255 int ret = 0;
250 struct extent_map *merge = NULL;
251 struct rb_node *rb; 256 struct rb_node *rb;
252 struct extent_map *exist; 257 struct extent_map *exist;
253 258
@@ -263,30 +268,8 @@ int add_extent_mapping(struct extent_map_tree *tree,
263 goto out; 268 goto out;
264 } 269 }
265 atomic_inc(&em->refs); 270 atomic_inc(&em->refs);
266 if (em->start != 0) { 271
267 rb = rb_prev(&em->rb_node); 272 try_merge_map(tree, em);
268 if (rb)
269 merge = rb_entry(rb, struct extent_map, rb_node);
270 if (rb && mergable_maps(merge, em)) {
271 em->start = merge->start;
272 em->len += merge->len;
273 em->block_len += merge->block_len;
274 em->block_start = merge->block_start;
275 merge->in_tree = 0;
276 rb_erase(&merge->rb_node, &tree->map);
277 free_extent_map(merge);
278 }
279 }
280 rb = rb_next(&em->rb_node);
281 if (rb)
282 merge = rb_entry(rb, struct extent_map, rb_node);
283 if (rb && mergable_maps(em, merge)) {
284 em->len += merge->len;
285 em->block_len += merge->len;
286 rb_erase(&merge->rb_node, &tree->map);
287 merge->in_tree = 0;
288 free_extent_map(merge);
289 }
290out: 273out:
291 return ret; 274 return ret;
292} 275}
@@ -299,19 +282,8 @@ static u64 range_end(u64 start, u64 len)
299 return start + len; 282 return start + len;
300} 283}
301 284
302/** 285struct extent_map *__lookup_extent_mapping(struct extent_map_tree *tree,
303 * lookup_extent_mapping - lookup extent_map 286 u64 start, u64 len, int strict)
304 * @tree: tree to lookup in
305 * @start: byte offset to start the search
306 * @len: length of the lookup range
307 *
308 * Find and return the first extent_map struct in @tree that intersects the
309 * [start, len] range. There may be additional objects in the tree that
310 * intersect, so check the object returned carefully to make sure that no
311 * additional lookups are needed.
312 */
313struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree,
314 u64 start, u64 len)
315{ 287{
316 struct extent_map *em; 288 struct extent_map *em;
317 struct rb_node *rb_node; 289 struct rb_node *rb_node;
@@ -320,38 +292,42 @@ struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree,
320 u64 end = range_end(start, len); 292 u64 end = range_end(start, len);
321 293
322 rb_node = __tree_search(&tree->map, start, &prev, &next); 294 rb_node = __tree_search(&tree->map, start, &prev, &next);
323 if (!rb_node && prev) {
324 em = rb_entry(prev, struct extent_map, rb_node);
325 if (end > em->start && start < extent_map_end(em))
326 goto found;
327 }
328 if (!rb_node && next) {
329 em = rb_entry(next, struct extent_map, rb_node);
330 if (end > em->start && start < extent_map_end(em))
331 goto found;
332 }
333 if (!rb_node) { 295 if (!rb_node) {
334 em = NULL; 296 if (prev)
335 goto out; 297 rb_node = prev;
336 } 298 else if (next)
337 if (IS_ERR(rb_node)) { 299 rb_node = next;
338 em = ERR_CAST(rb_node); 300 else
339 goto out; 301 return NULL;
340 } 302 }
303
341 em = rb_entry(rb_node, struct extent_map, rb_node); 304 em = rb_entry(rb_node, struct extent_map, rb_node);
342 if (end > em->start && start < extent_map_end(em))
343 goto found;
344 305
345 em = NULL; 306 if (strict && !(end > em->start && start < extent_map_end(em)))
346 goto out; 307 return NULL;
347 308
348found:
349 atomic_inc(&em->refs); 309 atomic_inc(&em->refs);
350out:
351 return em; 310 return em;
352} 311}
353 312
354/** 313/**
314 * lookup_extent_mapping - lookup extent_map
315 * @tree: tree to lookup in
316 * @start: byte offset to start the search
317 * @len: length of the lookup range
318 *
319 * Find and return the first extent_map struct in @tree that intersects the
320 * [start, len] range. There may be additional objects in the tree that
321 * intersect, so check the object returned carefully to make sure that no
322 * additional lookups are needed.
323 */
324struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree,
325 u64 start, u64 len)
326{
327 return __lookup_extent_mapping(tree, start, len, 1);
328}
329
330/**
355 * search_extent_mapping - find a nearby extent map 331 * search_extent_mapping - find a nearby extent map
356 * @tree: tree to lookup in 332 * @tree: tree to lookup in
357 * @start: byte offset to start the search 333 * @start: byte offset to start the search
@@ -365,38 +341,7 @@ out:
365struct extent_map *search_extent_mapping(struct extent_map_tree *tree, 341struct extent_map *search_extent_mapping(struct extent_map_tree *tree,
366 u64 start, u64 len) 342 u64 start, u64 len)
367{ 343{
368 struct extent_map *em; 344 return __lookup_extent_mapping(tree, start, len, 0);
369 struct rb_node *rb_node;
370 struct rb_node *prev = NULL;
371 struct rb_node *next = NULL;
372
373 rb_node = __tree_search(&tree->map, start, &prev, &next);
374 if (!rb_node && prev) {
375 em = rb_entry(prev, struct extent_map, rb_node);
376 goto found;
377 }
378 if (!rb_node && next) {
379 em = rb_entry(next, struct extent_map, rb_node);
380 goto found;
381 }
382 if (!rb_node) {
383 em = NULL;
384 goto out;
385 }
386 if (IS_ERR(rb_node)) {
387 em = ERR_CAST(rb_node);
388 goto out;
389 }
390 em = rb_entry(rb_node, struct extent_map, rb_node);
391 goto found;
392
393 em = NULL;
394 goto out;
395
396found:
397 atomic_inc(&em->refs);
398out:
399 return em;
400} 345}
401 346
402/** 347/**
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index 08bcfa92a222..b910694f61ed 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -291,7 +291,8 @@ int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
291 u16 csum_size = btrfs_super_csum_size(&root->fs_info->super_copy); 291 u16 csum_size = btrfs_super_csum_size(&root->fs_info->super_copy);
292 292
293 path = btrfs_alloc_path(); 293 path = btrfs_alloc_path();
294 BUG_ON(!path); 294 if (!path)
295 return -ENOMEM;
295 296
296 if (search_commit) { 297 if (search_commit) {
297 path->skip_locking = 1; 298 path->skip_locking = 1;
@@ -677,7 +678,9 @@ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
677 btrfs_super_csum_size(&root->fs_info->super_copy); 678 btrfs_super_csum_size(&root->fs_info->super_copy);
678 679
679 path = btrfs_alloc_path(); 680 path = btrfs_alloc_path();
680 BUG_ON(!path); 681 if (!path)
682 return -ENOMEM;
683
681 sector_sum = sums->sums; 684 sector_sum = sums->sums;
682again: 685again:
683 next_offset = (u64)-1; 686 next_offset = (u64)-1;
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index a35e51c9f235..658d66959abe 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -74,7 +74,7 @@ struct inode_defrag {
74 * If an existing record is found the defrag item you 74 * If an existing record is found the defrag item you
75 * pass in is freed 75 * pass in is freed
76 */ 76 */
77static int __btrfs_add_inode_defrag(struct inode *inode, 77static void __btrfs_add_inode_defrag(struct inode *inode,
78 struct inode_defrag *defrag) 78 struct inode_defrag *defrag)
79{ 79{
80 struct btrfs_root *root = BTRFS_I(inode)->root; 80 struct btrfs_root *root = BTRFS_I(inode)->root;
@@ -106,11 +106,11 @@ static int __btrfs_add_inode_defrag(struct inode *inode,
106 BTRFS_I(inode)->in_defrag = 1; 106 BTRFS_I(inode)->in_defrag = 1;
107 rb_link_node(&defrag->rb_node, parent, p); 107 rb_link_node(&defrag->rb_node, parent, p);
108 rb_insert_color(&defrag->rb_node, &root->fs_info->defrag_inodes); 108 rb_insert_color(&defrag->rb_node, &root->fs_info->defrag_inodes);
109 return 0; 109 return;
110 110
111exists: 111exists:
112 kfree(defrag); 112 kfree(defrag);
113 return 0; 113 return;
114 114
115} 115}
116 116
@@ -123,7 +123,6 @@ int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans,
123{ 123{
124 struct btrfs_root *root = BTRFS_I(inode)->root; 124 struct btrfs_root *root = BTRFS_I(inode)->root;
125 struct inode_defrag *defrag; 125 struct inode_defrag *defrag;
126 int ret = 0;
127 u64 transid; 126 u64 transid;
128 127
129 if (!btrfs_test_opt(root, AUTO_DEFRAG)) 128 if (!btrfs_test_opt(root, AUTO_DEFRAG))
@@ -150,9 +149,9 @@ int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans,
150 149
151 spin_lock(&root->fs_info->defrag_inodes_lock); 150 spin_lock(&root->fs_info->defrag_inodes_lock);
152 if (!BTRFS_I(inode)->in_defrag) 151 if (!BTRFS_I(inode)->in_defrag)
153 ret = __btrfs_add_inode_defrag(inode, defrag); 152 __btrfs_add_inode_defrag(inode, defrag);
154 spin_unlock(&root->fs_info->defrag_inodes_lock); 153 spin_unlock(&root->fs_info->defrag_inodes_lock);
155 return ret; 154 return 0;
156} 155}
157 156
158/* 157/*
@@ -855,7 +854,8 @@ int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
855 btrfs_drop_extent_cache(inode, start, end - 1, 0); 854 btrfs_drop_extent_cache(inode, start, end - 1, 0);
856 855
857 path = btrfs_alloc_path(); 856 path = btrfs_alloc_path();
858 BUG_ON(!path); 857 if (!path)
858 return -ENOMEM;
859again: 859again:
860 recow = 0; 860 recow = 0;
861 split = start; 861 split = start;
@@ -1059,7 +1059,7 @@ static int prepare_uptodate_page(struct page *page, u64 pos)
1059static noinline int prepare_pages(struct btrfs_root *root, struct file *file, 1059static noinline int prepare_pages(struct btrfs_root *root, struct file *file,
1060 struct page **pages, size_t num_pages, 1060 struct page **pages, size_t num_pages,
1061 loff_t pos, unsigned long first_index, 1061 loff_t pos, unsigned long first_index,
1062 unsigned long last_index, size_t write_bytes) 1062 size_t write_bytes)
1063{ 1063{
1064 struct extent_state *cached_state = NULL; 1064 struct extent_state *cached_state = NULL;
1065 int i; 1065 int i;
@@ -1159,7 +1159,6 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
1159 struct btrfs_root *root = BTRFS_I(inode)->root; 1159 struct btrfs_root *root = BTRFS_I(inode)->root;
1160 struct page **pages = NULL; 1160 struct page **pages = NULL;
1161 unsigned long first_index; 1161 unsigned long first_index;
1162 unsigned long last_index;
1163 size_t num_written = 0; 1162 size_t num_written = 0;
1164 int nrptrs; 1163 int nrptrs;
1165 int ret = 0; 1164 int ret = 0;
@@ -1172,7 +1171,6 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
1172 return -ENOMEM; 1171 return -ENOMEM;
1173 1172
1174 first_index = pos >> PAGE_CACHE_SHIFT; 1173 first_index = pos >> PAGE_CACHE_SHIFT;
1175 last_index = (pos + iov_iter_count(i)) >> PAGE_CACHE_SHIFT;
1176 1174
1177 while (iov_iter_count(i) > 0) { 1175 while (iov_iter_count(i) > 0) {
1178 size_t offset = pos & (PAGE_CACHE_SIZE - 1); 1176 size_t offset = pos & (PAGE_CACHE_SIZE - 1);
@@ -1206,8 +1204,7 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
1206 * contents of pages from loop to loop 1204 * contents of pages from loop to loop
1207 */ 1205 */
1208 ret = prepare_pages(root, file, pages, num_pages, 1206 ret = prepare_pages(root, file, pages, num_pages,
1209 pos, first_index, last_index, 1207 pos, first_index, write_bytes);
1210 write_bytes);
1211 if (ret) { 1208 if (ret) {
1212 btrfs_delalloc_release_space(inode, 1209 btrfs_delalloc_release_space(inode,
1213 num_pages << PAGE_CACHE_SHIFT); 1210 num_pages << PAGE_CACHE_SHIFT);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index ae762dab37f8..15fceefbca0a 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -1061,7 +1061,8 @@ static noinline int run_delalloc_nocow(struct inode *inode,
1061 u64 ino = btrfs_ino(inode); 1061 u64 ino = btrfs_ino(inode);
1062 1062
1063 path = btrfs_alloc_path(); 1063 path = btrfs_alloc_path();
1064 BUG_ON(!path); 1064 if (!path)
1065 return -ENOMEM;
1065 1066
1066 nolock = btrfs_is_free_space_inode(root, inode); 1067 nolock = btrfs_is_free_space_inode(root, inode);
1067 1068
@@ -1282,17 +1283,16 @@ static int run_delalloc_range(struct inode *inode, struct page *locked_page,
1282 return ret; 1283 return ret;
1283} 1284}
1284 1285
1285static int btrfs_split_extent_hook(struct inode *inode, 1286static void btrfs_split_extent_hook(struct inode *inode,
1286 struct extent_state *orig, u64 split) 1287 struct extent_state *orig, u64 split)
1287{ 1288{
1288 /* not delalloc, ignore it */ 1289 /* not delalloc, ignore it */
1289 if (!(orig->state & EXTENT_DELALLOC)) 1290 if (!(orig->state & EXTENT_DELALLOC))
1290 return 0; 1291 return;
1291 1292
1292 spin_lock(&BTRFS_I(inode)->lock); 1293 spin_lock(&BTRFS_I(inode)->lock);
1293 BTRFS_I(inode)->outstanding_extents++; 1294 BTRFS_I(inode)->outstanding_extents++;
1294 spin_unlock(&BTRFS_I(inode)->lock); 1295 spin_unlock(&BTRFS_I(inode)->lock);
1295 return 0;
1296} 1296}
1297 1297
1298/* 1298/*
@@ -1301,18 +1301,17 @@ static int btrfs_split_extent_hook(struct inode *inode,
1301 * extents, such as when we are doing sequential writes, so we can properly 1301 * extents, such as when we are doing sequential writes, so we can properly
1302 * account for the metadata space we'll need. 1302 * account for the metadata space we'll need.
1303 */ 1303 */
1304static int btrfs_merge_extent_hook(struct inode *inode, 1304static void btrfs_merge_extent_hook(struct inode *inode,
1305 struct extent_state *new, 1305 struct extent_state *new,
1306 struct extent_state *other) 1306 struct extent_state *other)
1307{ 1307{
1308 /* not delalloc, ignore it */ 1308 /* not delalloc, ignore it */
1309 if (!(other->state & EXTENT_DELALLOC)) 1309 if (!(other->state & EXTENT_DELALLOC))
1310 return 0; 1310 return;
1311 1311
1312 spin_lock(&BTRFS_I(inode)->lock); 1312 spin_lock(&BTRFS_I(inode)->lock);
1313 BTRFS_I(inode)->outstanding_extents--; 1313 BTRFS_I(inode)->outstanding_extents--;
1314 spin_unlock(&BTRFS_I(inode)->lock); 1314 spin_unlock(&BTRFS_I(inode)->lock);
1315 return 0;
1316} 1315}
1317 1316
1318/* 1317/*
@@ -1320,8 +1319,8 @@ static int btrfs_merge_extent_hook(struct inode *inode,
1320 * bytes in this file, and to maintain the list of inodes that 1319 * bytes in this file, and to maintain the list of inodes that
1321 * have pending delalloc work to be done. 1320 * have pending delalloc work to be done.
1322 */ 1321 */
1323static int btrfs_set_bit_hook(struct inode *inode, 1322static void btrfs_set_bit_hook(struct inode *inode,
1324 struct extent_state *state, int *bits) 1323 struct extent_state *state, int *bits)
1325{ 1324{
1326 1325
1327 /* 1326 /*
@@ -1351,14 +1350,13 @@ static int btrfs_set_bit_hook(struct inode *inode,
1351 } 1350 }
1352 spin_unlock(&root->fs_info->delalloc_lock); 1351 spin_unlock(&root->fs_info->delalloc_lock);
1353 } 1352 }
1354 return 0;
1355} 1353}
1356 1354
1357/* 1355/*
1358 * extent_io.c clear_bit_hook, see set_bit_hook for why 1356 * extent_io.c clear_bit_hook, see set_bit_hook for why
1359 */ 1357 */
1360static int btrfs_clear_bit_hook(struct inode *inode, 1358static void btrfs_clear_bit_hook(struct inode *inode,
1361 struct extent_state *state, int *bits) 1359 struct extent_state *state, int *bits)
1362{ 1360{
1363 /* 1361 /*
1364 * set_bit and clear bit hooks normally require _irqsave/restore 1362 * set_bit and clear bit hooks normally require _irqsave/restore
@@ -1395,7 +1393,6 @@ static int btrfs_clear_bit_hook(struct inode *inode,
1395 } 1393 }
1396 spin_unlock(&root->fs_info->delalloc_lock); 1394 spin_unlock(&root->fs_info->delalloc_lock);
1397 } 1395 }
1398 return 0;
1399} 1396}
1400 1397
1401/* 1398/*
@@ -1645,7 +1642,8 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
1645 int ret; 1642 int ret;
1646 1643
1647 path = btrfs_alloc_path(); 1644 path = btrfs_alloc_path();
1648 BUG_ON(!path); 1645 if (!path)
1646 return -ENOMEM;
1649 1647
1650 path->leave_spinning = 1; 1648 path->leave_spinning = 1;
1651 1649
@@ -2215,7 +2213,8 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode)
2215 2213
2216 if (!root->orphan_block_rsv) { 2214 if (!root->orphan_block_rsv) {
2217 block_rsv = btrfs_alloc_block_rsv(root); 2215 block_rsv = btrfs_alloc_block_rsv(root);
2218 BUG_ON(!block_rsv); 2216 if (!block_rsv)
2217 return -ENOMEM;
2219 } 2218 }
2220 2219
2221 spin_lock(&root->orphan_lock); 2220 spin_lock(&root->orphan_lock);
@@ -2517,7 +2516,9 @@ static void btrfs_read_locked_inode(struct inode *inode)
2517 filled = true; 2516 filled = true;
2518 2517
2519 path = btrfs_alloc_path(); 2518 path = btrfs_alloc_path();
2520 BUG_ON(!path); 2519 if (!path)
2520 goto make_bad;
2521
2521 path->leave_spinning = 1; 2522 path->leave_spinning = 1;
2522 memcpy(&location, &BTRFS_I(inode)->location, sizeof(location)); 2523 memcpy(&location, &BTRFS_I(inode)->location, sizeof(location));
2523 2524
@@ -2998,13 +2999,16 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry)
2998 2999
2999 ret = btrfs_unlink_inode(trans, root, dir, dentry->d_inode, 3000 ret = btrfs_unlink_inode(trans, root, dir, dentry->d_inode,
3000 dentry->d_name.name, dentry->d_name.len); 3001 dentry->d_name.name, dentry->d_name.len);
3001 BUG_ON(ret); 3002 if (ret)
3003 goto out;
3002 3004
3003 if (inode->i_nlink == 0) { 3005 if (inode->i_nlink == 0) {
3004 ret = btrfs_orphan_add(trans, inode); 3006 ret = btrfs_orphan_add(trans, inode);
3005 BUG_ON(ret); 3007 if (ret)
3008 goto out;
3006 } 3009 }
3007 3010
3011out:
3008 nr = trans->blocks_used; 3012 nr = trans->blocks_used;
3009 __unlink_end_trans(trans, root); 3013 __unlink_end_trans(trans, root);
3010 btrfs_btree_balance_dirty(root, nr); 3014 btrfs_btree_balance_dirty(root, nr);
@@ -3147,6 +3151,11 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
3147 3151
3148 BUG_ON(new_size > 0 && min_type != BTRFS_EXTENT_DATA_KEY); 3152 BUG_ON(new_size > 0 && min_type != BTRFS_EXTENT_DATA_KEY);
3149 3153
3154 path = btrfs_alloc_path();
3155 if (!path)
3156 return -ENOMEM;
3157 path->reada = -1;
3158
3150 if (root->ref_cows || root == root->fs_info->tree_root) 3159 if (root->ref_cows || root == root->fs_info->tree_root)
3151 btrfs_drop_extent_cache(inode, new_size & (~mask), (u64)-1, 0); 3160 btrfs_drop_extent_cache(inode, new_size & (~mask), (u64)-1, 0);
3152 3161
@@ -3159,10 +3168,6 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
3159 if (min_type == 0 && root == BTRFS_I(inode)->root) 3168 if (min_type == 0 && root == BTRFS_I(inode)->root)
3160 btrfs_kill_delayed_inode_items(inode); 3169 btrfs_kill_delayed_inode_items(inode);
3161 3170
3162 path = btrfs_alloc_path();
3163 BUG_ON(!path);
3164 path->reada = -1;
3165
3166 key.objectid = ino; 3171 key.objectid = ino;
3167 key.offset = (u64)-1; 3172 key.offset = (u64)-1;
3168 key.type = (u8)-1; 3173 key.type = (u8)-1;
@@ -3690,7 +3695,8 @@ static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry,
3690 int ret = 0; 3695 int ret = 0;
3691 3696
3692 path = btrfs_alloc_path(); 3697 path = btrfs_alloc_path();
3693 BUG_ON(!path); 3698 if (!path)
3699 return -ENOMEM;
3694 3700
3695 di = btrfs_lookup_dir_item(NULL, root, path, btrfs_ino(dir), name, 3701 di = btrfs_lookup_dir_item(NULL, root, path, btrfs_ino(dir), name,
3696 namelen, 0); 3702 namelen, 0);
@@ -3946,6 +3952,7 @@ struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location,
3946 struct btrfs_root *root, int *new) 3952 struct btrfs_root *root, int *new)
3947{ 3953{
3948 struct inode *inode; 3954 struct inode *inode;
3955 int bad_inode = 0;
3949 3956
3950 inode = btrfs_iget_locked(s, location->objectid, root); 3957 inode = btrfs_iget_locked(s, location->objectid, root);
3951 if (!inode) 3958 if (!inode)
@@ -3955,10 +3962,19 @@ struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location,
3955 BTRFS_I(inode)->root = root; 3962 BTRFS_I(inode)->root = root;
3956 memcpy(&BTRFS_I(inode)->location, location, sizeof(*location)); 3963 memcpy(&BTRFS_I(inode)->location, location, sizeof(*location));
3957 btrfs_read_locked_inode(inode); 3964 btrfs_read_locked_inode(inode);
3958 inode_tree_add(inode); 3965 if (!is_bad_inode(inode)) {
3959 unlock_new_inode(inode); 3966 inode_tree_add(inode);
3960 if (new) 3967 unlock_new_inode(inode);
3961 *new = 1; 3968 if (new)
3969 *new = 1;
3970 } else {
3971 bad_inode = 1;
3972 }
3973 }
3974
3975 if (bad_inode) {
3976 iput(inode);
3977 inode = ERR_PTR(-ESTALE);
3962 } 3978 }
3963 3979
3964 return inode; 3980 return inode;
@@ -4451,7 +4467,8 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
4451 int owner; 4467 int owner;
4452 4468
4453 path = btrfs_alloc_path(); 4469 path = btrfs_alloc_path();
4454 BUG_ON(!path); 4470 if (!path)
4471 return ERR_PTR(-ENOMEM);
4455 4472
4456 inode = new_inode(root->fs_info->sb); 4473 inode = new_inode(root->fs_info->sb);
4457 if (!inode) { 4474 if (!inode) {
@@ -6711,19 +6728,6 @@ int btrfs_create_subvol_root(struct btrfs_trans_handle *trans,
6711 return 0; 6728 return 0;
6712} 6729}
6713 6730
6714/* helper function for file defrag and space balancing. This
6715 * forces readahead on a given range of bytes in an inode
6716 */
6717unsigned long btrfs_force_ra(struct address_space *mapping,
6718 struct file_ra_state *ra, struct file *file,
6719 pgoff_t offset, pgoff_t last_index)
6720{
6721 pgoff_t req_size = last_index - offset + 1;
6722
6723 page_cache_sync_readahead(mapping, ra, file, offset, req_size);
6724 return offset + req_size;
6725}
6726
6727struct inode *btrfs_alloc_inode(struct super_block *sb) 6731struct inode *btrfs_alloc_inode(struct super_block *sb)
6728{ 6732{
6729 struct btrfs_inode *ei; 6733 struct btrfs_inode *ei;
@@ -7206,7 +7210,11 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
7206 goto out_unlock; 7210 goto out_unlock;
7207 7211
7208 path = btrfs_alloc_path(); 7212 path = btrfs_alloc_path();
7209 BUG_ON(!path); 7213 if (!path) {
7214 err = -ENOMEM;
7215 drop_inode = 1;
7216 goto out_unlock;
7217 }
7210 key.objectid = btrfs_ino(inode); 7218 key.objectid = btrfs_ino(inode);
7211 key.offset = 0; 7219 key.offset = 0;
7212 btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY); 7220 btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY);
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 0b980afc5edd..7cf013349941 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -1749,11 +1749,10 @@ static noinline int btrfs_search_path_in_tree(struct btrfs_fs_info *info,
1749 key.objectid = key.offset; 1749 key.objectid = key.offset;
1750 key.offset = (u64)-1; 1750 key.offset = (u64)-1;
1751 dirid = key.objectid; 1751 dirid = key.objectid;
1752
1753 } 1752 }
1754 if (ptr < name) 1753 if (ptr < name)
1755 goto out; 1754 goto out;
1756 memcpy(name, ptr, total_len); 1755 memmove(name, ptr, total_len);
1757 name[total_len]='\0'; 1756 name[total_len]='\0';
1758 ret = 0; 1757 ret = 0;
1759out: 1758out:
diff --git a/fs/btrfs/ref-cache.c b/fs/btrfs/ref-cache.c
deleted file mode 100644
index 82d569cb6267..000000000000
--- a/fs/btrfs/ref-cache.c
+++ /dev/null
@@ -1,68 +0,0 @@
1/*
2 * Copyright (C) 2008 Oracle. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
17 */
18
19#include <linux/sched.h>
20#include <linux/slab.h>
21#include <linux/sort.h>
22#include "ctree.h"
23#include "ref-cache.h"
24#include "transaction.h"
25
26static struct rb_node *tree_insert(struct rb_root *root, u64 bytenr,
27 struct rb_node *node)
28{
29 struct rb_node **p = &root->rb_node;
30 struct rb_node *parent = NULL;
31 struct btrfs_leaf_ref *entry;
32
33 while (*p) {
34 parent = *p;
35 entry = rb_entry(parent, struct btrfs_leaf_ref, rb_node);
36
37 if (bytenr < entry->bytenr)
38 p = &(*p)->rb_left;
39 else if (bytenr > entry->bytenr)
40 p = &(*p)->rb_right;
41 else
42 return parent;
43 }
44
45 entry = rb_entry(node, struct btrfs_leaf_ref, rb_node);
46 rb_link_node(node, parent, p);
47 rb_insert_color(node, root);
48 return NULL;
49}
50
51static struct rb_node *tree_search(struct rb_root *root, u64 bytenr)
52{
53 struct rb_node *n = root->rb_node;
54 struct btrfs_leaf_ref *entry;
55
56 while (n) {
57 entry = rb_entry(n, struct btrfs_leaf_ref, rb_node);
58 WARN_ON(!entry->in_tree);
59
60 if (bytenr < entry->bytenr)
61 n = n->rb_left;
62 else if (bytenr > entry->bytenr)
63 n = n->rb_right;
64 else
65 return n;
66 }
67 return NULL;
68}
diff --git a/fs/btrfs/ref-cache.h b/fs/btrfs/ref-cache.h
deleted file mode 100644
index 24f7001f6387..000000000000
--- a/fs/btrfs/ref-cache.h
+++ /dev/null
@@ -1,52 +0,0 @@
1/*
2 * Copyright (C) 2008 Oracle. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
17 */
18#ifndef __REFCACHE__
19#define __REFCACHE__
20
21struct btrfs_extent_info {
22 /* bytenr and num_bytes find the extent in the extent allocation tree */
23 u64 bytenr;
24 u64 num_bytes;
25
26 /* objectid and offset find the back reference for the file */
27 u64 objectid;
28 u64 offset;
29};
30
31struct btrfs_leaf_ref {
32 struct rb_node rb_node;
33 struct btrfs_leaf_ref_tree *tree;
34 int in_tree;
35 atomic_t usage;
36
37 u64 root_gen;
38 u64 bytenr;
39 u64 owner;
40 u64 generation;
41 int nritems;
42
43 struct list_head list;
44 struct btrfs_extent_info extents[];
45};
46
47static inline size_t btrfs_leaf_ref_size(int nr_extents)
48{
49 return sizeof(struct btrfs_leaf_ref) +
50 sizeof(struct btrfs_extent_info) * nr_extents;
51}
52#endif
diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c
index ebe45443de06..f4099904565a 100644
--- a/fs/btrfs/root-tree.c
+++ b/fs/btrfs/root-tree.c
@@ -71,13 +71,12 @@ out:
71 return ret; 71 return ret;
72} 72}
73 73
74int btrfs_set_root_node(struct btrfs_root_item *item, 74void btrfs_set_root_node(struct btrfs_root_item *item,
75 struct extent_buffer *node) 75 struct extent_buffer *node)
76{ 76{
77 btrfs_set_root_bytenr(item, node->start); 77 btrfs_set_root_bytenr(item, node->start);
78 btrfs_set_root_level(item, btrfs_header_level(node)); 78 btrfs_set_root_level(item, btrfs_header_level(node));
79 btrfs_set_root_generation(item, btrfs_header_generation(node)); 79 btrfs_set_root_generation(item, btrfs_header_generation(node));
80 return 0;
81} 80}
82 81
83/* 82/*
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index eb55863bb4ae..7dc36fab4afc 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -216,17 +216,11 @@ static void wait_current_trans(struct btrfs_root *root)
216 spin_lock(&root->fs_info->trans_lock); 216 spin_lock(&root->fs_info->trans_lock);
217 cur_trans = root->fs_info->running_transaction; 217 cur_trans = root->fs_info->running_transaction;
218 if (cur_trans && cur_trans->blocked) { 218 if (cur_trans && cur_trans->blocked) {
219 DEFINE_WAIT(wait);
220 atomic_inc(&cur_trans->use_count); 219 atomic_inc(&cur_trans->use_count);
221 spin_unlock(&root->fs_info->trans_lock); 220 spin_unlock(&root->fs_info->trans_lock);
222 while (1) { 221
223 prepare_to_wait(&root->fs_info->transaction_wait, &wait, 222 wait_event(root->fs_info->transaction_wait,
224 TASK_UNINTERRUPTIBLE); 223 !cur_trans->blocked);
225 if (!cur_trans->blocked)
226 break;
227 schedule();
228 }
229 finish_wait(&root->fs_info->transaction_wait, &wait);
230 put_transaction(cur_trans); 224 put_transaction(cur_trans);
231 } else { 225 } else {
232 spin_unlock(&root->fs_info->trans_lock); 226 spin_unlock(&root->fs_info->trans_lock);
@@ -357,19 +351,10 @@ struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *root
357} 351}
358 352
359/* wait for a transaction commit to be fully complete */ 353/* wait for a transaction commit to be fully complete */
360static noinline int wait_for_commit(struct btrfs_root *root, 354static noinline void wait_for_commit(struct btrfs_root *root,
361 struct btrfs_transaction *commit) 355 struct btrfs_transaction *commit)
362{ 356{
363 DEFINE_WAIT(wait); 357 wait_event(commit->commit_wait, commit->commit_done);
364 while (!commit->commit_done) {
365 prepare_to_wait(&commit->commit_wait, &wait,
366 TASK_UNINTERRUPTIBLE);
367 if (commit->commit_done)
368 break;
369 schedule();
370 }
371 finish_wait(&commit->commit_wait, &wait);
372 return 0;
373} 358}
374 359
375int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid) 360int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid)
@@ -1085,22 +1070,7 @@ int btrfs_transaction_blocked(struct btrfs_fs_info *info)
1085static void wait_current_trans_commit_start(struct btrfs_root *root, 1070static void wait_current_trans_commit_start(struct btrfs_root *root,
1086 struct btrfs_transaction *trans) 1071 struct btrfs_transaction *trans)
1087{ 1072{
1088 DEFINE_WAIT(wait); 1073 wait_event(root->fs_info->transaction_blocked_wait, trans->in_commit);
1089
1090 if (trans->in_commit)
1091 return;
1092
1093 while (1) {
1094 prepare_to_wait(&root->fs_info->transaction_blocked_wait, &wait,
1095 TASK_UNINTERRUPTIBLE);
1096 if (trans->in_commit) {
1097 finish_wait(&root->fs_info->transaction_blocked_wait,
1098 &wait);
1099 break;
1100 }
1101 schedule();
1102 finish_wait(&root->fs_info->transaction_blocked_wait, &wait);
1103 }
1104} 1074}
1105 1075
1106/* 1076/*
@@ -1110,24 +1080,8 @@ static void wait_current_trans_commit_start(struct btrfs_root *root,
1110static void wait_current_trans_commit_start_and_unblock(struct btrfs_root *root, 1080static void wait_current_trans_commit_start_and_unblock(struct btrfs_root *root,
1111 struct btrfs_transaction *trans) 1081 struct btrfs_transaction *trans)
1112{ 1082{
1113 DEFINE_WAIT(wait); 1083 wait_event(root->fs_info->transaction_wait,
1114 1084 trans->commit_done || (trans->in_commit && !trans->blocked));
1115 if (trans->commit_done || (trans->in_commit && !trans->blocked))
1116 return;
1117
1118 while (1) {
1119 prepare_to_wait(&root->fs_info->transaction_wait, &wait,
1120 TASK_UNINTERRUPTIBLE);
1121 if (trans->commit_done ||
1122 (trans->in_commit && !trans->blocked)) {
1123 finish_wait(&root->fs_info->transaction_wait,
1124 &wait);
1125 break;
1126 }
1127 schedule();
1128 finish_wait(&root->fs_info->transaction_wait,
1129 &wait);
1130 }
1131} 1085}
1132 1086
1133/* 1087/*
@@ -1234,8 +1188,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1234 atomic_inc(&cur_trans->use_count); 1188 atomic_inc(&cur_trans->use_count);
1235 btrfs_end_transaction(trans, root); 1189 btrfs_end_transaction(trans, root);
1236 1190
1237 ret = wait_for_commit(root, cur_trans); 1191 wait_for_commit(root, cur_trans);
1238 BUG_ON(ret);
1239 1192
1240 put_transaction(cur_trans); 1193 put_transaction(cur_trans);
1241 1194
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index ac278dd83175..babee65f8eda 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -1617,7 +1617,8 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb,
1617 return 0; 1617 return 0;
1618 1618
1619 path = btrfs_alloc_path(); 1619 path = btrfs_alloc_path();
1620 BUG_ON(!path); 1620 if (!path)
1621 return -ENOMEM;
1621 1622
1622 nritems = btrfs_header_nritems(eb); 1623 nritems = btrfs_header_nritems(eb);
1623 for (i = 0; i < nritems; i++) { 1624 for (i = 0; i < nritems; i++) {
@@ -1723,7 +1724,9 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
1723 return -ENOMEM; 1724 return -ENOMEM;
1724 1725
1725 if (*level == 1) { 1726 if (*level == 1) {
1726 wc->process_func(root, next, wc, ptr_gen); 1727 ret = wc->process_func(root, next, wc, ptr_gen);
1728 if (ret)
1729 return ret;
1727 1730
1728 path->slots[*level]++; 1731 path->slots[*level]++;
1729 if (wc->free) { 1732 if (wc->free) {
@@ -1788,8 +1791,11 @@ static noinline int walk_up_log_tree(struct btrfs_trans_handle *trans,
1788 parent = path->nodes[*level + 1]; 1791 parent = path->nodes[*level + 1];
1789 1792
1790 root_owner = btrfs_header_owner(parent); 1793 root_owner = btrfs_header_owner(parent);
1791 wc->process_func(root, path->nodes[*level], wc, 1794 ret = wc->process_func(root, path->nodes[*level], wc,
1792 btrfs_header_generation(path->nodes[*level])); 1795 btrfs_header_generation(path->nodes[*level]));
1796 if (ret)
1797 return ret;
1798
1793 if (wc->free) { 1799 if (wc->free) {
1794 struct extent_buffer *next; 1800 struct extent_buffer *next;
1795 1801
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index b89e372c7544..53875ae73ad4 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -1037,7 +1037,8 @@ static noinline int find_next_chunk(struct btrfs_root *root,
1037 struct btrfs_key found_key; 1037 struct btrfs_key found_key;
1038 1038
1039 path = btrfs_alloc_path(); 1039 path = btrfs_alloc_path();
1040 BUG_ON(!path); 1040 if (!path)
1041 return -ENOMEM;
1041 1042
1042 key.objectid = objectid; 1043 key.objectid = objectid;
1043 key.offset = (u64)-1; 1044 key.offset = (u64)-1;
@@ -2061,8 +2062,10 @@ int btrfs_balance(struct btrfs_root *dev_root)
2061 2062
2062 /* step two, relocate all the chunks */ 2063 /* step two, relocate all the chunks */
2063 path = btrfs_alloc_path(); 2064 path = btrfs_alloc_path();
2064 BUG_ON(!path); 2065 if (!path) {
2065 2066 ret = -ENOMEM;
2067 goto error;
2068 }
2066 key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID; 2069 key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
2067 key.offset = (u64)-1; 2070 key.offset = (u64)-1;
2068 key.type = BTRFS_CHUNK_ITEM_KEY; 2071 key.type = BTRFS_CHUNK_ITEM_KEY;
@@ -2661,7 +2664,8 @@ static noinline int init_first_rw_device(struct btrfs_trans_handle *trans,
2661 2664
2662 ret = find_next_chunk(fs_info->chunk_root, 2665 ret = find_next_chunk(fs_info->chunk_root,
2663 BTRFS_FIRST_CHUNK_TREE_OBJECTID, &chunk_offset); 2666 BTRFS_FIRST_CHUNK_TREE_OBJECTID, &chunk_offset);
2664 BUG_ON(ret); 2667 if (ret)
2668 return ret;
2665 2669
2666 alloc_profile = BTRFS_BLOCK_GROUP_METADATA | 2670 alloc_profile = BTRFS_BLOCK_GROUP_METADATA |
2667 (fs_info->metadata_alloc_profile & 2671 (fs_info->metadata_alloc_profile &
diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c
index 8d8f28c94c0f..6873bb634a97 100644
--- a/fs/cifs/cifs_dfs_ref.c
+++ b/fs/cifs/cifs_dfs_ref.c
@@ -141,10 +141,11 @@ char *cifs_compose_mount_options(const char *sb_mountdata,
141 141
142 rc = dns_resolve_server_name_to_ip(*devname, &srvIP); 142 rc = dns_resolve_server_name_to_ip(*devname, &srvIP);
143 if (rc < 0) { 143 if (rc < 0) {
144 cERROR(1, "%s: Failed to resolve server part of %s to IP: %d", 144 cFYI(1, "%s: Failed to resolve server part of %s to IP: %d",
145 __func__, *devname, rc); 145 __func__, *devname, rc);
146 goto compose_mount_options_err; 146 goto compose_mount_options_err;
147 } 147 }
148
148 /* md_len = strlen(...) + 12 for 'sep+prefixpath=' 149 /* md_len = strlen(...) + 12 for 'sep+prefixpath='
149 * assuming that we have 'unc=' and 'ip=' in 150 * assuming that we have 'unc=' and 'ip=' in
150 * the original sb_mountdata 151 * the original sb_mountdata
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 212e5629cc1d..f93eb948d071 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -563,6 +563,10 @@ cifs_get_root(struct smb_vol *vol, struct super_block *sb)
563 mutex_unlock(&dir->i_mutex); 563 mutex_unlock(&dir->i_mutex);
564 dput(dentry); 564 dput(dentry);
565 dentry = child; 565 dentry = child;
566 if (!dentry->d_inode) {
567 dput(dentry);
568 dentry = ERR_PTR(-ENOENT);
569 }
566 } while (!IS_ERR(dentry)); 570 } while (!IS_ERR(dentry));
567 _FreeXid(xid); 571 _FreeXid(xid);
568 kfree(full_path); 572 kfree(full_path);
diff --git a/fs/cifs/dns_resolve.c b/fs/cifs/dns_resolve.c
index 548f06230a6d..1d2d91d9bf65 100644
--- a/fs/cifs/dns_resolve.c
+++ b/fs/cifs/dns_resolve.c
@@ -79,8 +79,8 @@ dns_resolve_server_name_to_ip(const char *unc, char **ip_addr)
79 /* Perform the upcall */ 79 /* Perform the upcall */
80 rc = dns_query(NULL, hostname, len, NULL, ip_addr, NULL); 80 rc = dns_query(NULL, hostname, len, NULL, ip_addr, NULL);
81 if (rc < 0) 81 if (rc < 0)
82 cERROR(1, "%s: unable to resolve: %*.*s", 82 cFYI(1, "%s: unable to resolve: %*.*s",
83 __func__, len, len, hostname); 83 __func__, len, len, hostname);
84 else 84 else
85 cFYI(1, "%s: resolved: %*.*s to %s", 85 cFYI(1, "%s: resolved: %*.*s to %s",
86 __func__, len, len, hostname, *ip_addr); 86 __func__, len, len, hostname, *ip_addr);
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 9b018c8334fa..a7b2dcd4a53e 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -764,20 +764,10 @@ char *cifs_build_path_to_root(struct smb_vol *vol, struct cifs_sb_info *cifs_sb,
764 if (full_path == NULL) 764 if (full_path == NULL)
765 return full_path; 765 return full_path;
766 766
767 if (dfsplen) { 767 if (dfsplen)
768 strncpy(full_path, tcon->treeName, dfsplen); 768 strncpy(full_path, tcon->treeName, dfsplen);
769 /* switch slash direction in prepath depending on whether
770 * windows or posix style path names
771 */
772 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS) {
773 int i;
774 for (i = 0; i < dfsplen; i++) {
775 if (full_path[i] == '\\')
776 full_path[i] = '/';
777 }
778 }
779 }
780 strncpy(full_path + dfsplen, vol->prepath, pplen); 769 strncpy(full_path + dfsplen, vol->prepath, pplen);
770 convert_delimiter(full_path, CIFS_DIR_SEP(cifs_sb));
781 full_path[dfsplen + pplen] = 0; /* add trailing null */ 771 full_path[dfsplen + pplen] = 0; /* add trailing null */
782 return full_path; 772 return full_path;
783} 773}
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index 243d58720513..d3e619692ee0 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -124,8 +124,7 @@ static __u32 cifs_ssetup_hdr(struct cifs_ses *ses, SESSION_SETUP_ANDX *pSMB)
124 /* that we use in next few lines */ 124 /* that we use in next few lines */
125 /* Note that header is initialized to zero in header_assemble */ 125 /* Note that header is initialized to zero in header_assemble */
126 pSMB->req.AndXCommand = 0xFF; 126 pSMB->req.AndXCommand = 0xFF;
127 pSMB->req.MaxBufferSize = cpu_to_le16(min_t(u32, CIFSMaxBufSize - 4, 127 pSMB->req.MaxBufferSize = cpu_to_le16(ses->server->maxBuf);
128 USHRT_MAX));
129 pSMB->req.MaxMpxCount = cpu_to_le16(ses->server->maxReq); 128 pSMB->req.MaxMpxCount = cpu_to_le16(ses->server->maxReq);
130 pSMB->req.VcNumber = get_next_vcnum(ses); 129 pSMB->req.VcNumber = get_next_vcnum(ses);
131 130
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index 147aa22c3c3a..c1b9c4b10739 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -362,6 +362,8 @@ cifs_call_async(struct TCP_Server_Info *server, struct kvec *iov,
362 mid = AllocMidQEntry(hdr, server); 362 mid = AllocMidQEntry(hdr, server);
363 if (mid == NULL) { 363 if (mid == NULL) {
364 mutex_unlock(&server->srv_mutex); 364 mutex_unlock(&server->srv_mutex);
365 atomic_dec(&server->inFlight);
366 wake_up(&server->request_q);
365 return -ENOMEM; 367 return -ENOMEM;
366 } 368 }
367 369
diff --git a/fs/dcache.c b/fs/dcache.c
index 2347cdb15abb..a88948b8bd17 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -795,6 +795,7 @@ relock:
795 795
796/** 796/**
797 * prune_dcache_sb - shrink the dcache 797 * prune_dcache_sb - shrink the dcache
798 * @sb: superblock
798 * @nr_to_scan: number of entries to try to free 799 * @nr_to_scan: number of entries to try to free
799 * 800 *
800 * Attempt to shrink the superblock dcache LRU by @nr_to_scan entries. This is 801 * Attempt to shrink the superblock dcache LRU by @nr_to_scan entries. This is
@@ -1728,7 +1729,7 @@ seqretry:
1728 */ 1729 */
1729 if (read_seqcount_retry(&dentry->d_seq, *seq)) 1730 if (read_seqcount_retry(&dentry->d_seq, *seq))
1730 goto seqretry; 1731 goto seqretry;
1731 if (parent->d_flags & DCACHE_OP_COMPARE) { 1732 if (unlikely(parent->d_flags & DCACHE_OP_COMPARE)) {
1732 if (parent->d_op->d_compare(parent, *inode, 1733 if (parent->d_op->d_compare(parent, *inode,
1733 dentry, i, 1734 dentry, i,
1734 tlen, tname, name)) 1735 tlen, tname, name))
diff --git a/fs/exofs/Kbuild b/fs/exofs/Kbuild
index 2d0f757fda3e..c5a5855a6c44 100644
--- a/fs/exofs/Kbuild
+++ b/fs/exofs/Kbuild
@@ -12,5 +12,8 @@
12# Kbuild - Gets included from the Kernels Makefile and build system 12# Kbuild - Gets included from the Kernels Makefile and build system
13# 13#
14 14
15exofs-y := ios.o inode.o file.o symlink.o namei.o dir.o super.o 15# ore module library
16obj-$(CONFIG_ORE) += ore.o
17
18exofs-y := inode.o file.o symlink.o namei.o dir.o super.o
16obj-$(CONFIG_EXOFS_FS) += exofs.o 19obj-$(CONFIG_EXOFS_FS) += exofs.o
diff --git a/fs/exofs/Kconfig b/fs/exofs/Kconfig
index 86194b2f799d..70bae4149291 100644
--- a/fs/exofs/Kconfig
+++ b/fs/exofs/Kconfig
@@ -1,6 +1,10 @@
1config ORE
2 tristate
3
1config EXOFS_FS 4config EXOFS_FS
2 tristate "exofs: OSD based file system support" 5 tristate "exofs: OSD based file system support"
3 depends on SCSI_OSD_ULD 6 depends on SCSI_OSD_ULD
7 select ORE
4 help 8 help
5 EXOFS is a file system that uses an OSD storage device, 9 EXOFS is a file system that uses an OSD storage device,
6 as its backing storage. 10 as its backing storage.
diff --git a/fs/exofs/exofs.h b/fs/exofs/exofs.h
index c965806c2821..f4e442ec7445 100644
--- a/fs/exofs/exofs.h
+++ b/fs/exofs/exofs.h
@@ -36,12 +36,9 @@
36#include <linux/fs.h> 36#include <linux/fs.h>
37#include <linux/time.h> 37#include <linux/time.h>
38#include <linux/backing-dev.h> 38#include <linux/backing-dev.h>
39#include "common.h" 39#include <scsi/osd_ore.h>
40 40
41/* FIXME: Remove once pnfs hits mainline 41#include "common.h"
42 * #include <linux/exportfs/pnfs_osd_xdr.h>
43 */
44#include "pnfs.h"
45 42
46#define EXOFS_ERR(fmt, a...) printk(KERN_ERR "exofs: " fmt, ##a) 43#define EXOFS_ERR(fmt, a...) printk(KERN_ERR "exofs: " fmt, ##a)
47 44
@@ -56,27 +53,11 @@
56/* u64 has problems with printk this will cast it to unsigned long long */ 53/* u64 has problems with printk this will cast it to unsigned long long */
57#define _LLU(x) (unsigned long long)(x) 54#define _LLU(x) (unsigned long long)(x)
58 55
59struct exofs_layout {
60 osd_id s_pid; /* partition ID of file system*/
61
62 /* Our way of looking at the data_map */
63 unsigned stripe_unit;
64 unsigned mirrors_p1;
65
66 unsigned group_width;
67 u64 group_depth;
68 unsigned group_count;
69
70 enum exofs_inode_layout_gen_functions lay_func;
71
72 unsigned s_numdevs; /* Num of devices in array */
73 struct osd_dev *s_ods[0]; /* Variable length */
74};
75
76/* 56/*
77 * our extension to the in-memory superblock 57 * our extension to the in-memory superblock
78 */ 58 */
79struct exofs_sb_info { 59struct exofs_sb_info {
60 struct backing_dev_info bdi; /* register our bdi with VFS */
80 struct exofs_sb_stats s_ess; /* Written often, pre-allocate*/ 61 struct exofs_sb_stats s_ess; /* Written often, pre-allocate*/
81 int s_timeout; /* timeout for OSD operations */ 62 int s_timeout; /* timeout for OSD operations */
82 uint64_t s_nextid; /* highest object ID used */ 63 uint64_t s_nextid; /* highest object ID used */
@@ -84,16 +65,13 @@ struct exofs_sb_info {
84 spinlock_t s_next_gen_lock; /* spinlock for gen # update */ 65 spinlock_t s_next_gen_lock; /* spinlock for gen # update */
85 u32 s_next_generation; /* next gen # to use */ 66 u32 s_next_generation; /* next gen # to use */
86 atomic_t s_curr_pending; /* number of pending commands */ 67 atomic_t s_curr_pending; /* number of pending commands */
87 uint8_t s_cred[OSD_CAP_LEN]; /* credential for the fscb */
88 struct backing_dev_info bdi; /* register our bdi with VFS */
89 68
90 struct pnfs_osd_data_map data_map; /* Default raid to use 69 struct pnfs_osd_data_map data_map; /* Default raid to use
91 * FIXME: Needed ? 70 * FIXME: Needed ?
92 */ 71 */
93/* struct exofs_layout dir_layout;*/ /* Default dir layout */ 72 struct ore_layout layout; /* Default files layout */
94 struct exofs_layout layout; /* Default files layout, 73 struct ore_comp one_comp; /* id & cred of partition id=0*/
95 * contains the variable osd_dev 74 struct ore_components comps; /* comps for the partition */
96 * array. Keep last */
97 struct osd_dev *_min_one_dev[1]; /* Place holder for one dev */ 75 struct osd_dev *_min_one_dev[1]; /* Place holder for one dev */
98}; 76};
99 77
@@ -107,7 +85,8 @@ struct exofs_i_info {
107 uint32_t i_data[EXOFS_IDATA];/*short symlink names and device #s*/ 85 uint32_t i_data[EXOFS_IDATA];/*short symlink names and device #s*/
108 uint32_t i_dir_start_lookup; /* which page to start lookup */ 86 uint32_t i_dir_start_lookup; /* which page to start lookup */
109 uint64_t i_commit_size; /* the object's written length */ 87 uint64_t i_commit_size; /* the object's written length */
110 uint8_t i_cred[OSD_CAP_LEN];/* all-powerful credential */ 88 struct ore_comp one_comp; /* same component for all devices */
89 struct ore_components comps; /* inode view of the device table */
111}; 90};
112 91
113static inline osd_id exofs_oi_objno(struct exofs_i_info *oi) 92static inline osd_id exofs_oi_objno(struct exofs_i_info *oi)
@@ -115,52 +94,6 @@ static inline osd_id exofs_oi_objno(struct exofs_i_info *oi)
115 return oi->vfs_inode.i_ino + EXOFS_OBJ_OFF; 94 return oi->vfs_inode.i_ino + EXOFS_OBJ_OFF;
116} 95}
117 96
118struct exofs_io_state;
119typedef void (*exofs_io_done_fn)(struct exofs_io_state *or, void *private);
120
121struct exofs_io_state {
122 struct kref kref;
123
124 void *private;
125 exofs_io_done_fn done;
126
127 struct exofs_layout *layout;
128 struct osd_obj_id obj;
129 u8 *cred;
130
131 /* Global read/write IO*/
132 loff_t offset;
133 unsigned long length;
134 void *kern_buff;
135
136 struct page **pages;
137 unsigned nr_pages;
138 unsigned pgbase;
139 unsigned pages_consumed;
140
141 /* Attributes */
142 unsigned in_attr_len;
143 struct osd_attr *in_attr;
144 unsigned out_attr_len;
145 struct osd_attr *out_attr;
146
147 /* Variable array of size numdevs */
148 unsigned numdevs;
149 struct exofs_per_dev_state {
150 struct osd_request *or;
151 struct bio *bio;
152 loff_t offset;
153 unsigned length;
154 unsigned dev;
155 } per_dev[];
156};
157
158static inline unsigned exofs_io_state_size(unsigned numdevs)
159{
160 return sizeof(struct exofs_io_state) +
161 sizeof(struct exofs_per_dev_state) * numdevs;
162}
163
164/* 97/*
165 * our inode flags 98 * our inode flags
166 */ 99 */
@@ -205,12 +138,6 @@ static inline struct exofs_i_info *exofs_i(struct inode *inode)
205} 138}
206 139
207/* 140/*
208 * Given a layout, object_number and stripe_index return the associated global
209 * dev_index
210 */
211unsigned exofs_layout_od_id(struct exofs_layout *layout,
212 osd_id obj_no, unsigned layout_index);
213/*
214 * Maximum count of links to a file 141 * Maximum count of links to a file
215 */ 142 */
216#define EXOFS_LINK_MAX 32000 143#define EXOFS_LINK_MAX 32000
@@ -219,44 +146,8 @@ unsigned exofs_layout_od_id(struct exofs_layout *layout,
219 * function declarations * 146 * function declarations *
220 *************************/ 147 *************************/
221 148
222/* ios.c */
223void exofs_make_credential(u8 cred_a[OSD_CAP_LEN],
224 const struct osd_obj_id *obj);
225int exofs_read_kern(struct osd_dev *od, u8 *cred, struct osd_obj_id *obj,
226 u64 offset, void *p, unsigned length);
227
228int exofs_get_io_state(struct exofs_layout *layout,
229 struct exofs_io_state **ios);
230void exofs_put_io_state(struct exofs_io_state *ios);
231
232int exofs_check_io(struct exofs_io_state *ios, u64 *resid);
233
234int exofs_sbi_create(struct exofs_io_state *ios);
235int exofs_sbi_remove(struct exofs_io_state *ios);
236int exofs_sbi_write(struct exofs_io_state *ios);
237int exofs_sbi_read(struct exofs_io_state *ios);
238
239int extract_attr_from_ios(struct exofs_io_state *ios, struct osd_attr *attr);
240
241int exofs_oi_truncate(struct exofs_i_info *oi, u64 new_len);
242static inline int exofs_oi_write(struct exofs_i_info *oi,
243 struct exofs_io_state *ios)
244{
245 ios->obj.id = exofs_oi_objno(oi);
246 ios->cred = oi->i_cred;
247 return exofs_sbi_write(ios);
248}
249
250static inline int exofs_oi_read(struct exofs_i_info *oi,
251 struct exofs_io_state *ios)
252{
253 ios->obj.id = exofs_oi_objno(oi);
254 ios->cred = oi->i_cred;
255 return exofs_sbi_read(ios);
256}
257
258/* inode.c */ 149/* inode.c */
259unsigned exofs_max_io_pages(struct exofs_layout *layout, 150unsigned exofs_max_io_pages(struct ore_layout *layout,
260 unsigned expected_pages); 151 unsigned expected_pages);
261int exofs_setattr(struct dentry *, struct iattr *); 152int exofs_setattr(struct dentry *, struct iattr *);
262int exofs_write_begin(struct file *file, struct address_space *mapping, 153int exofs_write_begin(struct file *file, struct address_space *mapping,
@@ -281,6 +172,8 @@ int exofs_set_link(struct inode *, struct exofs_dir_entry *, struct page *,
281 struct inode *); 172 struct inode *);
282 173
283/* super.c */ 174/* super.c */
175void exofs_make_credential(u8 cred_a[OSD_CAP_LEN],
176 const struct osd_obj_id *obj);
284int exofs_sbi_write_stats(struct exofs_sb_info *sbi); 177int exofs_sbi_write_stats(struct exofs_sb_info *sbi);
285 178
286/********************* 179/*********************
@@ -295,7 +188,6 @@ extern const struct file_operations exofs_file_operations;
295 188
296/* inode.c */ 189/* inode.c */
297extern const struct address_space_operations exofs_aops; 190extern const struct address_space_operations exofs_aops;
298extern const struct osd_attr g_attr_logical_length;
299 191
300/* namei.c */ 192/* namei.c */
301extern const struct inode_operations exofs_dir_inode_operations; 193extern const struct inode_operations exofs_dir_inode_operations;
@@ -305,4 +197,33 @@ extern const struct inode_operations exofs_special_inode_operations;
305extern const struct inode_operations exofs_symlink_inode_operations; 197extern const struct inode_operations exofs_symlink_inode_operations;
306extern const struct inode_operations exofs_fast_symlink_inode_operations; 198extern const struct inode_operations exofs_fast_symlink_inode_operations;
307 199
200/* exofs_init_comps will initialize an ore_components device array
201 * pointing to a single ore_comp struct, and a round-robin view
202 * of the device table.
203 * The first device of each inode is the [inode->ino % num_devices]
204 * and the rest of the devices sequentially following where the
205 * first device is after the last device.
206 * It is assumed that the global device array at @sbi is twice
207 * bigger and that the device table repeats twice.
208 * See: exofs_read_lookup_dev_table()
209 */
210static inline void exofs_init_comps(struct ore_components *comps,
211 struct ore_comp *one_comp,
212 struct exofs_sb_info *sbi, osd_id oid)
213{
214 unsigned dev_mod = (unsigned)oid, first_dev;
215
216 one_comp->obj.partition = sbi->one_comp.obj.partition;
217 one_comp->obj.id = oid;
218 exofs_make_credential(one_comp->cred, &one_comp->obj);
219
220 comps->numdevs = sbi->comps.numdevs;
221 comps->single_comp = EC_SINGLE_COMP;
222 comps->comps = one_comp;
223
224 /* Round robin device view of the table */
225 first_dev = (dev_mod * sbi->layout.mirrors_p1) % sbi->comps.numdevs;
226 comps->ods = sbi->comps.ods + first_dev;
227}
228
308#endif 229#endif
diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c
index 8472c098445d..f39a38fc2349 100644
--- a/fs/exofs/inode.c
+++ b/fs/exofs/inode.c
@@ -43,7 +43,7 @@ enum { BIO_MAX_PAGES_KMALLOC =
43 PAGE_SIZE / sizeof(struct page *), 43 PAGE_SIZE / sizeof(struct page *),
44}; 44};
45 45
46unsigned exofs_max_io_pages(struct exofs_layout *layout, 46unsigned exofs_max_io_pages(struct ore_layout *layout,
47 unsigned expected_pages) 47 unsigned expected_pages)
48{ 48{
49 unsigned pages = min_t(unsigned, expected_pages, MAX_PAGES_KMALLOC); 49 unsigned pages = min_t(unsigned, expected_pages, MAX_PAGES_KMALLOC);
@@ -58,7 +58,7 @@ struct page_collect {
58 struct exofs_sb_info *sbi; 58 struct exofs_sb_info *sbi;
59 struct inode *inode; 59 struct inode *inode;
60 unsigned expected_pages; 60 unsigned expected_pages;
61 struct exofs_io_state *ios; 61 struct ore_io_state *ios;
62 62
63 struct page **pages; 63 struct page **pages;
64 unsigned alloc_pages; 64 unsigned alloc_pages;
@@ -110,13 +110,6 @@ static int pcol_try_alloc(struct page_collect *pcol)
110{ 110{
111 unsigned pages; 111 unsigned pages;
112 112
113 if (!pcol->ios) { /* First time allocate io_state */
114 int ret = exofs_get_io_state(&pcol->sbi->layout, &pcol->ios);
115
116 if (ret)
117 return ret;
118 }
119
120 /* TODO: easily support bio chaining */ 113 /* TODO: easily support bio chaining */
121 pages = exofs_max_io_pages(&pcol->sbi->layout, pcol->expected_pages); 114 pages = exofs_max_io_pages(&pcol->sbi->layout, pcol->expected_pages);
122 115
@@ -140,7 +133,7 @@ static void pcol_free(struct page_collect *pcol)
140 pcol->pages = NULL; 133 pcol->pages = NULL;
141 134
142 if (pcol->ios) { 135 if (pcol->ios) {
143 exofs_put_io_state(pcol->ios); 136 ore_put_io_state(pcol->ios);
144 pcol->ios = NULL; 137 pcol->ios = NULL;
145 } 138 }
146} 139}
@@ -200,7 +193,7 @@ static int __readpages_done(struct page_collect *pcol)
200 u64 resid; 193 u64 resid;
201 u64 good_bytes; 194 u64 good_bytes;
202 u64 length = 0; 195 u64 length = 0;
203 int ret = exofs_check_io(pcol->ios, &resid); 196 int ret = ore_check_io(pcol->ios, &resid);
204 197
205 if (likely(!ret)) 198 if (likely(!ret))
206 good_bytes = pcol->length; 199 good_bytes = pcol->length;
@@ -241,7 +234,7 @@ static int __readpages_done(struct page_collect *pcol)
241} 234}
242 235
243/* callback of async reads */ 236/* callback of async reads */
244static void readpages_done(struct exofs_io_state *ios, void *p) 237static void readpages_done(struct ore_io_state *ios, void *p)
245{ 238{
246 struct page_collect *pcol = p; 239 struct page_collect *pcol = p;
247 240
@@ -269,20 +262,28 @@ static void _unlock_pcol_pages(struct page_collect *pcol, int ret, int rw)
269static int read_exec(struct page_collect *pcol) 262static int read_exec(struct page_collect *pcol)
270{ 263{
271 struct exofs_i_info *oi = exofs_i(pcol->inode); 264 struct exofs_i_info *oi = exofs_i(pcol->inode);
272 struct exofs_io_state *ios = pcol->ios; 265 struct ore_io_state *ios;
273 struct page_collect *pcol_copy = NULL; 266 struct page_collect *pcol_copy = NULL;
274 int ret; 267 int ret;
275 268
276 if (!pcol->pages) 269 if (!pcol->pages)
277 return 0; 270 return 0;
278 271
272 if (!pcol->ios) {
273 int ret = ore_get_rw_state(&pcol->sbi->layout, &oi->comps, true,
274 pcol->pg_first << PAGE_CACHE_SHIFT,
275 pcol->length, &pcol->ios);
276
277 if (ret)
278 return ret;
279 }
280
281 ios = pcol->ios;
279 ios->pages = pcol->pages; 282 ios->pages = pcol->pages;
280 ios->nr_pages = pcol->nr_pages; 283 ios->nr_pages = pcol->nr_pages;
281 ios->length = pcol->length;
282 ios->offset = pcol->pg_first << PAGE_CACHE_SHIFT;
283 284
284 if (pcol->read_4_write) { 285 if (pcol->read_4_write) {
285 exofs_oi_read(oi, pcol->ios); 286 ore_read(pcol->ios);
286 return __readpages_done(pcol); 287 return __readpages_done(pcol);
287 } 288 }
288 289
@@ -295,14 +296,14 @@ static int read_exec(struct page_collect *pcol)
295 *pcol_copy = *pcol; 296 *pcol_copy = *pcol;
296 ios->done = readpages_done; 297 ios->done = readpages_done;
297 ios->private = pcol_copy; 298 ios->private = pcol_copy;
298 ret = exofs_oi_read(oi, ios); 299 ret = ore_read(ios);
299 if (unlikely(ret)) 300 if (unlikely(ret))
300 goto err; 301 goto err;
301 302
302 atomic_inc(&pcol->sbi->s_curr_pending); 303 atomic_inc(&pcol->sbi->s_curr_pending);
303 304
304 EXOFS_DBGMSG2("read_exec obj=0x%llx start=0x%llx length=0x%lx\n", 305 EXOFS_DBGMSG2("read_exec obj=0x%llx start=0x%llx length=0x%lx\n",
305 ios->obj.id, _LLU(ios->offset), pcol->length); 306 oi->one_comp.obj.id, _LLU(ios->offset), pcol->length);
306 307
307 /* pages ownership was passed to pcol_copy */ 308 /* pages ownership was passed to pcol_copy */
308 _pcol_reset(pcol); 309 _pcol_reset(pcol);
@@ -457,14 +458,14 @@ static int exofs_readpage(struct file *file, struct page *page)
457} 458}
458 459
459/* Callback for osd_write. All writes are asynchronous */ 460/* Callback for osd_write. All writes are asynchronous */
460static void writepages_done(struct exofs_io_state *ios, void *p) 461static void writepages_done(struct ore_io_state *ios, void *p)
461{ 462{
462 struct page_collect *pcol = p; 463 struct page_collect *pcol = p;
463 int i; 464 int i;
464 u64 resid; 465 u64 resid;
465 u64 good_bytes; 466 u64 good_bytes;
466 u64 length = 0; 467 u64 length = 0;
467 int ret = exofs_check_io(ios, &resid); 468 int ret = ore_check_io(ios, &resid);
468 469
469 atomic_dec(&pcol->sbi->s_curr_pending); 470 atomic_dec(&pcol->sbi->s_curr_pending);
470 471
@@ -507,13 +508,21 @@ static void writepages_done(struct exofs_io_state *ios, void *p)
507static int write_exec(struct page_collect *pcol) 508static int write_exec(struct page_collect *pcol)
508{ 509{
509 struct exofs_i_info *oi = exofs_i(pcol->inode); 510 struct exofs_i_info *oi = exofs_i(pcol->inode);
510 struct exofs_io_state *ios = pcol->ios; 511 struct ore_io_state *ios;
511 struct page_collect *pcol_copy = NULL; 512 struct page_collect *pcol_copy = NULL;
512 int ret; 513 int ret;
513 514
514 if (!pcol->pages) 515 if (!pcol->pages)
515 return 0; 516 return 0;
516 517
518 BUG_ON(pcol->ios);
519 ret = ore_get_rw_state(&pcol->sbi->layout, &oi->comps, false,
520 pcol->pg_first << PAGE_CACHE_SHIFT,
521 pcol->length, &pcol->ios);
522
523 if (unlikely(ret))
524 goto err;
525
517 pcol_copy = kmalloc(sizeof(*pcol_copy), GFP_KERNEL); 526 pcol_copy = kmalloc(sizeof(*pcol_copy), GFP_KERNEL);
518 if (!pcol_copy) { 527 if (!pcol_copy) {
519 EXOFS_ERR("write_exec: Failed to kmalloc(pcol)\n"); 528 EXOFS_ERR("write_exec: Failed to kmalloc(pcol)\n");
@@ -523,16 +532,15 @@ static int write_exec(struct page_collect *pcol)
523 532
524 *pcol_copy = *pcol; 533 *pcol_copy = *pcol;
525 534
535 ios = pcol->ios;
526 ios->pages = pcol_copy->pages; 536 ios->pages = pcol_copy->pages;
527 ios->nr_pages = pcol_copy->nr_pages; 537 ios->nr_pages = pcol_copy->nr_pages;
528 ios->offset = pcol_copy->pg_first << PAGE_CACHE_SHIFT;
529 ios->length = pcol_copy->length;
530 ios->done = writepages_done; 538 ios->done = writepages_done;
531 ios->private = pcol_copy; 539 ios->private = pcol_copy;
532 540
533 ret = exofs_oi_write(oi, ios); 541 ret = ore_write(ios);
534 if (unlikely(ret)) { 542 if (unlikely(ret)) {
535 EXOFS_ERR("write_exec: exofs_oi_write() Failed\n"); 543 EXOFS_ERR("write_exec: ore_write() Failed\n");
536 goto err; 544 goto err;
537 } 545 }
538 546
@@ -844,17 +852,15 @@ static inline int exofs_inode_is_fast_symlink(struct inode *inode)
844 return S_ISLNK(inode->i_mode) && (oi->i_data[0] != 0); 852 return S_ISLNK(inode->i_mode) && (oi->i_data[0] != 0);
845} 853}
846 854
847const struct osd_attr g_attr_logical_length = ATTR_DEF(
848 OSD_APAGE_OBJECT_INFORMATION, OSD_ATTR_OI_LOGICAL_LENGTH, 8);
849
850static int _do_truncate(struct inode *inode, loff_t newsize) 855static int _do_truncate(struct inode *inode, loff_t newsize)
851{ 856{
852 struct exofs_i_info *oi = exofs_i(inode); 857 struct exofs_i_info *oi = exofs_i(inode);
858 struct exofs_sb_info *sbi = inode->i_sb->s_fs_info;
853 int ret; 859 int ret;
854 860
855 inode->i_mtime = inode->i_ctime = CURRENT_TIME; 861 inode->i_mtime = inode->i_ctime = CURRENT_TIME;
856 862
857 ret = exofs_oi_truncate(oi, (u64)newsize); 863 ret = ore_truncate(&sbi->layout, &oi->comps, (u64)newsize);
858 if (likely(!ret)) 864 if (likely(!ret))
859 truncate_setsize(inode, newsize); 865 truncate_setsize(inode, newsize);
860 866
@@ -917,30 +923,26 @@ static int exofs_get_inode(struct super_block *sb, struct exofs_i_info *oi,
917 [1] = g_attr_inode_file_layout, 923 [1] = g_attr_inode_file_layout,
918 [2] = g_attr_inode_dir_layout, 924 [2] = g_attr_inode_dir_layout,
919 }; 925 };
920 struct exofs_io_state *ios; 926 struct ore_io_state *ios;
921 struct exofs_on_disk_inode_layout *layout; 927 struct exofs_on_disk_inode_layout *layout;
922 int ret; 928 int ret;
923 929
924 ret = exofs_get_io_state(&sbi->layout, &ios); 930 ret = ore_get_io_state(&sbi->layout, &oi->comps, &ios);
925 if (unlikely(ret)) { 931 if (unlikely(ret)) {
926 EXOFS_ERR("%s: exofs_get_io_state failed.\n", __func__); 932 EXOFS_ERR("%s: ore_get_io_state failed.\n", __func__);
927 return ret; 933 return ret;
928 } 934 }
929 935
930 ios->obj.id = exofs_oi_objno(oi); 936 attrs[1].len = exofs_on_disk_inode_layout_size(sbi->comps.numdevs);
931 exofs_make_credential(oi->i_cred, &ios->obj); 937 attrs[2].len = exofs_on_disk_inode_layout_size(sbi->comps.numdevs);
932 ios->cred = oi->i_cred;
933
934 attrs[1].len = exofs_on_disk_inode_layout_size(sbi->layout.s_numdevs);
935 attrs[2].len = exofs_on_disk_inode_layout_size(sbi->layout.s_numdevs);
936 938
937 ios->in_attr = attrs; 939 ios->in_attr = attrs;
938 ios->in_attr_len = ARRAY_SIZE(attrs); 940 ios->in_attr_len = ARRAY_SIZE(attrs);
939 941
940 ret = exofs_sbi_read(ios); 942 ret = ore_read(ios);
941 if (unlikely(ret)) { 943 if (unlikely(ret)) {
942 EXOFS_ERR("object(0x%llx) corrupted, return empty file=>%d\n", 944 EXOFS_ERR("object(0x%llx) corrupted, return empty file=>%d\n",
943 _LLU(ios->obj.id), ret); 945 _LLU(oi->one_comp.obj.id), ret);
944 memset(inode, 0, sizeof(*inode)); 946 memset(inode, 0, sizeof(*inode));
945 inode->i_mode = 0040000 | (0777 & ~022); 947 inode->i_mode = 0040000 | (0777 & ~022);
946 /* If object is lost on target we might as well enable it's 948 /* If object is lost on target we might as well enable it's
@@ -990,7 +992,7 @@ static int exofs_get_inode(struct super_block *sb, struct exofs_i_info *oi,
990 } 992 }
991 993
992out: 994out:
993 exofs_put_io_state(ios); 995 ore_put_io_state(ios);
994 return ret; 996 return ret;
995} 997}
996 998
@@ -1016,6 +1018,8 @@ struct inode *exofs_iget(struct super_block *sb, unsigned long ino)
1016 return inode; 1018 return inode;
1017 oi = exofs_i(inode); 1019 oi = exofs_i(inode);
1018 __oi_init(oi); 1020 __oi_init(oi);
1021 exofs_init_comps(&oi->comps, &oi->one_comp, sb->s_fs_info,
1022 exofs_oi_objno(oi));
1019 1023
1020 /* read the inode from the osd */ 1024 /* read the inode from the osd */
1021 ret = exofs_get_inode(sb, oi, &fcb); 1025 ret = exofs_get_inode(sb, oi, &fcb);
@@ -1107,21 +1111,22 @@ int __exofs_wait_obj_created(struct exofs_i_info *oi)
1107 * set the obj_created flag so that other methods know that the object exists on 1111 * set the obj_created flag so that other methods know that the object exists on
1108 * the OSD. 1112 * the OSD.
1109 */ 1113 */
1110static void create_done(struct exofs_io_state *ios, void *p) 1114static void create_done(struct ore_io_state *ios, void *p)
1111{ 1115{
1112 struct inode *inode = p; 1116 struct inode *inode = p;
1113 struct exofs_i_info *oi = exofs_i(inode); 1117 struct exofs_i_info *oi = exofs_i(inode);
1114 struct exofs_sb_info *sbi = inode->i_sb->s_fs_info; 1118 struct exofs_sb_info *sbi = inode->i_sb->s_fs_info;
1115 int ret; 1119 int ret;
1116 1120
1117 ret = exofs_check_io(ios, NULL); 1121 ret = ore_check_io(ios, NULL);
1118 exofs_put_io_state(ios); 1122 ore_put_io_state(ios);
1119 1123
1120 atomic_dec(&sbi->s_curr_pending); 1124 atomic_dec(&sbi->s_curr_pending);
1121 1125
1122 if (unlikely(ret)) { 1126 if (unlikely(ret)) {
1123 EXOFS_ERR("object=0x%llx creation failed in pid=0x%llx", 1127 EXOFS_ERR("object=0x%llx creation failed in pid=0x%llx",
1124 _LLU(exofs_oi_objno(oi)), _LLU(sbi->layout.s_pid)); 1128 _LLU(exofs_oi_objno(oi)),
1129 _LLU(oi->one_comp.obj.partition));
1125 /*TODO: When FS is corrupted creation can fail, object already 1130 /*TODO: When FS is corrupted creation can fail, object already
1126 * exist. Get rid of this asynchronous creation, if exist 1131 * exist. Get rid of this asynchronous creation, if exist
1127 * increment the obj counter and try the next object. Until we 1132 * increment the obj counter and try the next object. Until we
@@ -1140,14 +1145,13 @@ static void create_done(struct exofs_io_state *ios, void *p)
1140 */ 1145 */
1141struct inode *exofs_new_inode(struct inode *dir, int mode) 1146struct inode *exofs_new_inode(struct inode *dir, int mode)
1142{ 1147{
1143 struct super_block *sb; 1148 struct super_block *sb = dir->i_sb;
1149 struct exofs_sb_info *sbi = sb->s_fs_info;
1144 struct inode *inode; 1150 struct inode *inode;
1145 struct exofs_i_info *oi; 1151 struct exofs_i_info *oi;
1146 struct exofs_sb_info *sbi; 1152 struct ore_io_state *ios;
1147 struct exofs_io_state *ios;
1148 int ret; 1153 int ret;
1149 1154
1150 sb = dir->i_sb;
1151 inode = new_inode(sb); 1155 inode = new_inode(sb);
1152 if (!inode) 1156 if (!inode)
1153 return ERR_PTR(-ENOMEM); 1157 return ERR_PTR(-ENOMEM);
@@ -1157,8 +1161,6 @@ struct inode *exofs_new_inode(struct inode *dir, int mode)
1157 1161
1158 set_obj_2bcreated(oi); 1162 set_obj_2bcreated(oi);
1159 1163
1160 sbi = sb->s_fs_info;
1161
1162 inode->i_mapping->backing_dev_info = sb->s_bdi; 1164 inode->i_mapping->backing_dev_info = sb->s_bdi;
1163 inode_init_owner(inode, dir, mode); 1165 inode_init_owner(inode, dir, mode);
1164 inode->i_ino = sbi->s_nextid++; 1166 inode->i_ino = sbi->s_nextid++;
@@ -1170,25 +1172,24 @@ struct inode *exofs_new_inode(struct inode *dir, int mode)
1170 spin_unlock(&sbi->s_next_gen_lock); 1172 spin_unlock(&sbi->s_next_gen_lock);
1171 insert_inode_hash(inode); 1173 insert_inode_hash(inode);
1172 1174
1175 exofs_init_comps(&oi->comps, &oi->one_comp, sb->s_fs_info,
1176 exofs_oi_objno(oi));
1173 exofs_sbi_write_stats(sbi); /* Make sure new sbi->s_nextid is on disk */ 1177 exofs_sbi_write_stats(sbi); /* Make sure new sbi->s_nextid is on disk */
1174 1178
1175 mark_inode_dirty(inode); 1179 mark_inode_dirty(inode);
1176 1180
1177 ret = exofs_get_io_state(&sbi->layout, &ios); 1181 ret = ore_get_io_state(&sbi->layout, &oi->comps, &ios);
1178 if (unlikely(ret)) { 1182 if (unlikely(ret)) {
1179 EXOFS_ERR("exofs_new_inode: exofs_get_io_state failed\n"); 1183 EXOFS_ERR("exofs_new_inode: ore_get_io_state failed\n");
1180 return ERR_PTR(ret); 1184 return ERR_PTR(ret);
1181 } 1185 }
1182 1186
1183 ios->obj.id = exofs_oi_objno(oi);
1184 exofs_make_credential(oi->i_cred, &ios->obj);
1185
1186 ios->done = create_done; 1187 ios->done = create_done;
1187 ios->private = inode; 1188 ios->private = inode;
1188 ios->cred = oi->i_cred; 1189
1189 ret = exofs_sbi_create(ios); 1190 ret = ore_create(ios);
1190 if (ret) { 1191 if (ret) {
1191 exofs_put_io_state(ios); 1192 ore_put_io_state(ios);
1192 return ERR_PTR(ret); 1193 return ERR_PTR(ret);
1193 } 1194 }
1194 atomic_inc(&sbi->s_curr_pending); 1195 atomic_inc(&sbi->s_curr_pending);
@@ -1207,11 +1208,11 @@ struct updatei_args {
1207/* 1208/*
1208 * Callback function from exofs_update_inode(). 1209 * Callback function from exofs_update_inode().
1209 */ 1210 */
1210static void updatei_done(struct exofs_io_state *ios, void *p) 1211static void updatei_done(struct ore_io_state *ios, void *p)
1211{ 1212{
1212 struct updatei_args *args = p; 1213 struct updatei_args *args = p;
1213 1214
1214 exofs_put_io_state(ios); 1215 ore_put_io_state(ios);
1215 1216
1216 atomic_dec(&args->sbi->s_curr_pending); 1217 atomic_dec(&args->sbi->s_curr_pending);
1217 1218
@@ -1227,7 +1228,7 @@ static int exofs_update_inode(struct inode *inode, int do_sync)
1227 struct exofs_i_info *oi = exofs_i(inode); 1228 struct exofs_i_info *oi = exofs_i(inode);
1228 struct super_block *sb = inode->i_sb; 1229 struct super_block *sb = inode->i_sb;
1229 struct exofs_sb_info *sbi = sb->s_fs_info; 1230 struct exofs_sb_info *sbi = sb->s_fs_info;
1230 struct exofs_io_state *ios; 1231 struct ore_io_state *ios;
1231 struct osd_attr attr; 1232 struct osd_attr attr;
1232 struct exofs_fcb *fcb; 1233 struct exofs_fcb *fcb;
1233 struct updatei_args *args; 1234 struct updatei_args *args;
@@ -1266,9 +1267,9 @@ static int exofs_update_inode(struct inode *inode, int do_sync)
1266 } else 1267 } else
1267 memcpy(fcb->i_data, oi->i_data, sizeof(fcb->i_data)); 1268 memcpy(fcb->i_data, oi->i_data, sizeof(fcb->i_data));
1268 1269
1269 ret = exofs_get_io_state(&sbi->layout, &ios); 1270 ret = ore_get_io_state(&sbi->layout, &oi->comps, &ios);
1270 if (unlikely(ret)) { 1271 if (unlikely(ret)) {
1271 EXOFS_ERR("%s: exofs_get_io_state failed.\n", __func__); 1272 EXOFS_ERR("%s: ore_get_io_state failed.\n", __func__);
1272 goto free_args; 1273 goto free_args;
1273 } 1274 }
1274 1275
@@ -1285,13 +1286,13 @@ static int exofs_update_inode(struct inode *inode, int do_sync)
1285 ios->private = args; 1286 ios->private = args;
1286 } 1287 }
1287 1288
1288 ret = exofs_oi_write(oi, ios); 1289 ret = ore_write(ios);
1289 if (!do_sync && !ret) { 1290 if (!do_sync && !ret) {
1290 atomic_inc(&sbi->s_curr_pending); 1291 atomic_inc(&sbi->s_curr_pending);
1291 goto out; /* deallocation in updatei_done */ 1292 goto out; /* deallocation in updatei_done */
1292 } 1293 }
1293 1294
1294 exofs_put_io_state(ios); 1295 ore_put_io_state(ios);
1295free_args: 1296free_args:
1296 kfree(args); 1297 kfree(args);
1297out: 1298out:
@@ -1310,11 +1311,11 @@ int exofs_write_inode(struct inode *inode, struct writeback_control *wbc)
1310 * Callback function from exofs_delete_inode() - don't have much cleaning up to 1311 * Callback function from exofs_delete_inode() - don't have much cleaning up to
1311 * do. 1312 * do.
1312 */ 1313 */
1313static void delete_done(struct exofs_io_state *ios, void *p) 1314static void delete_done(struct ore_io_state *ios, void *p)
1314{ 1315{
1315 struct exofs_sb_info *sbi = p; 1316 struct exofs_sb_info *sbi = p;
1316 1317
1317 exofs_put_io_state(ios); 1318 ore_put_io_state(ios);
1318 1319
1319 atomic_dec(&sbi->s_curr_pending); 1320 atomic_dec(&sbi->s_curr_pending);
1320} 1321}
@@ -1329,7 +1330,7 @@ void exofs_evict_inode(struct inode *inode)
1329 struct exofs_i_info *oi = exofs_i(inode); 1330 struct exofs_i_info *oi = exofs_i(inode);
1330 struct super_block *sb = inode->i_sb; 1331 struct super_block *sb = inode->i_sb;
1331 struct exofs_sb_info *sbi = sb->s_fs_info; 1332 struct exofs_sb_info *sbi = sb->s_fs_info;
1332 struct exofs_io_state *ios; 1333 struct ore_io_state *ios;
1333 int ret; 1334 int ret;
1334 1335
1335 truncate_inode_pages(&inode->i_data, 0); 1336 truncate_inode_pages(&inode->i_data, 0);
@@ -1349,20 +1350,19 @@ void exofs_evict_inode(struct inode *inode)
1349 /* ignore the error, attempt a remove anyway */ 1350 /* ignore the error, attempt a remove anyway */
1350 1351
1351 /* Now Remove the OSD objects */ 1352 /* Now Remove the OSD objects */
1352 ret = exofs_get_io_state(&sbi->layout, &ios); 1353 ret = ore_get_io_state(&sbi->layout, &oi->comps, &ios);
1353 if (unlikely(ret)) { 1354 if (unlikely(ret)) {
1354 EXOFS_ERR("%s: exofs_get_io_state failed\n", __func__); 1355 EXOFS_ERR("%s: ore_get_io_state failed\n", __func__);
1355 return; 1356 return;
1356 } 1357 }
1357 1358
1358 ios->obj.id = exofs_oi_objno(oi);
1359 ios->done = delete_done; 1359 ios->done = delete_done;
1360 ios->private = sbi; 1360 ios->private = sbi;
1361 ios->cred = oi->i_cred; 1361
1362 ret = exofs_sbi_remove(ios); 1362 ret = ore_remove(ios);
1363 if (ret) { 1363 if (ret) {
1364 EXOFS_ERR("%s: exofs_sbi_remove failed\n", __func__); 1364 EXOFS_ERR("%s: ore_remove failed\n", __func__);
1365 exofs_put_io_state(ios); 1365 ore_put_io_state(ios);
1366 return; 1366 return;
1367 } 1367 }
1368 atomic_inc(&sbi->s_curr_pending); 1368 atomic_inc(&sbi->s_curr_pending);
diff --git a/fs/exofs/ios.c b/fs/exofs/ore.c
index f74a2ec027a6..25305af88198 100644
--- a/fs/exofs/ios.c
+++ b/fs/exofs/ore.c
@@ -23,81 +23,87 @@
23 */ 23 */
24 24
25#include <linux/slab.h> 25#include <linux/slab.h>
26#include <scsi/scsi_device.h>
27#include <asm/div64.h> 26#include <asm/div64.h>
28 27
29#include "exofs.h" 28#include <scsi/osd_ore.h>
30 29
31#define EXOFS_DBGMSG2(M...) do {} while (0) 30#define ORE_ERR(fmt, a...) printk(KERN_ERR "ore: " fmt, ##a)
32/* #define EXOFS_DBGMSG2 EXOFS_DBGMSG */
33 31
34void exofs_make_credential(u8 cred_a[OSD_CAP_LEN], const struct osd_obj_id *obj) 32#ifdef CONFIG_EXOFS_DEBUG
35{ 33#define ORE_DBGMSG(fmt, a...) \
36 osd_sec_init_nosec_doall_caps(cred_a, obj, false, true); 34 printk(KERN_NOTICE "ore @%s:%d: " fmt, __func__, __LINE__, ##a)
37} 35#else
36#define ORE_DBGMSG(fmt, a...) \
37 do { if (0) printk(fmt, ##a); } while (0)
38#endif
38 39
39int exofs_read_kern(struct osd_dev *od, u8 *cred, struct osd_obj_id *obj, 40/* u64 has problems with printk this will cast it to unsigned long long */
40 u64 offset, void *p, unsigned length) 41#define _LLU(x) (unsigned long long)(x)
41{
42 struct osd_request *or = osd_start_request(od, GFP_KERNEL);
43/* struct osd_sense_info osi = {.key = 0};*/
44 int ret;
45 42
46 if (unlikely(!or)) { 43#define ORE_DBGMSG2(M...) do {} while (0)
47 EXOFS_DBGMSG("%s: osd_start_request failed.\n", __func__); 44/* #define ORE_DBGMSG2 ORE_DBGMSG */
48 return -ENOMEM;
49 }
50 ret = osd_req_read_kern(or, obj, offset, p, length);
51 if (unlikely(ret)) {
52 EXOFS_DBGMSG("%s: osd_req_read_kern failed.\n", __func__);
53 goto out;
54 }
55 45
56 ret = osd_finalize_request(or, 0, cred, NULL); 46MODULE_AUTHOR("Boaz Harrosh <bharrosh@panasas.com>");
57 if (unlikely(ret)) { 47MODULE_DESCRIPTION("Objects Raid Engine ore.ko");
58 EXOFS_DBGMSG("Failed to osd_finalize_request() => %d\n", ret); 48MODULE_LICENSE("GPL");
59 goto out;
60 }
61 49
62 ret = osd_execute_request(or); 50static u8 *_ios_cred(struct ore_io_state *ios, unsigned index)
63 if (unlikely(ret)) 51{
64 EXOFS_DBGMSG("osd_execute_request() => %d\n", ret); 52 return ios->comps->comps[index & ios->comps->single_comp].cred;
65 /* osd_req_decode_sense(or, ret); */ 53}
66 54
67out: 55static struct osd_obj_id *_ios_obj(struct ore_io_state *ios, unsigned index)
68 osd_end_request(or); 56{
69 return ret; 57 return &ios->comps->comps[index & ios->comps->single_comp].obj;
70} 58}
71 59
72int exofs_get_io_state(struct exofs_layout *layout, 60static struct osd_dev *_ios_od(struct ore_io_state *ios, unsigned index)
73 struct exofs_io_state **pios)
74{ 61{
75 struct exofs_io_state *ios; 62 return ios->comps->ods[index];
63}
64
65int ore_get_rw_state(struct ore_layout *layout, struct ore_components *comps,
66 bool is_reading, u64 offset, u64 length,
67 struct ore_io_state **pios)
68{
69 struct ore_io_state *ios;
76 70
77 /*TODO: Maybe use kmem_cach per sbi of size 71 /*TODO: Maybe use kmem_cach per sbi of size
78 * exofs_io_state_size(layout->s_numdevs) 72 * exofs_io_state_size(layout->s_numdevs)
79 */ 73 */
80 ios = kzalloc(exofs_io_state_size(layout->s_numdevs), GFP_KERNEL); 74 ios = kzalloc(ore_io_state_size(comps->numdevs), GFP_KERNEL);
81 if (unlikely(!ios)) { 75 if (unlikely(!ios)) {
82 EXOFS_DBGMSG("Failed kzalloc bytes=%d\n", 76 ORE_DBGMSG("Failed kzalloc bytes=%d\n",
83 exofs_io_state_size(layout->s_numdevs)); 77 ore_io_state_size(comps->numdevs));
84 *pios = NULL; 78 *pios = NULL;
85 return -ENOMEM; 79 return -ENOMEM;
86 } 80 }
87 81
88 ios->layout = layout; 82 ios->layout = layout;
89 ios->obj.partition = layout->s_pid; 83 ios->comps = comps;
84 ios->offset = offset;
85 ios->length = length;
86 ios->reading = is_reading;
87
90 *pios = ios; 88 *pios = ios;
91 return 0; 89 return 0;
92} 90}
91EXPORT_SYMBOL(ore_get_rw_state);
92
93int ore_get_io_state(struct ore_layout *layout, struct ore_components *comps,
94 struct ore_io_state **ios)
95{
96 return ore_get_rw_state(layout, comps, true, 0, 0, ios);
97}
98EXPORT_SYMBOL(ore_get_io_state);
93 99
94void exofs_put_io_state(struct exofs_io_state *ios) 100void ore_put_io_state(struct ore_io_state *ios)
95{ 101{
96 if (ios) { 102 if (ios) {
97 unsigned i; 103 unsigned i;
98 104
99 for (i = 0; i < ios->numdevs; i++) { 105 for (i = 0; i < ios->numdevs; i++) {
100 struct exofs_per_dev_state *per_dev = &ios->per_dev[i]; 106 struct ore_per_dev_state *per_dev = &ios->per_dev[i];
101 107
102 if (per_dev->or) 108 if (per_dev->or)
103 osd_end_request(per_dev->or); 109 osd_end_request(per_dev->or);
@@ -108,31 +114,9 @@ void exofs_put_io_state(struct exofs_io_state *ios)
108 kfree(ios); 114 kfree(ios);
109 } 115 }
110} 116}
117EXPORT_SYMBOL(ore_put_io_state);
111 118
112unsigned exofs_layout_od_id(struct exofs_layout *layout, 119static void _sync_done(struct ore_io_state *ios, void *p)
113 osd_id obj_no, unsigned layout_index)
114{
115/* switch (layout->lay_func) {
116 case LAYOUT_MOVING_WINDOW:
117 {*/
118 unsigned dev_mod = obj_no;
119
120 return (layout_index + dev_mod * layout->mirrors_p1) %
121 layout->s_numdevs;
122/* }
123 case LAYOUT_FUNC_IMPLICT:
124 return layout->devs[layout_index];
125 }*/
126}
127
128static inline struct osd_dev *exofs_ios_od(struct exofs_io_state *ios,
129 unsigned layout_index)
130{
131 return ios->layout->s_ods[
132 exofs_layout_od_id(ios->layout, ios->obj.id, layout_index)];
133}
134
135static void _sync_done(struct exofs_io_state *ios, void *p)
136{ 120{
137 struct completion *waiting = p; 121 struct completion *waiting = p;
138 122
@@ -141,20 +125,20 @@ static void _sync_done(struct exofs_io_state *ios, void *p)
141 125
142static void _last_io(struct kref *kref) 126static void _last_io(struct kref *kref)
143{ 127{
144 struct exofs_io_state *ios = container_of( 128 struct ore_io_state *ios = container_of(
145 kref, struct exofs_io_state, kref); 129 kref, struct ore_io_state, kref);
146 130
147 ios->done(ios, ios->private); 131 ios->done(ios, ios->private);
148} 132}
149 133
150static void _done_io(struct osd_request *or, void *p) 134static void _done_io(struct osd_request *or, void *p)
151{ 135{
152 struct exofs_io_state *ios = p; 136 struct ore_io_state *ios = p;
153 137
154 kref_put(&ios->kref, _last_io); 138 kref_put(&ios->kref, _last_io);
155} 139}
156 140
157static int exofs_io_execute(struct exofs_io_state *ios) 141static int ore_io_execute(struct ore_io_state *ios)
158{ 142{
159 DECLARE_COMPLETION_ONSTACK(wait); 143 DECLARE_COMPLETION_ONSTACK(wait);
160 bool sync = (ios->done == NULL); 144 bool sync = (ios->done == NULL);
@@ -170,9 +154,9 @@ static int exofs_io_execute(struct exofs_io_state *ios)
170 if (unlikely(!or)) 154 if (unlikely(!or))
171 continue; 155 continue;
172 156
173 ret = osd_finalize_request(or, 0, ios->cred, NULL); 157 ret = osd_finalize_request(or, 0, _ios_cred(ios, i), NULL);
174 if (unlikely(ret)) { 158 if (unlikely(ret)) {
175 EXOFS_DBGMSG("Failed to osd_finalize_request() => %d\n", 159 ORE_DBGMSG("Failed to osd_finalize_request() => %d\n",
176 ret); 160 ret);
177 return ret; 161 return ret;
178 } 162 }
@@ -194,7 +178,7 @@ static int exofs_io_execute(struct exofs_io_state *ios)
194 178
195 if (sync) { 179 if (sync) {
196 wait_for_completion(&wait); 180 wait_for_completion(&wait);
197 ret = exofs_check_io(ios, NULL); 181 ret = ore_check_io(ios, NULL);
198 } 182 }
199 return ret; 183 return ret;
200} 184}
@@ -214,7 +198,7 @@ static void _clear_bio(struct bio *bio)
214 } 198 }
215} 199}
216 200
217int exofs_check_io(struct exofs_io_state *ios, u64 *resid) 201int ore_check_io(struct ore_io_state *ios, u64 *resid)
218{ 202{
219 enum osd_err_priority acumulated_osd_err = 0; 203 enum osd_err_priority acumulated_osd_err = 0;
220 int acumulated_lin_err = 0; 204 int acumulated_lin_err = 0;
@@ -235,7 +219,7 @@ int exofs_check_io(struct exofs_io_state *ios, u64 *resid)
235 if (OSD_ERR_PRI_CLEAR_PAGES == osi.osd_err_pri) { 219 if (OSD_ERR_PRI_CLEAR_PAGES == osi.osd_err_pri) {
236 /* start read offset passed endof file */ 220 /* start read offset passed endof file */
237 _clear_bio(ios->per_dev[i].bio); 221 _clear_bio(ios->per_dev[i].bio);
238 EXOFS_DBGMSG("start read offset passed end of file " 222 ORE_DBGMSG("start read offset passed end of file "
239 "offset=0x%llx, length=0x%llx\n", 223 "offset=0x%llx, length=0x%llx\n",
240 _LLU(ios->per_dev[i].offset), 224 _LLU(ios->per_dev[i].offset),
241 _LLU(ios->per_dev[i].length)); 225 _LLU(ios->per_dev[i].length));
@@ -259,6 +243,7 @@ int exofs_check_io(struct exofs_io_state *ios, u64 *resid)
259 243
260 return acumulated_lin_err; 244 return acumulated_lin_err;
261} 245}
246EXPORT_SYMBOL(ore_check_io);
262 247
263/* 248/*
264 * L - logical offset into the file 249 * L - logical offset into the file
@@ -305,20 +290,21 @@ int exofs_check_io(struct exofs_io_state *ios, u64 *resid)
305struct _striping_info { 290struct _striping_info {
306 u64 obj_offset; 291 u64 obj_offset;
307 u64 group_length; 292 u64 group_length;
293 u64 M; /* for truncate */
308 unsigned dev; 294 unsigned dev;
309 unsigned unit_off; 295 unsigned unit_off;
310}; 296};
311 297
312static void _calc_stripe_info(struct exofs_io_state *ios, u64 file_offset, 298static void _calc_stripe_info(struct ore_layout *layout, u64 file_offset,
313 struct _striping_info *si) 299 struct _striping_info *si)
314{ 300{
315 u32 stripe_unit = ios->layout->stripe_unit; 301 u32 stripe_unit = layout->stripe_unit;
316 u32 group_width = ios->layout->group_width; 302 u32 group_width = layout->group_width;
317 u64 group_depth = ios->layout->group_depth; 303 u64 group_depth = layout->group_depth;
318 304
319 u32 U = stripe_unit * group_width; 305 u32 U = stripe_unit * group_width;
320 u64 T = U * group_depth; 306 u64 T = U * group_depth;
321 u64 S = T * ios->layout->group_count; 307 u64 S = T * layout->group_count;
322 u64 M = div64_u64(file_offset, S); 308 u64 M = div64_u64(file_offset, S);
323 309
324 /* 310 /*
@@ -333,7 +319,7 @@ static void _calc_stripe_info(struct exofs_io_state *ios, u64 file_offset,
333 319
334 /* "H - (N * U)" is just "H % U" so it's bound to u32 */ 320 /* "H - (N * U)" is just "H % U" so it's bound to u32 */
335 si->dev = (u32)(H - (N * U)) / stripe_unit + G * group_width; 321 si->dev = (u32)(H - (N * U)) / stripe_unit + G * group_width;
336 si->dev *= ios->layout->mirrors_p1; 322 si->dev *= layout->mirrors_p1;
337 323
338 div_u64_rem(file_offset, stripe_unit, &si->unit_off); 324 div_u64_rem(file_offset, stripe_unit, &si->unit_off);
339 325
@@ -341,15 +327,16 @@ static void _calc_stripe_info(struct exofs_io_state *ios, u64 file_offset,
341 (M * group_depth * stripe_unit); 327 (M * group_depth * stripe_unit);
342 328
343 si->group_length = T - H; 329 si->group_length = T - H;
330 si->M = M;
344} 331}
345 332
346static int _add_stripe_unit(struct exofs_io_state *ios, unsigned *cur_pg, 333static int _add_stripe_unit(struct ore_io_state *ios, unsigned *cur_pg,
347 unsigned pgbase, struct exofs_per_dev_state *per_dev, 334 unsigned pgbase, struct ore_per_dev_state *per_dev,
348 int cur_len) 335 int cur_len)
349{ 336{
350 unsigned pg = *cur_pg; 337 unsigned pg = *cur_pg;
351 struct request_queue *q = 338 struct request_queue *q =
352 osd_request_queue(exofs_ios_od(ios, per_dev->dev)); 339 osd_request_queue(_ios_od(ios, per_dev->dev));
353 340
354 per_dev->length += cur_len; 341 per_dev->length += cur_len;
355 342
@@ -361,7 +348,7 @@ static int _add_stripe_unit(struct exofs_io_state *ios, unsigned *cur_pg,
361 348
362 per_dev->bio = bio_kmalloc(GFP_KERNEL, bio_size); 349 per_dev->bio = bio_kmalloc(GFP_KERNEL, bio_size);
363 if (unlikely(!per_dev->bio)) { 350 if (unlikely(!per_dev->bio)) {
364 EXOFS_DBGMSG("Failed to allocate BIO size=%u\n", 351 ORE_DBGMSG("Failed to allocate BIO size=%u\n",
365 bio_size); 352 bio_size);
366 return -ENOMEM; 353 return -ENOMEM;
367 } 354 }
@@ -387,7 +374,7 @@ static int _add_stripe_unit(struct exofs_io_state *ios, unsigned *cur_pg,
387 return 0; 374 return 0;
388} 375}
389 376
390static int _prepare_one_group(struct exofs_io_state *ios, u64 length, 377static int _prepare_one_group(struct ore_io_state *ios, u64 length,
391 struct _striping_info *si) 378 struct _striping_info *si)
392{ 379{
393 unsigned stripe_unit = ios->layout->stripe_unit; 380 unsigned stripe_unit = ios->layout->stripe_unit;
@@ -400,7 +387,7 @@ static int _prepare_one_group(struct exofs_io_state *ios, u64 length,
400 int ret = 0; 387 int ret = 0;
401 388
402 while (length) { 389 while (length) {
403 struct exofs_per_dev_state *per_dev = &ios->per_dev[dev]; 390 struct ore_per_dev_state *per_dev = &ios->per_dev[dev];
404 unsigned cur_len, page_off = 0; 391 unsigned cur_len, page_off = 0;
405 392
406 if (!per_dev->length) { 393 if (!per_dev->length) {
@@ -443,7 +430,7 @@ out:
443 return ret; 430 return ret;
444} 431}
445 432
446static int _prepare_for_striping(struct exofs_io_state *ios) 433static int _prepare_for_striping(struct ore_io_state *ios)
447{ 434{
448 u64 length = ios->length; 435 u64 length = ios->length;
449 u64 offset = ios->offset; 436 u64 offset = ios->offset;
@@ -452,9 +439,9 @@ static int _prepare_for_striping(struct exofs_io_state *ios)
452 439
453 if (!ios->pages) { 440 if (!ios->pages) {
454 if (ios->kern_buff) { 441 if (ios->kern_buff) {
455 struct exofs_per_dev_state *per_dev = &ios->per_dev[0]; 442 struct ore_per_dev_state *per_dev = &ios->per_dev[0];
456 443
457 _calc_stripe_info(ios, ios->offset, &si); 444 _calc_stripe_info(ios->layout, ios->offset, &si);
458 per_dev->offset = si.obj_offset; 445 per_dev->offset = si.obj_offset;
459 per_dev->dev = si.dev; 446 per_dev->dev = si.dev;
460 447
@@ -468,7 +455,7 @@ static int _prepare_for_striping(struct exofs_io_state *ios)
468 } 455 }
469 456
470 while (length) { 457 while (length) {
471 _calc_stripe_info(ios, offset, &si); 458 _calc_stripe_info(ios->layout, offset, &si);
472 459
473 if (length < si.group_length) 460 if (length < si.group_length)
474 si.group_length = length; 461 si.group_length = length;
@@ -485,57 +472,59 @@ out:
485 return ret; 472 return ret;
486} 473}
487 474
488int exofs_sbi_create(struct exofs_io_state *ios) 475int ore_create(struct ore_io_state *ios)
489{ 476{
490 int i, ret; 477 int i, ret;
491 478
492 for (i = 0; i < ios->layout->s_numdevs; i++) { 479 for (i = 0; i < ios->comps->numdevs; i++) {
493 struct osd_request *or; 480 struct osd_request *or;
494 481
495 or = osd_start_request(exofs_ios_od(ios, i), GFP_KERNEL); 482 or = osd_start_request(_ios_od(ios, i), GFP_KERNEL);
496 if (unlikely(!or)) { 483 if (unlikely(!or)) {
497 EXOFS_ERR("%s: osd_start_request failed\n", __func__); 484 ORE_ERR("%s: osd_start_request failed\n", __func__);
498 ret = -ENOMEM; 485 ret = -ENOMEM;
499 goto out; 486 goto out;
500 } 487 }
501 ios->per_dev[i].or = or; 488 ios->per_dev[i].or = or;
502 ios->numdevs++; 489 ios->numdevs++;
503 490
504 osd_req_create_object(or, &ios->obj); 491 osd_req_create_object(or, _ios_obj(ios, i));
505 } 492 }
506 ret = exofs_io_execute(ios); 493 ret = ore_io_execute(ios);
507 494
508out: 495out:
509 return ret; 496 return ret;
510} 497}
498EXPORT_SYMBOL(ore_create);
511 499
512int exofs_sbi_remove(struct exofs_io_state *ios) 500int ore_remove(struct ore_io_state *ios)
513{ 501{
514 int i, ret; 502 int i, ret;
515 503
516 for (i = 0; i < ios->layout->s_numdevs; i++) { 504 for (i = 0; i < ios->comps->numdevs; i++) {
517 struct osd_request *or; 505 struct osd_request *or;
518 506
519 or = osd_start_request(exofs_ios_od(ios, i), GFP_KERNEL); 507 or = osd_start_request(_ios_od(ios, i), GFP_KERNEL);
520 if (unlikely(!or)) { 508 if (unlikely(!or)) {
521 EXOFS_ERR("%s: osd_start_request failed\n", __func__); 509 ORE_ERR("%s: osd_start_request failed\n", __func__);
522 ret = -ENOMEM; 510 ret = -ENOMEM;
523 goto out; 511 goto out;
524 } 512 }
525 ios->per_dev[i].or = or; 513 ios->per_dev[i].or = or;
526 ios->numdevs++; 514 ios->numdevs++;
527 515
528 osd_req_remove_object(or, &ios->obj); 516 osd_req_remove_object(or, _ios_obj(ios, i));
529 } 517 }
530 ret = exofs_io_execute(ios); 518 ret = ore_io_execute(ios);
531 519
532out: 520out:
533 return ret; 521 return ret;
534} 522}
523EXPORT_SYMBOL(ore_remove);
535 524
536static int _sbi_write_mirror(struct exofs_io_state *ios, int cur_comp) 525static int _write_mirror(struct ore_io_state *ios, int cur_comp)
537{ 526{
538 struct exofs_per_dev_state *master_dev = &ios->per_dev[cur_comp]; 527 struct ore_per_dev_state *master_dev = &ios->per_dev[cur_comp];
539 unsigned dev = ios->per_dev[cur_comp].dev; 528 unsigned dev = ios->per_dev[cur_comp].dev;
540 unsigned last_comp = cur_comp + ios->layout->mirrors_p1; 529 unsigned last_comp = cur_comp + ios->layout->mirrors_p1;
541 int ret = 0; 530 int ret = 0;
@@ -544,12 +533,12 @@ static int _sbi_write_mirror(struct exofs_io_state *ios, int cur_comp)
544 return 0; /* Just an empty slot */ 533 return 0; /* Just an empty slot */
545 534
546 for (; cur_comp < last_comp; ++cur_comp, ++dev) { 535 for (; cur_comp < last_comp; ++cur_comp, ++dev) {
547 struct exofs_per_dev_state *per_dev = &ios->per_dev[cur_comp]; 536 struct ore_per_dev_state *per_dev = &ios->per_dev[cur_comp];
548 struct osd_request *or; 537 struct osd_request *or;
549 538
550 or = osd_start_request(exofs_ios_od(ios, dev), GFP_KERNEL); 539 or = osd_start_request(_ios_od(ios, dev), GFP_KERNEL);
551 if (unlikely(!or)) { 540 if (unlikely(!or)) {
552 EXOFS_ERR("%s: osd_start_request failed\n", __func__); 541 ORE_ERR("%s: osd_start_request failed\n", __func__);
553 ret = -ENOMEM; 542 ret = -ENOMEM;
554 goto out; 543 goto out;
555 } 544 }
@@ -563,7 +552,7 @@ static int _sbi_write_mirror(struct exofs_io_state *ios, int cur_comp)
563 bio = bio_kmalloc(GFP_KERNEL, 552 bio = bio_kmalloc(GFP_KERNEL,
564 master_dev->bio->bi_max_vecs); 553 master_dev->bio->bi_max_vecs);
565 if (unlikely(!bio)) { 554 if (unlikely(!bio)) {
566 EXOFS_DBGMSG( 555 ORE_DBGMSG(
567 "Failed to allocate BIO size=%u\n", 556 "Failed to allocate BIO size=%u\n",
568 master_dev->bio->bi_max_vecs); 557 master_dev->bio->bi_max_vecs);
569 ret = -ENOMEM; 558 ret = -ENOMEM;
@@ -582,25 +571,29 @@ static int _sbi_write_mirror(struct exofs_io_state *ios, int cur_comp)
582 bio->bi_rw |= REQ_WRITE; 571 bio->bi_rw |= REQ_WRITE;
583 } 572 }
584 573
585 osd_req_write(or, &ios->obj, per_dev->offset, bio, 574 osd_req_write(or, _ios_obj(ios, dev), per_dev->offset,
586 per_dev->length); 575 bio, per_dev->length);
587 EXOFS_DBGMSG("write(0x%llx) offset=0x%llx " 576 ORE_DBGMSG("write(0x%llx) offset=0x%llx "
588 "length=0x%llx dev=%d\n", 577 "length=0x%llx dev=%d\n",
589 _LLU(ios->obj.id), _LLU(per_dev->offset), 578 _LLU(_ios_obj(ios, dev)->id),
579 _LLU(per_dev->offset),
590 _LLU(per_dev->length), dev); 580 _LLU(per_dev->length), dev);
591 } else if (ios->kern_buff) { 581 } else if (ios->kern_buff) {
592 ret = osd_req_write_kern(or, &ios->obj, per_dev->offset, 582 ret = osd_req_write_kern(or, _ios_obj(ios, dev),
593 ios->kern_buff, ios->length); 583 per_dev->offset,
584 ios->kern_buff, ios->length);
594 if (unlikely(ret)) 585 if (unlikely(ret))
595 goto out; 586 goto out;
596 EXOFS_DBGMSG2("write_kern(0x%llx) offset=0x%llx " 587 ORE_DBGMSG2("write_kern(0x%llx) offset=0x%llx "
597 "length=0x%llx dev=%d\n", 588 "length=0x%llx dev=%d\n",
598 _LLU(ios->obj.id), _LLU(per_dev->offset), 589 _LLU(_ios_obj(ios, dev)->id),
590 _LLU(per_dev->offset),
599 _LLU(ios->length), dev); 591 _LLU(ios->length), dev);
600 } else { 592 } else {
601 osd_req_set_attributes(or, &ios->obj); 593 osd_req_set_attributes(or, _ios_obj(ios, dev));
602 EXOFS_DBGMSG2("obj(0x%llx) set_attributes=%d dev=%d\n", 594 ORE_DBGMSG2("obj(0x%llx) set_attributes=%d dev=%d\n",
603 _LLU(ios->obj.id), ios->out_attr_len, dev); 595 _LLU(_ios_obj(ios, dev)->id),
596 ios->out_attr_len, dev);
604 } 597 }
605 598
606 if (ios->out_attr) 599 if (ios->out_attr)
@@ -616,7 +609,7 @@ out:
616 return ret; 609 return ret;
617} 610}
618 611
619int exofs_sbi_write(struct exofs_io_state *ios) 612int ore_write(struct ore_io_state *ios)
620{ 613{
621 int i; 614 int i;
622 int ret; 615 int ret;
@@ -626,52 +619,55 @@ int exofs_sbi_write(struct exofs_io_state *ios)
626 return ret; 619 return ret;
627 620
628 for (i = 0; i < ios->numdevs; i += ios->layout->mirrors_p1) { 621 for (i = 0; i < ios->numdevs; i += ios->layout->mirrors_p1) {
629 ret = _sbi_write_mirror(ios, i); 622 ret = _write_mirror(ios, i);
630 if (unlikely(ret)) 623 if (unlikely(ret))
631 return ret; 624 return ret;
632 } 625 }
633 626
634 ret = exofs_io_execute(ios); 627 ret = ore_io_execute(ios);
635 return ret; 628 return ret;
636} 629}
630EXPORT_SYMBOL(ore_write);
637 631
638static int _sbi_read_mirror(struct exofs_io_state *ios, unsigned cur_comp) 632static int _read_mirror(struct ore_io_state *ios, unsigned cur_comp)
639{ 633{
640 struct osd_request *or; 634 struct osd_request *or;
641 struct exofs_per_dev_state *per_dev = &ios->per_dev[cur_comp]; 635 struct ore_per_dev_state *per_dev = &ios->per_dev[cur_comp];
642 unsigned first_dev = (unsigned)ios->obj.id; 636 struct osd_obj_id *obj = _ios_obj(ios, cur_comp);
637 unsigned first_dev = (unsigned)obj->id;
643 638
644 if (ios->pages && !per_dev->length) 639 if (ios->pages && !per_dev->length)
645 return 0; /* Just an empty slot */ 640 return 0; /* Just an empty slot */
646 641
647 first_dev = per_dev->dev + first_dev % ios->layout->mirrors_p1; 642 first_dev = per_dev->dev + first_dev % ios->layout->mirrors_p1;
648 or = osd_start_request(exofs_ios_od(ios, first_dev), GFP_KERNEL); 643 or = osd_start_request(_ios_od(ios, first_dev), GFP_KERNEL);
649 if (unlikely(!or)) { 644 if (unlikely(!or)) {
650 EXOFS_ERR("%s: osd_start_request failed\n", __func__); 645 ORE_ERR("%s: osd_start_request failed\n", __func__);
651 return -ENOMEM; 646 return -ENOMEM;
652 } 647 }
653 per_dev->or = or; 648 per_dev->or = or;
654 649
655 if (ios->pages) { 650 if (ios->pages) {
656 osd_req_read(or, &ios->obj, per_dev->offset, 651 osd_req_read(or, obj, per_dev->offset,
657 per_dev->bio, per_dev->length); 652 per_dev->bio, per_dev->length);
658 EXOFS_DBGMSG("read(0x%llx) offset=0x%llx length=0x%llx" 653 ORE_DBGMSG("read(0x%llx) offset=0x%llx length=0x%llx"
659 " dev=%d\n", _LLU(ios->obj.id), 654 " dev=%d\n", _LLU(obj->id),
660 _LLU(per_dev->offset), _LLU(per_dev->length), 655 _LLU(per_dev->offset), _LLU(per_dev->length),
661 first_dev); 656 first_dev);
662 } else if (ios->kern_buff) { 657 } else if (ios->kern_buff) {
663 int ret = osd_req_read_kern(or, &ios->obj, per_dev->offset, 658 int ret = osd_req_read_kern(or, obj, per_dev->offset,
664 ios->kern_buff, ios->length); 659 ios->kern_buff, ios->length);
665 EXOFS_DBGMSG2("read_kern(0x%llx) offset=0x%llx " 660 ORE_DBGMSG2("read_kern(0x%llx) offset=0x%llx "
666 "length=0x%llx dev=%d ret=>%d\n", 661 "length=0x%llx dev=%d ret=>%d\n",
667 _LLU(ios->obj.id), _LLU(per_dev->offset), 662 _LLU(obj->id), _LLU(per_dev->offset),
668 _LLU(ios->length), first_dev, ret); 663 _LLU(ios->length), first_dev, ret);
669 if (unlikely(ret)) 664 if (unlikely(ret))
670 return ret; 665 return ret;
671 } else { 666 } else {
672 osd_req_get_attributes(or, &ios->obj); 667 osd_req_get_attributes(or, obj);
673 EXOFS_DBGMSG2("obj(0x%llx) get_attributes=%d dev=%d\n", 668 ORE_DBGMSG2("obj(0x%llx) get_attributes=%d dev=%d\n",
674 _LLU(ios->obj.id), ios->in_attr_len, first_dev); 669 _LLU(obj->id),
670 ios->in_attr_len, first_dev);
675 } 671 }
676 if (ios->out_attr) 672 if (ios->out_attr)
677 osd_req_add_set_attr_list(or, ios->out_attr, ios->out_attr_len); 673 osd_req_add_set_attr_list(or, ios->out_attr, ios->out_attr_len);
@@ -682,7 +678,7 @@ static int _sbi_read_mirror(struct exofs_io_state *ios, unsigned cur_comp)
682 return 0; 678 return 0;
683} 679}
684 680
685int exofs_sbi_read(struct exofs_io_state *ios) 681int ore_read(struct ore_io_state *ios)
686{ 682{
687 int i; 683 int i;
688 int ret; 684 int ret;
@@ -692,16 +688,17 @@ int exofs_sbi_read(struct exofs_io_state *ios)
692 return ret; 688 return ret;
693 689
694 for (i = 0; i < ios->numdevs; i += ios->layout->mirrors_p1) { 690 for (i = 0; i < ios->numdevs; i += ios->layout->mirrors_p1) {
695 ret = _sbi_read_mirror(ios, i); 691 ret = _read_mirror(ios, i);
696 if (unlikely(ret)) 692 if (unlikely(ret))
697 return ret; 693 return ret;
698 } 694 }
699 695
700 ret = exofs_io_execute(ios); 696 ret = ore_io_execute(ios);
701 return ret; 697 return ret;
702} 698}
699EXPORT_SYMBOL(ore_read);
703 700
704int extract_attr_from_ios(struct exofs_io_state *ios, struct osd_attr *attr) 701int extract_attr_from_ios(struct ore_io_state *ios, struct osd_attr *attr)
705{ 702{
706 struct osd_attr cur_attr = {.attr_page = 0}; /* start with zeros */ 703 struct osd_attr cur_attr = {.attr_page = 0}; /* start with zeros */
707 void *iter = NULL; 704 void *iter = NULL;
@@ -721,83 +718,118 @@ int extract_attr_from_ios(struct exofs_io_state *ios, struct osd_attr *attr)
721 718
722 return -EIO; 719 return -EIO;
723} 720}
721EXPORT_SYMBOL(extract_attr_from_ios);
724 722
725static int _truncate_mirrors(struct exofs_io_state *ios, unsigned cur_comp, 723static int _truncate_mirrors(struct ore_io_state *ios, unsigned cur_comp,
726 struct osd_attr *attr) 724 struct osd_attr *attr)
727{ 725{
728 int last_comp = cur_comp + ios->layout->mirrors_p1; 726 int last_comp = cur_comp + ios->layout->mirrors_p1;
729 727
730 for (; cur_comp < last_comp; ++cur_comp) { 728 for (; cur_comp < last_comp; ++cur_comp) {
731 struct exofs_per_dev_state *per_dev = &ios->per_dev[cur_comp]; 729 struct ore_per_dev_state *per_dev = &ios->per_dev[cur_comp];
732 struct osd_request *or; 730 struct osd_request *or;
733 731
734 or = osd_start_request(exofs_ios_od(ios, cur_comp), GFP_KERNEL); 732 or = osd_start_request(_ios_od(ios, cur_comp), GFP_KERNEL);
735 if (unlikely(!or)) { 733 if (unlikely(!or)) {
736 EXOFS_ERR("%s: osd_start_request failed\n", __func__); 734 ORE_ERR("%s: osd_start_request failed\n", __func__);
737 return -ENOMEM; 735 return -ENOMEM;
738 } 736 }
739 per_dev->or = or; 737 per_dev->or = or;
740 738
741 osd_req_set_attributes(or, &ios->obj); 739 osd_req_set_attributes(or, _ios_obj(ios, cur_comp));
742 osd_req_add_set_attr_list(or, attr, 1); 740 osd_req_add_set_attr_list(or, attr, 1);
743 } 741 }
744 742
745 return 0; 743 return 0;
746} 744}
747 745
748int exofs_oi_truncate(struct exofs_i_info *oi, u64 size) 746struct _trunc_info {
747 struct _striping_info si;
748 u64 prev_group_obj_off;
749 u64 next_group_obj_off;
750
751 unsigned first_group_dev;
752 unsigned nex_group_dev;
753 unsigned max_devs;
754};
755
756void _calc_trunk_info(struct ore_layout *layout, u64 file_offset,
757 struct _trunc_info *ti)
758{
759 unsigned stripe_unit = layout->stripe_unit;
760
761 _calc_stripe_info(layout, file_offset, &ti->si);
762
763 ti->prev_group_obj_off = ti->si.M * stripe_unit;
764 ti->next_group_obj_off = ti->si.M ? (ti->si.M - 1) * stripe_unit : 0;
765
766 ti->first_group_dev = ti->si.dev - (ti->si.dev % layout->group_width);
767 ti->nex_group_dev = ti->first_group_dev + layout->group_width;
768 ti->max_devs = layout->group_width * layout->group_count;
769}
770
771int ore_truncate(struct ore_layout *layout, struct ore_components *comps,
772 u64 size)
749{ 773{
750 struct exofs_sb_info *sbi = oi->vfs_inode.i_sb->s_fs_info; 774 struct ore_io_state *ios;
751 struct exofs_io_state *ios;
752 struct exofs_trunc_attr { 775 struct exofs_trunc_attr {
753 struct osd_attr attr; 776 struct osd_attr attr;
754 __be64 newsize; 777 __be64 newsize;
755 } *size_attrs; 778 } *size_attrs;
756 struct _striping_info si; 779 struct _trunc_info ti;
757 int i, ret; 780 int i, ret;
758 781
759 ret = exofs_get_io_state(&sbi->layout, &ios); 782 ret = ore_get_io_state(layout, comps, &ios);
760 if (unlikely(ret)) 783 if (unlikely(ret))
761 return ret; 784 return ret;
762 785
763 size_attrs = kcalloc(ios->layout->group_width, sizeof(*size_attrs), 786 _calc_trunk_info(ios->layout, size, &ti);
787
788 size_attrs = kcalloc(ti.max_devs, sizeof(*size_attrs),
764 GFP_KERNEL); 789 GFP_KERNEL);
765 if (unlikely(!size_attrs)) { 790 if (unlikely(!size_attrs)) {
766 ret = -ENOMEM; 791 ret = -ENOMEM;
767 goto out; 792 goto out;
768 } 793 }
769 794
770 ios->obj.id = exofs_oi_objno(oi); 795 ios->numdevs = ios->comps->numdevs;
771 ios->cred = oi->i_cred;
772 796
773 ios->numdevs = ios->layout->s_numdevs; 797 for (i = 0; i < ti.max_devs; ++i) {
774 _calc_stripe_info(ios, size, &si);
775
776 for (i = 0; i < ios->layout->group_width; ++i) {
777 struct exofs_trunc_attr *size_attr = &size_attrs[i]; 798 struct exofs_trunc_attr *size_attr = &size_attrs[i];
778 u64 obj_size; 799 u64 obj_size;
779 800
780 if (i < si.dev) 801 if (i < ti.first_group_dev)
781 obj_size = si.obj_offset + 802 obj_size = ti.prev_group_obj_off;
782 ios->layout->stripe_unit - si.unit_off; 803 else if (i >= ti.nex_group_dev)
783 else if (i == si.dev) 804 obj_size = ti.next_group_obj_off;
784 obj_size = si.obj_offset; 805 else if (i < ti.si.dev) /* dev within this group */
785 else /* i > si.dev */ 806 obj_size = ti.si.obj_offset +
786 obj_size = si.obj_offset - si.unit_off; 807 ios->layout->stripe_unit - ti.si.unit_off;
808 else if (i == ti.si.dev)
809 obj_size = ti.si.obj_offset;
810 else /* i > ti.dev */
811 obj_size = ti.si.obj_offset - ti.si.unit_off;
787 812
788 size_attr->newsize = cpu_to_be64(obj_size); 813 size_attr->newsize = cpu_to_be64(obj_size);
789 size_attr->attr = g_attr_logical_length; 814 size_attr->attr = g_attr_logical_length;
790 size_attr->attr.val_ptr = &size_attr->newsize; 815 size_attr->attr.val_ptr = &size_attr->newsize;
791 816
817 ORE_DBGMSG("trunc(0x%llx) obj_offset=0x%llx dev=%d\n",
818 _LLU(comps->comps->obj.id), _LLU(obj_size), i);
792 ret = _truncate_mirrors(ios, i * ios->layout->mirrors_p1, 819 ret = _truncate_mirrors(ios, i * ios->layout->mirrors_p1,
793 &size_attr->attr); 820 &size_attr->attr);
794 if (unlikely(ret)) 821 if (unlikely(ret))
795 goto out; 822 goto out;
796 } 823 }
797 ret = exofs_io_execute(ios); 824 ret = ore_io_execute(ios);
798 825
799out: 826out:
800 kfree(size_attrs); 827 kfree(size_attrs);
801 exofs_put_io_state(ios); 828 ore_put_io_state(ios);
802 return ret; 829 return ret;
803} 830}
831EXPORT_SYMBOL(ore_truncate);
832
833const struct osd_attr g_attr_logical_length = ATTR_DEF(
834 OSD_APAGE_OBJECT_INFORMATION, OSD_ATTR_OI_LOGICAL_LENGTH, 8);
835EXPORT_SYMBOL(g_attr_logical_length);
diff --git a/fs/exofs/pnfs.h b/fs/exofs/pnfs.h
deleted file mode 100644
index c52e9888b8ab..000000000000
--- a/fs/exofs/pnfs.h
+++ /dev/null
@@ -1,45 +0,0 @@
1/*
2 * Copyright (C) 2008, 2009
3 * Boaz Harrosh <bharrosh@panasas.com>
4 *
5 * This file is part of exofs.
6 *
7 * exofs is free software; you can redistribute it and/or modify it under the
8 * terms of the GNU General Public License version 2 as published by the Free
9 * Software Foundation.
10 *
11 */
12
13/* FIXME: Remove this file once pnfs hits mainline */
14
15#ifndef __EXOFS_PNFS_H__
16#define __EXOFS_PNFS_H__
17
18#if ! defined(__PNFS_OSD_XDR_H__)
19
20enum pnfs_iomode {
21 IOMODE_READ = 1,
22 IOMODE_RW = 2,
23 IOMODE_ANY = 3,
24};
25
26/* Layout Structure */
27enum pnfs_osd_raid_algorithm4 {
28 PNFS_OSD_RAID_0 = 1,
29 PNFS_OSD_RAID_4 = 2,
30 PNFS_OSD_RAID_5 = 3,
31 PNFS_OSD_RAID_PQ = 4 /* Reed-Solomon P+Q */
32};
33
34struct pnfs_osd_data_map {
35 u32 odm_num_comps;
36 u64 odm_stripe_unit;
37 u32 odm_group_width;
38 u32 odm_group_depth;
39 u32 odm_mirror_cnt;
40 u32 odm_raid_algorithm;
41};
42
43#endif /* ! defined(__PNFS_OSD_XDR_H__) */
44
45#endif /* __EXOFS_PNFS_H__ */
diff --git a/fs/exofs/super.c b/fs/exofs/super.c
index c57beddcc217..274894053b02 100644
--- a/fs/exofs/super.c
+++ b/fs/exofs/super.c
@@ -40,6 +40,8 @@
40 40
41#include "exofs.h" 41#include "exofs.h"
42 42
43#define EXOFS_DBGMSG2(M...) do {} while (0)
44
43/****************************************************************************** 45/******************************************************************************
44 * MOUNT OPTIONS 46 * MOUNT OPTIONS
45 *****************************************************************************/ 47 *****************************************************************************/
@@ -208,10 +210,48 @@ static void destroy_inodecache(void)
208} 210}
209 211
210/****************************************************************************** 212/******************************************************************************
211 * SUPERBLOCK FUNCTIONS 213 * Some osd helpers
212 *****************************************************************************/ 214 *****************************************************************************/
213static const struct super_operations exofs_sops; 215void exofs_make_credential(u8 cred_a[OSD_CAP_LEN], const struct osd_obj_id *obj)
214static const struct export_operations exofs_export_ops; 216{
217 osd_sec_init_nosec_doall_caps(cred_a, obj, false, true);
218}
219
220static int exofs_read_kern(struct osd_dev *od, u8 *cred, struct osd_obj_id *obj,
221 u64 offset, void *p, unsigned length)
222{
223 struct osd_request *or = osd_start_request(od, GFP_KERNEL);
224/* struct osd_sense_info osi = {.key = 0};*/
225 int ret;
226
227 if (unlikely(!or)) {
228 EXOFS_DBGMSG("%s: osd_start_request failed.\n", __func__);
229 return -ENOMEM;
230 }
231 ret = osd_req_read_kern(or, obj, offset, p, length);
232 if (unlikely(ret)) {
233 EXOFS_DBGMSG("%s: osd_req_read_kern failed.\n", __func__);
234 goto out;
235 }
236
237 ret = osd_finalize_request(or, 0, cred, NULL);
238 if (unlikely(ret)) {
239 EXOFS_DBGMSG("Failed to osd_finalize_request() => %d\n", ret);
240 goto out;
241 }
242
243 ret = osd_execute_request(or);
244 if (unlikely(ret))
245 EXOFS_DBGMSG("osd_execute_request() => %d\n", ret);
246 /* osd_req_decode_sense(or, ret); */
247
248out:
249 osd_end_request(or);
250 EXOFS_DBGMSG2("read_kern(0x%llx) offset=0x%llx "
251 "length=0x%llx dev=%p ret=>%d\n",
252 _LLU(obj->id), _LLU(offset), _LLU(length), od, ret);
253 return ret;
254}
215 255
216static const struct osd_attr g_attr_sb_stats = ATTR_DEF( 256static const struct osd_attr g_attr_sb_stats = ATTR_DEF(
217 EXOFS_APAGE_SB_DATA, 257 EXOFS_APAGE_SB_DATA,
@@ -223,21 +263,19 @@ static int __sbi_read_stats(struct exofs_sb_info *sbi)
223 struct osd_attr attrs[] = { 263 struct osd_attr attrs[] = {
224 [0] = g_attr_sb_stats, 264 [0] = g_attr_sb_stats,
225 }; 265 };
226 struct exofs_io_state *ios; 266 struct ore_io_state *ios;
227 int ret; 267 int ret;
228 268
229 ret = exofs_get_io_state(&sbi->layout, &ios); 269 ret = ore_get_io_state(&sbi->layout, &sbi->comps, &ios);
230 if (unlikely(ret)) { 270 if (unlikely(ret)) {
231 EXOFS_ERR("%s: exofs_get_io_state failed.\n", __func__); 271 EXOFS_ERR("%s: ore_get_io_state failed.\n", __func__);
232 return ret; 272 return ret;
233 } 273 }
234 274
235 ios->cred = sbi->s_cred;
236
237 ios->in_attr = attrs; 275 ios->in_attr = attrs;
238 ios->in_attr_len = ARRAY_SIZE(attrs); 276 ios->in_attr_len = ARRAY_SIZE(attrs);
239 277
240 ret = exofs_sbi_read(ios); 278 ret = ore_read(ios);
241 if (unlikely(ret)) { 279 if (unlikely(ret)) {
242 EXOFS_ERR("Error reading super_block stats => %d\n", ret); 280 EXOFS_ERR("Error reading super_block stats => %d\n", ret);
243 goto out; 281 goto out;
@@ -264,13 +302,13 @@ static int __sbi_read_stats(struct exofs_sb_info *sbi)
264 } 302 }
265 303
266out: 304out:
267 exofs_put_io_state(ios); 305 ore_put_io_state(ios);
268 return ret; 306 return ret;
269} 307}
270 308
271static void stats_done(struct exofs_io_state *ios, void *p) 309static void stats_done(struct ore_io_state *ios, void *p)
272{ 310{
273 exofs_put_io_state(ios); 311 ore_put_io_state(ios);
274 /* Good thanks nothing to do anymore */ 312 /* Good thanks nothing to do anymore */
275} 313}
276 314
@@ -280,12 +318,12 @@ int exofs_sbi_write_stats(struct exofs_sb_info *sbi)
280 struct osd_attr attrs[] = { 318 struct osd_attr attrs[] = {
281 [0] = g_attr_sb_stats, 319 [0] = g_attr_sb_stats,
282 }; 320 };
283 struct exofs_io_state *ios; 321 struct ore_io_state *ios;
284 int ret; 322 int ret;
285 323
286 ret = exofs_get_io_state(&sbi->layout, &ios); 324 ret = ore_get_io_state(&sbi->layout, &sbi->comps, &ios);
287 if (unlikely(ret)) { 325 if (unlikely(ret)) {
288 EXOFS_ERR("%s: exofs_get_io_state failed.\n", __func__); 326 EXOFS_ERR("%s: ore_get_io_state failed.\n", __func__);
289 return ret; 327 return ret;
290 } 328 }
291 329
@@ -293,21 +331,27 @@ int exofs_sbi_write_stats(struct exofs_sb_info *sbi)
293 sbi->s_ess.s_numfiles = cpu_to_le64(sbi->s_numfiles); 331 sbi->s_ess.s_numfiles = cpu_to_le64(sbi->s_numfiles);
294 attrs[0].val_ptr = &sbi->s_ess; 332 attrs[0].val_ptr = &sbi->s_ess;
295 333
296 ios->cred = sbi->s_cred; 334
297 ios->done = stats_done; 335 ios->done = stats_done;
298 ios->private = sbi; 336 ios->private = sbi;
299 ios->out_attr = attrs; 337 ios->out_attr = attrs;
300 ios->out_attr_len = ARRAY_SIZE(attrs); 338 ios->out_attr_len = ARRAY_SIZE(attrs);
301 339
302 ret = exofs_sbi_write(ios); 340 ret = ore_write(ios);
303 if (unlikely(ret)) { 341 if (unlikely(ret)) {
304 EXOFS_ERR("%s: exofs_sbi_write failed.\n", __func__); 342 EXOFS_ERR("%s: ore_write failed.\n", __func__);
305 exofs_put_io_state(ios); 343 ore_put_io_state(ios);
306 } 344 }
307 345
308 return ret; 346 return ret;
309} 347}
310 348
349/******************************************************************************
350 * SUPERBLOCK FUNCTIONS
351 *****************************************************************************/
352static const struct super_operations exofs_sops;
353static const struct export_operations exofs_export_ops;
354
311/* 355/*
312 * Write the superblock to the OSD 356 * Write the superblock to the OSD
313 */ 357 */
@@ -315,7 +359,9 @@ int exofs_sync_fs(struct super_block *sb, int wait)
315{ 359{
316 struct exofs_sb_info *sbi; 360 struct exofs_sb_info *sbi;
317 struct exofs_fscb *fscb; 361 struct exofs_fscb *fscb;
318 struct exofs_io_state *ios; 362 struct ore_comp one_comp;
363 struct ore_components comps;
364 struct ore_io_state *ios;
319 int ret = -ENOMEM; 365 int ret = -ENOMEM;
320 366
321 fscb = kmalloc(sizeof(*fscb), GFP_KERNEL); 367 fscb = kmalloc(sizeof(*fscb), GFP_KERNEL);
@@ -331,7 +377,10 @@ int exofs_sync_fs(struct super_block *sb, int wait)
331 * version). Otherwise the exofs_fscb is read-only from mkfs time. All 377 * version). Otherwise the exofs_fscb is read-only from mkfs time. All
332 * the writeable info is set in exofs_sbi_write_stats() above. 378 * the writeable info is set in exofs_sbi_write_stats() above.
333 */ 379 */
334 ret = exofs_get_io_state(&sbi->layout, &ios); 380
381 exofs_init_comps(&comps, &one_comp, sbi, EXOFS_SUPER_ID);
382
383 ret = ore_get_io_state(&sbi->layout, &comps, &ios);
335 if (unlikely(ret)) 384 if (unlikely(ret))
336 goto out; 385 goto out;
337 386
@@ -345,14 +394,12 @@ int exofs_sync_fs(struct super_block *sb, int wait)
345 fscb->s_newfs = 0; 394 fscb->s_newfs = 0;
346 fscb->s_version = EXOFS_FSCB_VER; 395 fscb->s_version = EXOFS_FSCB_VER;
347 396
348 ios->obj.id = EXOFS_SUPER_ID;
349 ios->offset = 0; 397 ios->offset = 0;
350 ios->kern_buff = fscb; 398 ios->kern_buff = fscb;
351 ios->cred = sbi->s_cred;
352 399
353 ret = exofs_sbi_write(ios); 400 ret = ore_write(ios);
354 if (unlikely(ret)) 401 if (unlikely(ret))
355 EXOFS_ERR("%s: exofs_sbi_write failed.\n", __func__); 402 EXOFS_ERR("%s: ore_write failed.\n", __func__);
356 else 403 else
357 sb->s_dirt = 0; 404 sb->s_dirt = 0;
358 405
@@ -360,7 +407,7 @@ int exofs_sync_fs(struct super_block *sb, int wait)
360 unlock_super(sb); 407 unlock_super(sb);
361out: 408out:
362 EXOFS_DBGMSG("s_nextid=0x%llx ret=%d\n", _LLU(sbi->s_nextid), ret); 409 EXOFS_DBGMSG("s_nextid=0x%llx ret=%d\n", _LLU(sbi->s_nextid), ret);
363 exofs_put_io_state(ios); 410 ore_put_io_state(ios);
364 kfree(fscb); 411 kfree(fscb);
365 return ret; 412 return ret;
366} 413}
@@ -384,15 +431,17 @@ static void _exofs_print_device(const char *msg, const char *dev_path,
384 431
385void exofs_free_sbi(struct exofs_sb_info *sbi) 432void exofs_free_sbi(struct exofs_sb_info *sbi)
386{ 433{
387 while (sbi->layout.s_numdevs) { 434 while (sbi->comps.numdevs) {
388 int i = --sbi->layout.s_numdevs; 435 int i = --sbi->comps.numdevs;
389 struct osd_dev *od = sbi->layout.s_ods[i]; 436 struct osd_dev *od = sbi->comps.ods[i];
390 437
391 if (od) { 438 if (od) {
392 sbi->layout.s_ods[i] = NULL; 439 sbi->comps.ods[i] = NULL;
393 osduld_put_device(od); 440 osduld_put_device(od);
394 } 441 }
395 } 442 }
443 if (sbi->comps.ods != sbi->_min_one_dev)
444 kfree(sbi->comps.ods);
396 kfree(sbi); 445 kfree(sbi);
397} 446}
398 447
@@ -419,8 +468,8 @@ static void exofs_put_super(struct super_block *sb)
419 msecs_to_jiffies(100)); 468 msecs_to_jiffies(100));
420 } 469 }
421 470
422 _exofs_print_device("Unmounting", NULL, sbi->layout.s_ods[0], 471 _exofs_print_device("Unmounting", NULL, sbi->comps.ods[0],
423 sbi->layout.s_pid); 472 sbi->one_comp.obj.partition);
424 473
425 bdi_destroy(&sbi->bdi); 474 bdi_destroy(&sbi->bdi);
426 exofs_free_sbi(sbi); 475 exofs_free_sbi(sbi);
@@ -501,10 +550,19 @@ static int _read_and_match_data_map(struct exofs_sb_info *sbi, unsigned numdevs,
501 return -EINVAL; 550 return -EINVAL;
502 } 551 }
503 552
553 EXOFS_DBGMSG("exofs: layout: "
554 "num_comps=%u stripe_unit=0x%x group_width=%u "
555 "group_depth=0x%llx mirrors_p1=%u raid_algorithm=%u\n",
556 numdevs,
557 sbi->layout.stripe_unit,
558 sbi->layout.group_width,
559 _LLU(sbi->layout.group_depth),
560 sbi->layout.mirrors_p1,
561 sbi->data_map.odm_raid_algorithm);
504 return 0; 562 return 0;
505} 563}
506 564
507static unsigned __ra_pages(struct exofs_layout *layout) 565static unsigned __ra_pages(struct ore_layout *layout)
508{ 566{
509 const unsigned _MIN_RA = 32; /* min 128K read-ahead */ 567 const unsigned _MIN_RA = 32; /* min 128K read-ahead */
510 unsigned ra_pages = layout->group_width * layout->stripe_unit / 568 unsigned ra_pages = layout->group_width * layout->stripe_unit /
@@ -547,13 +605,11 @@ static int exofs_devs_2_odi(struct exofs_dt_device_info *dt_dev,
547 return !(odi->systemid_len || odi->osdname_len); 605 return !(odi->systemid_len || odi->osdname_len);
548} 606}
549 607
550static int exofs_read_lookup_dev_table(struct exofs_sb_info **psbi, 608static int exofs_read_lookup_dev_table(struct exofs_sb_info *sbi,
609 struct osd_dev *fscb_od,
551 unsigned table_count) 610 unsigned table_count)
552{ 611{
553 struct exofs_sb_info *sbi = *psbi; 612 struct ore_comp comp;
554 struct osd_dev *fscb_od;
555 struct osd_obj_id obj = {.partition = sbi->layout.s_pid,
556 .id = EXOFS_DEVTABLE_ID};
557 struct exofs_device_table *dt; 613 struct exofs_device_table *dt;
558 unsigned table_bytes = table_count * sizeof(dt->dt_dev_table[0]) + 614 unsigned table_bytes = table_count * sizeof(dt->dt_dev_table[0]) +
559 sizeof(*dt); 615 sizeof(*dt);
@@ -567,10 +623,14 @@ static int exofs_read_lookup_dev_table(struct exofs_sb_info **psbi,
567 return -ENOMEM; 623 return -ENOMEM;
568 } 624 }
569 625
570 fscb_od = sbi->layout.s_ods[0]; 626 sbi->comps.numdevs = 0;
571 sbi->layout.s_ods[0] = NULL; 627
572 sbi->layout.s_numdevs = 0; 628 comp.obj.partition = sbi->one_comp.obj.partition;
573 ret = exofs_read_kern(fscb_od, sbi->s_cred, &obj, 0, dt, table_bytes); 629 comp.obj.id = EXOFS_DEVTABLE_ID;
630 exofs_make_credential(comp.cred, &comp.obj);
631
632 ret = exofs_read_kern(fscb_od, comp.cred, &comp.obj, 0, dt,
633 table_bytes);
574 if (unlikely(ret)) { 634 if (unlikely(ret)) {
575 EXOFS_ERR("ERROR: reading device table\n"); 635 EXOFS_ERR("ERROR: reading device table\n");
576 goto out; 636 goto out;
@@ -588,16 +648,18 @@ static int exofs_read_lookup_dev_table(struct exofs_sb_info **psbi,
588 goto out; 648 goto out;
589 649
590 if (likely(numdevs > 1)) { 650 if (likely(numdevs > 1)) {
591 unsigned size = numdevs * sizeof(sbi->layout.s_ods[0]); 651 unsigned size = numdevs * sizeof(sbi->comps.ods[0]);
592 652
593 sbi = krealloc(sbi, sizeof(*sbi) + size, GFP_KERNEL); 653 /* Twice bigger table: See exofs_init_comps() and below
594 if (unlikely(!sbi)) { 654 * comment
655 */
656 sbi->comps.ods = kzalloc(size + size - 1, GFP_KERNEL);
657 if (unlikely(!sbi->comps.ods)) {
658 EXOFS_ERR("ERROR: faild allocating Device array[%d]\n",
659 numdevs);
595 ret = -ENOMEM; 660 ret = -ENOMEM;
596 goto out; 661 goto out;
597 } 662 }
598 memset(&sbi->layout.s_ods[1], 0,
599 size - sizeof(sbi->layout.s_ods[0]));
600 *psbi = sbi;
601 } 663 }
602 664
603 for (i = 0; i < numdevs; i++) { 665 for (i = 0; i < numdevs; i++) {
@@ -619,8 +681,8 @@ static int exofs_read_lookup_dev_table(struct exofs_sb_info **psbi,
619 * line. We always keep them in device-table order. 681 * line. We always keep them in device-table order.
620 */ 682 */
621 if (fscb_od && osduld_device_same(fscb_od, &odi)) { 683 if (fscb_od && osduld_device_same(fscb_od, &odi)) {
622 sbi->layout.s_ods[i] = fscb_od; 684 sbi->comps.ods[i] = fscb_od;
623 ++sbi->layout.s_numdevs; 685 ++sbi->comps.numdevs;
624 fscb_od = NULL; 686 fscb_od = NULL;
625 continue; 687 continue;
626 } 688 }
@@ -633,13 +695,13 @@ static int exofs_read_lookup_dev_table(struct exofs_sb_info **psbi,
633 goto out; 695 goto out;
634 } 696 }
635 697
636 sbi->layout.s_ods[i] = od; 698 sbi->comps.ods[i] = od;
637 ++sbi->layout.s_numdevs; 699 ++sbi->comps.numdevs;
638 700
639 /* Read the fscb of the other devices to make sure the FS 701 /* Read the fscb of the other devices to make sure the FS
640 * partition is there. 702 * partition is there.
641 */ 703 */
642 ret = exofs_read_kern(od, sbi->s_cred, &obj, 0, &fscb, 704 ret = exofs_read_kern(od, comp.cred, &comp.obj, 0, &fscb,
643 sizeof(fscb)); 705 sizeof(fscb));
644 if (unlikely(ret)) { 706 if (unlikely(ret)) {
645 EXOFS_ERR("ERROR: Malformed participating device " 707 EXOFS_ERR("ERROR: Malformed participating device "
@@ -656,13 +718,22 @@ static int exofs_read_lookup_dev_table(struct exofs_sb_info **psbi,
656 718
657out: 719out:
658 kfree(dt); 720 kfree(dt);
659 if (unlikely(!ret && fscb_od)) { 721 if (likely(!ret)) {
660 EXOFS_ERR( 722 unsigned numdevs = sbi->comps.numdevs;
661 "ERROR: Bad device-table container device not present\n");
662 osduld_put_device(fscb_od);
663 ret = -EINVAL;
664 }
665 723
724 if (unlikely(fscb_od)) {
725 EXOFS_ERR("ERROR: Bad device-table container device not present\n");
726 osduld_put_device(fscb_od);
727 return -EINVAL;
728 }
729 /* exofs round-robins the device table view according to inode
730 * number. We hold a: twice bigger table hence inodes can point
731 * to any device and have a sequential view of the table
732 * starting at this device. See exofs_init_comps()
733 */
734 for (i = 0; i < numdevs - 1; ++i)
735 sbi->comps.ods[i + numdevs] = sbi->comps.ods[i];
736 }
666 return ret; 737 return ret;
667} 738}
668 739
@@ -676,7 +747,7 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)
676 struct exofs_sb_info *sbi; /*extended info */ 747 struct exofs_sb_info *sbi; /*extended info */
677 struct osd_dev *od; /* Master device */ 748 struct osd_dev *od; /* Master device */
678 struct exofs_fscb fscb; /*on-disk superblock info */ 749 struct exofs_fscb fscb; /*on-disk superblock info */
679 struct osd_obj_id obj; 750 struct ore_comp comp;
680 unsigned table_count; 751 unsigned table_count;
681 int ret; 752 int ret;
682 753
@@ -684,10 +755,6 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)
684 if (!sbi) 755 if (!sbi)
685 return -ENOMEM; 756 return -ENOMEM;
686 757
687 ret = bdi_setup_and_register(&sbi->bdi, "exofs", BDI_CAP_MAP_COPY);
688 if (ret)
689 goto free_bdi;
690
691 /* use mount options to fill superblock */ 758 /* use mount options to fill superblock */
692 if (opts->is_osdname) { 759 if (opts->is_osdname) {
693 struct osd_dev_info odi = {.systemid_len = 0}; 760 struct osd_dev_info odi = {.systemid_len = 0};
@@ -695,6 +762,8 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)
695 odi.osdname_len = strlen(opts->dev_name); 762 odi.osdname_len = strlen(opts->dev_name);
696 odi.osdname = (u8 *)opts->dev_name; 763 odi.osdname = (u8 *)opts->dev_name;
697 od = osduld_info_lookup(&odi); 764 od = osduld_info_lookup(&odi);
765 kfree(opts->dev_name);
766 opts->dev_name = NULL;
698 } else { 767 } else {
699 od = osduld_path_lookup(opts->dev_name); 768 od = osduld_path_lookup(opts->dev_name);
700 } 769 }
@@ -709,11 +778,16 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)
709 sbi->layout.group_width = 1; 778 sbi->layout.group_width = 1;
710 sbi->layout.group_depth = -1; 779 sbi->layout.group_depth = -1;
711 sbi->layout.group_count = 1; 780 sbi->layout.group_count = 1;
712 sbi->layout.s_ods[0] = od;
713 sbi->layout.s_numdevs = 1;
714 sbi->layout.s_pid = opts->pid;
715 sbi->s_timeout = opts->timeout; 781 sbi->s_timeout = opts->timeout;
716 782
783 sbi->one_comp.obj.partition = opts->pid;
784 sbi->one_comp.obj.id = 0;
785 exofs_make_credential(sbi->one_comp.cred, &sbi->one_comp.obj);
786 sbi->comps.numdevs = 1;
787 sbi->comps.single_comp = EC_SINGLE_COMP;
788 sbi->comps.comps = &sbi->one_comp;
789 sbi->comps.ods = sbi->_min_one_dev;
790
717 /* fill in some other data by hand */ 791 /* fill in some other data by hand */
718 memset(sb->s_id, 0, sizeof(sb->s_id)); 792 memset(sb->s_id, 0, sizeof(sb->s_id));
719 strcpy(sb->s_id, "exofs"); 793 strcpy(sb->s_id, "exofs");
@@ -724,11 +798,11 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)
724 sb->s_bdev = NULL; 798 sb->s_bdev = NULL;
725 sb->s_dev = 0; 799 sb->s_dev = 0;
726 800
727 obj.partition = sbi->layout.s_pid; 801 comp.obj.partition = sbi->one_comp.obj.partition;
728 obj.id = EXOFS_SUPER_ID; 802 comp.obj.id = EXOFS_SUPER_ID;
729 exofs_make_credential(sbi->s_cred, &obj); 803 exofs_make_credential(comp.cred, &comp.obj);
730 804
731 ret = exofs_read_kern(od, sbi->s_cred, &obj, 0, &fscb, sizeof(fscb)); 805 ret = exofs_read_kern(od, comp.cred, &comp.obj, 0, &fscb, sizeof(fscb));
732 if (unlikely(ret)) 806 if (unlikely(ret))
733 goto free_sbi; 807 goto free_sbi;
734 808
@@ -757,9 +831,11 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)
757 831
758 table_count = le64_to_cpu(fscb.s_dev_table_count); 832 table_count = le64_to_cpu(fscb.s_dev_table_count);
759 if (table_count) { 833 if (table_count) {
760 ret = exofs_read_lookup_dev_table(&sbi, table_count); 834 ret = exofs_read_lookup_dev_table(sbi, od, table_count);
761 if (unlikely(ret)) 835 if (unlikely(ret))
762 goto free_sbi; 836 goto free_sbi;
837 } else {
838 sbi->comps.ods[0] = od;
763 } 839 }
764 840
765 __sbi_read_stats(sbi); 841 __sbi_read_stats(sbi);
@@ -793,20 +869,20 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)
793 goto free_sbi; 869 goto free_sbi;
794 } 870 }
795 871
796 _exofs_print_device("Mounting", opts->dev_name, sbi->layout.s_ods[0], 872 ret = bdi_setup_and_register(&sbi->bdi, "exofs", BDI_CAP_MAP_COPY);
797 sbi->layout.s_pid); 873 if (ret) {
798 if (opts->is_osdname) 874 EXOFS_DBGMSG("Failed to bdi_setup_and_register\n");
799 kfree(opts->dev_name); 875 goto free_sbi;
876 }
877
878 _exofs_print_device("Mounting", opts->dev_name, sbi->comps.ods[0],
879 sbi->one_comp.obj.partition);
800 return 0; 880 return 0;
801 881
802free_sbi: 882free_sbi:
803 bdi_destroy(&sbi->bdi);
804free_bdi:
805 EXOFS_ERR("Unable to mount exofs on %s pid=0x%llx err=%d\n", 883 EXOFS_ERR("Unable to mount exofs on %s pid=0x%llx err=%d\n",
806 opts->dev_name, sbi->layout.s_pid, ret); 884 opts->dev_name, sbi->one_comp.obj.partition, ret);
807 exofs_free_sbi(sbi); 885 exofs_free_sbi(sbi);
808 if (opts->is_osdname)
809 kfree(opts->dev_name);
810 return ret; 886 return ret;
811} 887}
812 888
@@ -837,7 +913,7 @@ static int exofs_statfs(struct dentry *dentry, struct kstatfs *buf)
837{ 913{
838 struct super_block *sb = dentry->d_sb; 914 struct super_block *sb = dentry->d_sb;
839 struct exofs_sb_info *sbi = sb->s_fs_info; 915 struct exofs_sb_info *sbi = sb->s_fs_info;
840 struct exofs_io_state *ios; 916 struct ore_io_state *ios;
841 struct osd_attr attrs[] = { 917 struct osd_attr attrs[] = {
842 ATTR_DEF(OSD_APAGE_PARTITION_QUOTAS, 918 ATTR_DEF(OSD_APAGE_PARTITION_QUOTAS,
843 OSD_ATTR_PQ_CAPACITY_QUOTA, sizeof(__be64)), 919 OSD_ATTR_PQ_CAPACITY_QUOTA, sizeof(__be64)),
@@ -846,21 +922,18 @@ static int exofs_statfs(struct dentry *dentry, struct kstatfs *buf)
846 }; 922 };
847 uint64_t capacity = ULLONG_MAX; 923 uint64_t capacity = ULLONG_MAX;
848 uint64_t used = ULLONG_MAX; 924 uint64_t used = ULLONG_MAX;
849 uint8_t cred_a[OSD_CAP_LEN];
850 int ret; 925 int ret;
851 926
852 ret = exofs_get_io_state(&sbi->layout, &ios); 927 ret = ore_get_io_state(&sbi->layout, &sbi->comps, &ios);
853 if (ret) { 928 if (ret) {
854 EXOFS_DBGMSG("exofs_get_io_state failed.\n"); 929 EXOFS_DBGMSG("ore_get_io_state failed.\n");
855 return ret; 930 return ret;
856 } 931 }
857 932
858 exofs_make_credential(cred_a, &ios->obj);
859 ios->cred = sbi->s_cred;
860 ios->in_attr = attrs; 933 ios->in_attr = attrs;
861 ios->in_attr_len = ARRAY_SIZE(attrs); 934 ios->in_attr_len = ARRAY_SIZE(attrs);
862 935
863 ret = exofs_sbi_read(ios); 936 ret = ore_read(ios);
864 if (unlikely(ret)) 937 if (unlikely(ret))
865 goto out; 938 goto out;
866 939
@@ -889,7 +962,7 @@ static int exofs_statfs(struct dentry *dentry, struct kstatfs *buf)
889 buf->f_namelen = EXOFS_NAME_LEN; 962 buf->f_namelen = EXOFS_NAME_LEN;
890 963
891out: 964out:
892 exofs_put_io_state(ios); 965 ore_put_io_state(ios);
893 return ret; 966 return ret;
894} 967}
895 968
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index e2d88baf91d3..4687fea0c00f 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -124,7 +124,7 @@ void *ext4_kvzalloc(size_t size, gfp_t flags)
124{ 124{
125 void *ret; 125 void *ret;
126 126
127 ret = kmalloc(size, flags); 127 ret = kzalloc(size, flags);
128 if (!ret) 128 if (!ret)
129 ret = __vmalloc(size, flags | __GFP_ZERO, PAGE_KERNEL); 129 ret = __vmalloc(size, flags | __GFP_ZERO, PAGE_KERNEL);
130 return ret; 130 return ret;
diff --git a/fs/inode.c b/fs/inode.c
index 5aab80dc008c..73920d555c88 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -143,6 +143,7 @@ int inode_init_always(struct super_block *sb, struct inode *inode)
143 inode->i_op = &empty_iops; 143 inode->i_op = &empty_iops;
144 inode->i_fop = &empty_fops; 144 inode->i_fop = &empty_fops;
145 inode->i_nlink = 1; 145 inode->i_nlink = 1;
146 inode->i_opflags = 0;
146 inode->i_uid = 0; 147 inode->i_uid = 0;
147 inode->i_gid = 0; 148 inode->i_gid = 0;
148 atomic_set(&inode->i_writecount, 0); 149 atomic_set(&inode->i_writecount, 0);
diff --git a/fs/namei.c b/fs/namei.c
index 445fd5da11fa..2826db35dc25 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -179,19 +179,14 @@ static int check_acl(struct inode *inode, int mask)
179#ifdef CONFIG_FS_POSIX_ACL 179#ifdef CONFIG_FS_POSIX_ACL
180 struct posix_acl *acl; 180 struct posix_acl *acl;
181 181
182 /*
183 * Under RCU walk, we cannot even do a "get_cached_acl()",
184 * because that involves locking and getting a refcount on
185 * a cached ACL.
186 *
187 * So the only case we handle during RCU walking is the
188 * case of a cached "no ACL at all", which needs no locks
189 * or refcounts.
190 */
191 if (mask & MAY_NOT_BLOCK) { 182 if (mask & MAY_NOT_BLOCK) {
192 if (negative_cached_acl(inode, ACL_TYPE_ACCESS)) 183 acl = get_cached_acl_rcu(inode, ACL_TYPE_ACCESS);
184 if (!acl)
193 return -EAGAIN; 185 return -EAGAIN;
194 return -ECHILD; 186 /* no ->get_acl() calls in RCU mode... */
187 if (acl == ACL_NOT_CACHED)
188 return -ECHILD;
189 return posix_acl_permission(inode, acl, mask & ~MAY_NOT_BLOCK);
195 } 190 }
196 191
197 acl = get_cached_acl(inode, ACL_TYPE_ACCESS); 192 acl = get_cached_acl(inode, ACL_TYPE_ACCESS);
@@ -313,6 +308,26 @@ int generic_permission(struct inode *inode, int mask)
313 return -EACCES; 308 return -EACCES;
314} 309}
315 310
311/*
312 * We _really_ want to just do "generic_permission()" without
313 * even looking at the inode->i_op values. So we keep a cache
314 * flag in inode->i_opflags, that says "this has not special
315 * permission function, use the fast case".
316 */
317static inline int do_inode_permission(struct inode *inode, int mask)
318{
319 if (unlikely(!(inode->i_opflags & IOP_FASTPERM))) {
320 if (likely(inode->i_op->permission))
321 return inode->i_op->permission(inode, mask);
322
323 /* This gets set once for the inode lifetime */
324 spin_lock(&inode->i_lock);
325 inode->i_opflags |= IOP_FASTPERM;
326 spin_unlock(&inode->i_lock);
327 }
328 return generic_permission(inode, mask);
329}
330
316/** 331/**
317 * inode_permission - check for access rights to a given inode 332 * inode_permission - check for access rights to a given inode
318 * @inode: inode to check permission on 333 * @inode: inode to check permission on
@@ -327,7 +342,7 @@ int inode_permission(struct inode *inode, int mask)
327{ 342{
328 int retval; 343 int retval;
329 344
330 if (mask & MAY_WRITE) { 345 if (unlikely(mask & MAY_WRITE)) {
331 umode_t mode = inode->i_mode; 346 umode_t mode = inode->i_mode;
332 347
333 /* 348 /*
@@ -344,11 +359,7 @@ int inode_permission(struct inode *inode, int mask)
344 return -EACCES; 359 return -EACCES;
345 } 360 }
346 361
347 if (inode->i_op->permission) 362 retval = do_inode_permission(inode, mask);
348 retval = inode->i_op->permission(inode, mask);
349 else
350 retval = generic_permission(inode, mask);
351
352 if (retval) 363 if (retval)
353 return retval; 364 return retval;
354 365
@@ -1250,6 +1261,26 @@ static void terminate_walk(struct nameidata *nd)
1250 } 1261 }
1251} 1262}
1252 1263
1264/*
1265 * Do we need to follow links? We _really_ want to be able
1266 * to do this check without having to look at inode->i_op,
1267 * so we keep a cache of "no, this doesn't need follow_link"
1268 * for the common case.
1269 */
1270static inline int should_follow_link(struct inode *inode, int follow)
1271{
1272 if (unlikely(!(inode->i_opflags & IOP_NOFOLLOW))) {
1273 if (likely(inode->i_op->follow_link))
1274 return follow;
1275
1276 /* This gets set once for the inode lifetime */
1277 spin_lock(&inode->i_lock);
1278 inode->i_opflags |= IOP_NOFOLLOW;
1279 spin_unlock(&inode->i_lock);
1280 }
1281 return 0;
1282}
1283
1253static inline int walk_component(struct nameidata *nd, struct path *path, 1284static inline int walk_component(struct nameidata *nd, struct path *path,
1254 struct qstr *name, int type, int follow) 1285 struct qstr *name, int type, int follow)
1255{ 1286{
@@ -1272,7 +1303,7 @@ static inline int walk_component(struct nameidata *nd, struct path *path,
1272 terminate_walk(nd); 1303 terminate_walk(nd);
1273 return -ENOENT; 1304 return -ENOENT;
1274 } 1305 }
1275 if (unlikely(inode->i_op->follow_link) && follow) { 1306 if (should_follow_link(inode, follow)) {
1276 if (nd->flags & LOOKUP_RCU) { 1307 if (nd->flags & LOOKUP_RCU) {
1277 if (unlikely(unlazy_walk(nd, path->dentry))) { 1308 if (unlikely(unlazy_walk(nd, path->dentry))) {
1278 terminate_walk(nd); 1309 terminate_walk(nd);
@@ -1325,6 +1356,26 @@ static inline int nested_symlink(struct path *path, struct nameidata *nd)
1325} 1356}
1326 1357
1327/* 1358/*
1359 * We really don't want to look at inode->i_op->lookup
1360 * when we don't have to. So we keep a cache bit in
1361 * the inode ->i_opflags field that says "yes, we can
1362 * do lookup on this inode".
1363 */
1364static inline int can_lookup(struct inode *inode)
1365{
1366 if (likely(inode->i_opflags & IOP_LOOKUP))
1367 return 1;
1368 if (likely(!inode->i_op->lookup))
1369 return 0;
1370
1371 /* We do this once for the lifetime of the inode */
1372 spin_lock(&inode->i_lock);
1373 inode->i_opflags |= IOP_LOOKUP;
1374 spin_unlock(&inode->i_lock);
1375 return 1;
1376}
1377
1378/*
1328 * Name resolution. 1379 * Name resolution.
1329 * This is the basic name resolution function, turning a pathname into 1380 * This is the basic name resolution function, turning a pathname into
1330 * the final dentry. We expect 'base' to be positive and a directory. 1381 * the final dentry. We expect 'base' to be positive and a directory.
@@ -1403,10 +1454,10 @@ static int link_path_walk(const char *name, struct nameidata *nd)
1403 if (err) 1454 if (err)
1404 return err; 1455 return err;
1405 } 1456 }
1457 if (can_lookup(nd->inode))
1458 continue;
1406 err = -ENOTDIR; 1459 err = -ENOTDIR;
1407 if (!nd->inode->i_op->lookup) 1460 break;
1408 break;
1409 continue;
1410 /* here ends the main loop */ 1461 /* here ends the main loop */
1411 1462
1412last_component: 1463last_component:
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 08e3eccf9a12..5eb02069e1b8 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1118,7 +1118,7 @@ static ssize_t oom_adjust_write(struct file *file, const char __user *buf,
1118 * Warn that /proc/pid/oom_adj is deprecated, see 1118 * Warn that /proc/pid/oom_adj is deprecated, see
1119 * Documentation/feature-removal-schedule.txt. 1119 * Documentation/feature-removal-schedule.txt.
1120 */ 1120 */
1121 WARN_ONCE(1, "%s (%d): /proc/%d/oom_adj is deprecated, please use /proc/%d/oom_score_adj instead.\n", 1121 printk_once(KERN_WARNING "%s (%d): /proc/%d/oom_adj is deprecated, please use /proc/%d/oom_score_adj instead.\n",
1122 current->comm, task_pid_nr(current), task_pid_nr(task), 1122 current->comm, task_pid_nr(current), task_pid_nr(task),
1123 task_pid_nr(task)); 1123 task_pid_nr(task));
1124 task->signal->oom_adj = oom_adjust; 1124 task->signal->oom_adj = oom_adjust;
@@ -1919,6 +1919,14 @@ static int proc_fd_info(struct inode *inode, struct path *path, char *info)
1919 spin_lock(&files->file_lock); 1919 spin_lock(&files->file_lock);
1920 file = fcheck_files(files, fd); 1920 file = fcheck_files(files, fd);
1921 if (file) { 1921 if (file) {
1922 unsigned int f_flags;
1923 struct fdtable *fdt;
1924
1925 fdt = files_fdtable(files);
1926 f_flags = file->f_flags & ~O_CLOEXEC;
1927 if (FD_ISSET(fd, fdt->close_on_exec))
1928 f_flags |= O_CLOEXEC;
1929
1922 if (path) { 1930 if (path) {
1923 *path = file->f_path; 1931 *path = file->f_path;
1924 path_get(&file->f_path); 1932 path_get(&file->f_path);
@@ -1928,7 +1936,7 @@ static int proc_fd_info(struct inode *inode, struct path *path, char *info)
1928 "pos:\t%lli\n" 1936 "pos:\t%lli\n"
1929 "flags:\t0%o\n", 1937 "flags:\t0%o\n",
1930 (long long) file->f_pos, 1938 (long long) file->f_pos,
1931 file->f_flags); 1939 f_flags);
1932 spin_unlock(&files->file_lock); 1940 spin_unlock(&files->file_lock);
1933 put_files_struct(files); 1941 put_files_struct(files);
1934 return 0; 1942 return 0;
diff --git a/fs/stack.c b/fs/stack.c
index 4a6f7f440658..b4f2ab48a61f 100644
--- a/fs/stack.c
+++ b/fs/stack.c
@@ -29,10 +29,7 @@ void fsstack_copy_inode_size(struct inode *dst, struct inode *src)
29 * 29 *
30 * We don't actually know what locking is used at the lower level; 30 * We don't actually know what locking is used at the lower level;
31 * but if it's a filesystem that supports quotas, it will be using 31 * but if it's a filesystem that supports quotas, it will be using
32 * i_lock as in inode_add_bytes(). tmpfs uses other locking, and 32 * i_lock as in inode_add_bytes().
33 * its 32-bit is (just) able to exceed 2TB i_size with the aid of
34 * holes; but its i_blocks cannot carry into the upper long without
35 * almost 2TB swap - let's ignore that case.
36 */ 33 */
37 if (sizeof(i_blocks) > sizeof(long)) 34 if (sizeof(i_blocks) > sizeof(long))
38 spin_lock(&src->i_lock); 35 spin_lock(&src->i_lock);
diff --git a/fs/stat.c b/fs/stat.c
index 961039121cb8..ba5316ffac61 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -27,12 +27,12 @@ void generic_fillattr(struct inode *inode, struct kstat *stat)
27 stat->uid = inode->i_uid; 27 stat->uid = inode->i_uid;
28 stat->gid = inode->i_gid; 28 stat->gid = inode->i_gid;
29 stat->rdev = inode->i_rdev; 29 stat->rdev = inode->i_rdev;
30 stat->size = i_size_read(inode);
30 stat->atime = inode->i_atime; 31 stat->atime = inode->i_atime;
31 stat->mtime = inode->i_mtime; 32 stat->mtime = inode->i_mtime;
32 stat->ctime = inode->i_ctime; 33 stat->ctime = inode->i_ctime;
33 stat->size = i_size_read(inode);
34 stat->blocks = inode->i_blocks;
35 stat->blksize = (1 << inode->i_blkbits); 34 stat->blksize = (1 << inode->i_blkbits);
35 stat->blocks = inode->i_blocks;
36} 36}
37 37
38EXPORT_SYMBOL(generic_fillattr); 38EXPORT_SYMBOL(generic_fillattr);